Diffstat (limited to 'fs')
-rw-r--r-- | fs/9p/acl.c | 6
-rw-r--r-- | fs/9p/acl.h | 4
-rw-r--r-- | fs/9p/v9fs_vfs.h | 6
-rw-r--r-- | fs/9p/vfs_file.c | 36
-rw-r--r-- | fs/9p/vfs_inode.c | 139
-rw-r--r-- | fs/9p/vfs_inode_dotl.c | 92
-rw-r--r-- | fs/9p/vfs_super.c | 2
-rw-r--r-- | fs/Kconfig | 15
-rw-r--r-- | fs/anon_inodes.c | 2
-rw-r--r-- | fs/autofs4/autofs_i.h | 26
-rw-r--r-- | fs/autofs4/waitq.c | 2
-rw-r--r-- | fs/befs/linuxvfs.c | 23
-rw-r--r-- | fs/block_dev.c | 28
-rw-r--r-- | fs/btrfs/Makefile | 4
-rw-r--r-- | fs/btrfs/acl.c | 27
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 22
-rw-r--r-- | fs/btrfs/compression.c | 14
-rw-r--r-- | fs/btrfs/ctree.c | 457
-rw-r--r-- | fs/btrfs/ctree.h | 54
-rw-r--r-- | fs/btrfs/delayed-inode.c | 2
-rw-r--r-- | fs/btrfs/delayed-inode.h | 2
-rw-r--r-- | fs/btrfs/dir-item.c | 39
-rw-r--r-- | fs/btrfs/disk-io.c | 116
-rw-r--r-- | fs/btrfs/disk-io.h | 10
-rw-r--r-- | fs/btrfs/extent-tree.c | 401
-rw-r--r-- | fs/btrfs/extent_io.c | 309
-rw-r--r-- | fs/btrfs/extent_io.h | 55
-rw-r--r-- | fs/btrfs/extent_map.c | 155
-rw-r--r-- | fs/btrfs/file-item.c | 50
-rw-r--r-- | fs/btrfs/file.c | 76
-rw-r--r-- | fs/btrfs/free-space-cache.c | 193
-rw-r--r-- | fs/btrfs/inode.c | 259
-rw-r--r-- | fs/btrfs/ioctl.c | 34
-rw-r--r-- | fs/btrfs/locking.c | 280
-rw-r--r-- | fs/btrfs/locking.h | 36
-rw-r--r-- | fs/btrfs/ref-cache.c | 68
-rw-r--r-- | fs/btrfs/ref-cache.h | 52
-rw-r--r-- | fs/btrfs/relocation.c | 3
-rw-r--r-- | fs/btrfs/root-tree.c | 5
-rw-r--r-- | fs/btrfs/struct-funcs.c | 100
-rw-r--r-- | fs/btrfs/transaction.c | 116
-rw-r--r-- | fs/btrfs/tree-log.c | 46
-rw-r--r-- | fs/btrfs/volumes.c | 65
-rw-r--r-- | fs/btrfs/volumes.h | 2
-rw-r--r-- | fs/btrfs/xattr.c | 73
-rw-r--r-- | fs/ceph/debugfs.c | 2
-rw-r--r-- | fs/ceph/dir.c | 116
-rw-r--r-- | fs/ceph/export.c | 24
-rw-r--r-- | fs/ceph/file.c | 61
-rw-r--r-- | fs/ceph/inode.c | 48
-rw-r--r-- | fs/ceph/ioctl.c | 15
-rw-r--r-- | fs/ceph/ioctl.h | 1
-rw-r--r-- | fs/ceph/mds_client.c | 58
-rw-r--r-- | fs/ceph/mds_client.h | 3
-rw-r--r-- | fs/ceph/snap.c | 25
-rw-r--r-- | fs/ceph/super.c | 11
-rw-r--r-- | fs/ceph/super.h | 20
-rw-r--r-- | fs/ceph/xattr.c | 8
-rw-r--r-- | fs/cifs/cifs_debug.c | 2
-rw-r--r-- | fs/cifs/cifs_dfs_ref.c | 5
-rw-r--r-- | fs/cifs/cifsacl.c | 28
-rw-r--r-- | fs/cifs/cifsencrypt.c | 126
-rw-r--r-- | fs/cifs/cifsfs.c | 22
-rw-r--r-- | fs/cifs/cifsfs.h | 6
-rw-r--r-- | fs/cifs/cifsglob.h | 60
-rw-r--r-- | fs/cifs/cifssmb.c | 6
-rw-r--r-- | fs/cifs/connect.c | 662
-rw-r--r-- | fs/cifs/dir.c | 9
-rw-r--r-- | fs/cifs/dns_resolve.c | 4
-rw-r--r-- | fs/cifs/file.c | 27
-rw-r--r-- | fs/cifs/inode.c | 14
-rw-r--r-- | fs/cifs/link.c | 8
-rw-r--r-- | fs/cifs/misc.c | 11
-rw-r--r-- | fs/cifs/readdir.c | 427
-rw-r--r-- | fs/cifs/smbencrypt.c | 8
-rw-r--r-- | fs/cifs/transport.c | 53
-rw-r--r-- | fs/compat.c | 5
-rw-r--r-- | fs/compat_ioctl.c | 1
-rw-r--r-- | fs/dcache.c | 83
-rw-r--r-- | fs/direct-io.c | 2
-rw-r--r-- | fs/ecryptfs/Kconfig | 2
-rw-r--r-- | fs/ecryptfs/ecryptfs_kernel.h | 150
-rw-r--r-- | fs/ecryptfs/inode.c | 1
-rw-r--r-- | fs/ecryptfs/keystore.c | 62
-rw-r--r-- | fs/ecryptfs/main.c | 23
-rw-r--r-- | fs/ecryptfs/read_write.c | 18
-rw-r--r-- | fs/eventpoll.c | 2
-rw-r--r-- | fs/exec.c | 77
-rw-r--r-- | fs/exofs/Kbuild | 5
-rw-r--r-- | fs/exofs/Kconfig | 4
-rw-r--r-- | fs/exofs/exofs.h | 159
-rw-r--r-- | fs/exofs/inode.c | 152
-rw-r--r-- | fs/exofs/ore.c (renamed from fs/exofs/ios.c) | 370
-rw-r--r-- | fs/exofs/pnfs.h | 45
-rw-r--r-- | fs/exofs/super.c | 251
-rw-r--r-- | fs/ext2/acl.c | 8
-rw-r--r-- | fs/ext2/acl.h | 1
-rw-r--r-- | fs/ext2/xattr.c | 10
-rw-r--r-- | fs/ext3/acl.c | 9
-rw-r--r-- | fs/ext3/balloc.c | 38
-rw-r--r-- | fs/ext3/file.c | 1
-rw-r--r-- | fs/ext3/fsync.c | 11
-rw-r--r-- | fs/ext3/ialloc.c | 4
-rw-r--r-- | fs/ext3/inode.c | 193
-rw-r--r-- | fs/ext3/ioctl.c | 4
-rw-r--r-- | fs/ext3/namei.c | 13
-rw-r--r-- | fs/ext3/super.c | 13
-rw-r--r-- | fs/ext3/xattr.c | 12
-rw-r--r-- | fs/ext4/Makefile | 2
-rw-r--r-- | fs/ext4/acl.c | 9
-rw-r--r-- | fs/ext4/balloc.c | 48
-rw-r--r-- | fs/ext4/block_validity.c | 21
-rw-r--r-- | fs/ext4/ext4.h | 56
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 4
-rw-r--r-- | fs/ext4/extents.c | 129
-rw-r--r-- | fs/ext4/fsync.c | 26
-rw-r--r-- | fs/ext4/ialloc.c | 2
-rw-r--r-- | fs/ext4/indirect.c | 1487
-rw-r--r-- | fs/ext4/inode.c | 1623
-rw-r--r-- | fs/ext4/ioctl.c | 12
-rw-r--r-- | fs/ext4/mballoc.c | 230
-rw-r--r-- | fs/ext4/mballoc.h | 1
-rw-r--r-- | fs/ext4/namei.c | 27
-rw-r--r-- | fs/ext4/page-io.c | 30
-rw-r--r-- | fs/ext4/resize.c | 199
-rw-r--r-- | fs/ext4/super.c | 89
-rw-r--r-- | fs/ext4/truncate.h | 43
-rw-r--r-- | fs/fat/dir.c | 2
-rw-r--r-- | fs/fat/inode.c | 7
-rw-r--r-- | fs/file_table.c | 2
-rw-r--r-- | fs/fs-writeback.c | 378
-rw-r--r-- | fs/fuse/dev.c | 16
-rw-r--r-- | fs/fuse/file.c | 84
-rw-r--r-- | fs/fuse/fuse_i.h | 8
-rw-r--r-- | fs/fuse/inode.c | 13
-rw-r--r-- | fs/generic_acl.c | 13
-rw-r--r-- | fs/gfs2/acl.c | 6
-rw-r--r-- | fs/gfs2/main.c | 2
-rw-r--r-- | fs/gfs2/ops_fstype.c | 4
-rw-r--r-- | fs/hppfs/hppfs.c | 1
-rw-r--r-- | fs/hugetlbfs/inode.c | 1
-rw-r--r-- | fs/inode.c | 82
-rw-r--r-- | fs/jbd/checkpoint.c | 37
-rw-r--r-- | fs/jbd/commit.c | 57
-rw-r--r-- | fs/jbd/journal.c | 99
-rw-r--r-- | fs/jbd/transaction.c | 83
-rw-r--r-- | fs/jbd2/checkpoint.c | 5
-rw-r--r-- | fs/jbd2/journal.c | 67
-rw-r--r-- | fs/jffs2/acl.c | 4
-rw-r--r-- | fs/jffs2/acl.h | 2
-rw-r--r-- | fs/jffs2/fs.c | 4
-rw-r--r-- | fs/jffs2/os-linux.h | 2
-rw-r--r-- | fs/jfs/acl.c | 4
-rw-r--r-- | fs/jfs/jfs_dmap.c | 5
-rw-r--r-- | fs/jfs/jfs_txnmgr.c | 6
-rw-r--r-- | fs/jfs/jfs_umount.c | 4
-rw-r--r-- | fs/jfs/namei.c | 3
-rw-r--r-- | fs/jfs/xattr.c | 4
-rw-r--r-- | fs/lockd/clntproc.c | 9
-rw-r--r-- | fs/namei.c | 118
-rw-r--r-- | fs/nfs/Kconfig | 15
-rw-r--r-- | fs/nfs/Makefile | 1
-rw-r--r-- | fs/nfs/blocklayout/Makefile | 5
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 1020
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.h | 207
-rw-r--r-- | fs/nfs/blocklayout/blocklayoutdev.c | 410
-rw-r--r-- | fs/nfs/blocklayout/blocklayoutdm.c | 111
-rw-r--r-- | fs/nfs/blocklayout/extents.c | 935
-rw-r--r-- | fs/nfs/cache_lib.h | 2
-rw-r--r-- | fs/nfs/callback.h | 2
-rw-r--r-- | fs/nfs/callback_proc.c | 82
-rw-r--r-- | fs/nfs/callback_xdr.c | 24
-rw-r--r-- | fs/nfs/client.c | 18
-rw-r--r-- | fs/nfs/delegation.c | 16
-rw-r--r-- | fs/nfs/dir.c | 57
-rw-r--r-- | fs/nfs/direct.c | 2
-rw-r--r-- | fs/nfs/internal.h | 13
-rw-r--r-- | fs/nfs/namespace.c | 2
-rw-r--r-- | fs/nfs/nfs3acl.c | 2
-rw-r--r-- | fs/nfs/nfs3proc.c | 6
-rw-r--r-- | fs/nfs/nfs4_fs.h | 7
-rw-r--r-- | fs/nfs/nfs4filelayout.c | 82
-rw-r--r-- | fs/nfs/nfs4filelayout.h | 17
-rw-r--r-- | fs/nfs/nfs4filelayoutdev.c | 452
-rw-r--r-- | fs/nfs/nfs4proc.c | 277
-rw-r--r-- | fs/nfs/nfs4state.c | 9
-rw-r--r-- | fs/nfs/nfs4xdr.c | 480
-rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 48
-rw-r--r-- | fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3
-rw-r--r-- | fs/nfs/pagelist.c | 69
-rw-r--r-- | fs/nfs/pnfs.c | 307
-rw-r--r-- | fs/nfs/pnfs.h | 102
-rw-r--r-- | fs/nfs/pnfs_dev.c | 64
-rw-r--r-- | fs/nfs/read.c | 166
-rw-r--r-- | fs/nfs/unlink.c | 37
-rw-r--r-- | fs/nfs/write.c | 159
-rw-r--r-- | fs/notify/group.c | 2
-rw-r--r-- | fs/notify/inode_mark.c | 2
-rw-r--r-- | fs/notify/mark.c | 2
-rw-r--r-- | fs/notify/notification.c | 2
-rw-r--r-- | fs/notify/vfsmount_mark.c | 2
-rw-r--r-- | fs/ntfs/inode.h | 2
-rw-r--r-- | fs/ocfs2/acl.c | 4
-rw-r--r-- | fs/omfs/dir.c | 2
-rw-r--r-- | fs/open.c | 78
-rw-r--r-- | fs/pipe.c | 2
-rw-r--r-- | fs/posix_acl.c | 18
-rw-r--r-- | fs/proc/base.c | 28
-rw-r--r-- | fs/proc/generic.c | 3
-rw-r--r-- | fs/proc/inode.c | 2
-rw-r--r-- | fs/proc/meminfo.c | 2
-rw-r--r-- | fs/proc/proc_net.c | 4
-rw-r--r-- | fs/proc/root.c | 2
-rw-r--r-- | fs/pstore/inode.c | 12
-rw-r--r-- | fs/pstore/internal.h | 2
-rw-r--r-- | fs/pstore/platform.c | 30
-rw-r--r-- | fs/read_write.c | 12
-rw-r--r-- | fs/reiserfs/xattr_acl.c | 10
-rw-r--r-- | fs/stack.c | 5
-rw-r--r-- | fs/stat.c | 4
-rw-r--r-- | fs/ubifs/debug.h | 6
-rw-r--r-- | fs/xfs/Makefile | 119
-rw-r--r-- | fs/xfs/kmem.c (renamed from fs/xfs/linux-2.6/kmem.c) | 0
-rw-r--r-- | fs/xfs/kmem.h (renamed from fs/xfs/linux-2.6/kmem.h) | 0
-rw-r--r-- | fs/xfs/mrlock.h (renamed from fs/xfs/linux-2.6/mrlock.h) | 0
-rw-r--r-- | fs/xfs/time.h (renamed from fs/xfs/linux-2.6/time.h) | 0
-rw-r--r-- | fs/xfs/uuid.c (renamed from fs/xfs/support/uuid.c) | 0
-rw-r--r-- | fs/xfs/uuid.h (renamed from fs/xfs/support/uuid.h) | 0
-rw-r--r-- | fs/xfs/xfs.h | 3
-rw-r--r-- | fs/xfs/xfs_acl.c (renamed from fs/xfs/linux-2.6/xfs_acl.c) | 6
-rw-r--r-- | fs/xfs/xfs_acl.h | 5
-rw-r--r-- | fs/xfs/xfs_ag.h | 6
-rw-r--r-- | fs/xfs/xfs_alloc.c | 7
-rw-r--r-- | fs/xfs/xfs_aops.c (renamed from fs/xfs/linux-2.6/xfs_aops.c) | 3
-rw-r--r-- | fs/xfs/xfs_aops.h (renamed from fs/xfs/linux-2.6/xfs_aops.h) | 0
-rw-r--r-- | fs/xfs/xfs_attr.c | 3
-rw-r--r-- | fs/xfs/xfs_bmap.c | 10
-rw-r--r-- | fs/xfs/xfs_btree.c | 17
-rw-r--r-- | fs/xfs/xfs_btree.h | 2
-rw-r--r-- | fs/xfs/xfs_buf.c (renamed from fs/xfs/linux-2.6/xfs_buf.c) | 18
-rw-r--r-- | fs/xfs/xfs_buf.h (renamed from fs/xfs/linux-2.6/xfs_buf.h) | 32
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 24
-rw-r--r-- | fs/xfs/xfs_da_btree.c | 44
-rw-r--r-- | fs/xfs/xfs_dinode.h | 2
-rw-r--r-- | fs/xfs/xfs_dir2.c | 16
-rw-r--r-- | fs/xfs/xfs_discard.c (renamed from fs/xfs/linux-2.6/xfs_discard.c) | 0
-rw-r--r-- | fs/xfs/xfs_discard.h (renamed from fs/xfs/linux-2.6/xfs_discard.h) | 0
-rw-r--r-- | fs/xfs/xfs_dquot.c (renamed from fs/xfs/quota/xfs_dquot.c) | 16
-rw-r--r-- | fs/xfs/xfs_dquot.h (renamed from fs/xfs/quota/xfs_dquot.h) | 0
-rw-r--r-- | fs/xfs/xfs_dquot_item.c (renamed from fs/xfs/quota/xfs_dquot_item.c) | 0
-rw-r--r-- | fs/xfs/xfs_dquot_item.h (renamed from fs/xfs/quota/xfs_dquot_item.h) | 0
-rw-r--r-- | fs/xfs/xfs_export.c (renamed from fs/xfs/linux-2.6/xfs_export.c) | 0
-rw-r--r-- | fs/xfs/xfs_export.h (renamed from fs/xfs/linux-2.6/xfs_export.h) | 0
-rw-r--r-- | fs/xfs/xfs_file.c (renamed from fs/xfs/linux-2.6/xfs_file.c) | 2
-rw-r--r-- | fs/xfs/xfs_filestream.c | 14
-rw-r--r-- | fs/xfs/xfs_fs_subr.c (renamed from fs/xfs/linux-2.6/xfs_fs_subr.c) | 0
-rw-r--r-- | fs/xfs/xfs_globals.c (renamed from fs/xfs/linux-2.6/xfs_globals.c) | 0
-rw-r--r-- | fs/xfs/xfs_ialloc.c | 5
-rw-r--r-- | fs/xfs/xfs_inode.c | 20
-rw-r--r-- | fs/xfs/xfs_inode.h | 2
-rw-r--r-- | fs/xfs/xfs_ioctl.c (renamed from fs/xfs/linux-2.6/xfs_ioctl.c) | 6
-rw-r--r-- | fs/xfs/xfs_ioctl.h (renamed from fs/xfs/linux-2.6/xfs_ioctl.h) | 0
-rw-r--r-- | fs/xfs/xfs_ioctl32.c (renamed from fs/xfs/linux-2.6/xfs_ioctl32.c) | 0
-rw-r--r-- | fs/xfs/xfs_ioctl32.h (renamed from fs/xfs/linux-2.6/xfs_ioctl32.h) | 0
-rw-r--r-- | fs/xfs/xfs_iops.c (renamed from fs/xfs/linux-2.6/xfs_iops.c) | 23
-rw-r--r-- | fs/xfs/xfs_iops.h (renamed from fs/xfs/linux-2.6/xfs_iops.h) | 0
-rw-r--r-- | fs/xfs/xfs_linux.h (renamed from fs/xfs/linux-2.6/xfs_linux.h) | 27
-rw-r--r-- | fs/xfs/xfs_log.c | 14
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 42
-rw-r--r-- | fs/xfs/xfs_message.c (renamed from fs/xfs/linux-2.6/xfs_message.c) | 0
-rw-r--r-- | fs/xfs/xfs_message.h (renamed from fs/xfs/linux-2.6/xfs_message.h) | 0
-rw-r--r-- | fs/xfs/xfs_mount.c | 6
-rw-r--r-- | fs/xfs/xfs_qm.c (renamed from fs/xfs/quota/xfs_qm.c) | 2
-rw-r--r-- | fs/xfs/xfs_qm.h (renamed from fs/xfs/quota/xfs_qm.h) | 0
-rw-r--r-- | fs/xfs/xfs_qm_bhv.c (renamed from fs/xfs/quota/xfs_qm_bhv.c) | 0
-rw-r--r-- | fs/xfs/xfs_qm_stats.c (renamed from fs/xfs/quota/xfs_qm_stats.c) | 0
-rw-r--r-- | fs/xfs/xfs_qm_stats.h (renamed from fs/xfs/quota/xfs_qm_stats.h) | 0
-rw-r--r-- | fs/xfs/xfs_qm_syscalls.c (renamed from fs/xfs/quota/xfs_qm_syscalls.c) | 0
-rw-r--r-- | fs/xfs/xfs_quota_priv.h (renamed from fs/xfs/quota/xfs_quota_priv.h) | 0
-rw-r--r-- | fs/xfs/xfs_quotaops.c (renamed from fs/xfs/linux-2.6/xfs_quotaops.c) | 2
-rw-r--r-- | fs/xfs/xfs_rename.c | 4
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 32
-rw-r--r-- | fs/xfs/xfs_rtalloc.h | 2
-rw-r--r-- | fs/xfs/xfs_rw.c | 8
-rw-r--r-- | fs/xfs/xfs_sb.h | 2
-rw-r--r-- | fs/xfs/xfs_stats.c (renamed from fs/xfs/linux-2.6/xfs_stats.c) | 0
-rw-r--r-- | fs/xfs/xfs_stats.h (renamed from fs/xfs/linux-2.6/xfs_stats.h) | 0
-rw-r--r-- | fs/xfs/xfs_super.c (renamed from fs/xfs/linux-2.6/xfs_super.c) | 36
-rw-r--r-- | fs/xfs/xfs_super.h (renamed from fs/xfs/linux-2.6/xfs_super.h) | 0
-rw-r--r-- | fs/xfs/xfs_sync.c (renamed from fs/xfs/linux-2.6/xfs_sync.c) | 2
-rw-r--r-- | fs/xfs/xfs_sync.h (renamed from fs/xfs/linux-2.6/xfs_sync.h) | 0
-rw-r--r-- | fs/xfs/xfs_sysctl.c (renamed from fs/xfs/linux-2.6/xfs_sysctl.c) | 0
-rw-r--r-- | fs/xfs/xfs_sysctl.h (renamed from fs/xfs/linux-2.6/xfs_sysctl.h) | 0
-rw-r--r-- | fs/xfs/xfs_trace.c (renamed from fs/xfs/linux-2.6/xfs_trace.c) | 4
-rw-r--r-- | fs/xfs/xfs_trace.h (renamed from fs/xfs/linux-2.6/xfs_trace.h) | 0
-rw-r--r-- | fs/xfs/xfs_trans_ail.c | 67
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 28
-rw-r--r-- | fs/xfs/xfs_trans_dquot.c (renamed from fs/xfs/quota/xfs_trans_dquot.c) | 0
-rw-r--r-- | fs/xfs/xfs_vnode.h (renamed from fs/xfs/linux-2.6/xfs_vnode.h) | 0
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 22
-rw-r--r-- | fs/xfs/xfs_xattr.c (renamed from fs/xfs/linux-2.6/xfs_xattr.c) | 0
301 files changed, 11478 insertions, 6949 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index e9cb57f07546..9a1d42630751 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -182,11 +182,11 @@ int v9fs_set_create_acl(struct dentry *dentry,
 	return 0;
 }
 
-int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 		  struct posix_acl **dpacl, struct posix_acl **pacl)
 {
 	int retval = 0;
-	mode_t mode = *modep;
+	umode_t mode = *modep;
 	struct posix_acl *acl = NULL;
 
 	if (!S_ISLNK(mode)) {
@@ -319,7 +319,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			mode_t mode = inode->i_mode;
+			umode_t mode = inode->i_mode;
 			retval = posix_acl_equiv_mode(acl, &mode);
 			if (retval < 0)
 				goto err_out;
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index ddb7ae19d971..559556411965 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -20,7 +20,7 @@ extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type);
 extern int v9fs_acl_chmod(struct dentry *);
 extern int v9fs_set_create_acl(struct dentry *,
 			       struct posix_acl **, struct posix_acl **);
-extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+extern int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 			 struct posix_acl **dpacl, struct posix_acl **pacl);
 #else
 #define v9fs_iop_get_acl NULL
@@ -38,7 +38,7 @@ static inline int v9fs_set_create_acl(struct dentry *dentry,
 {
 	return 0;
 }
-static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 				struct posix_acl **dpacl,
 				struct posix_acl **pacl)
 {
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 46ce357ca1ab..410ffd6ceb5f 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
 
 struct inode *v9fs_alloc_inode(struct super_block *sb);
 void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode);
+		    struct inode *inode, int mode, dev_t);
 void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -83,4 +83,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
 	v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
 	return;
 }
+
+int v9fs_open_to_dotl_flags(int flags);
 #endif
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3c173fcc2c5a..62857a810a79 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 	v9inode = V9FS_I(inode);
 	v9ses = v9fs_inode2v9ses(inode);
 	if (v9fs_proto_dotl(v9ses))
-		omode = file->f_flags;
+		omode = v9fs_open_to_dotl_flags(file->f_flags);
 	else
 		omode = v9fs_uflags2omode(file->f_flags,
 					v9fs_proto_dotu(v9ses));
@@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
 
 	/* convert posix lock to p9 tlock args */
 	memset(&flock, 0, sizeof(flock));
-	flock.type = fl->fl_type;
+	/* map the lock type */
+	switch (fl->fl_type) {
+	case F_RDLCK:
+		flock.type = P9_LOCK_TYPE_RDLCK;
+		break;
+	case F_WRLCK:
+		flock.type = P9_LOCK_TYPE_WRLCK;
+		break;
+	case F_UNLCK:
+		flock.type = P9_LOCK_TYPE_UNLCK;
+		break;
+	}
 	flock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		flock.length = 0;
@@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 
 	/* convert posix lock to p9 tgetlock args */
 	memset(&glock, 0, sizeof(glock));
-	glock.type = fl->fl_type;
+	glock.type = P9_LOCK_TYPE_UNLCK;
 	glock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		glock.length = 0;
@@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 	res = p9_client_getlock_dotl(fid, &glock);
 	if (res < 0)
 		return res;
-	if (glock.type != F_UNLCK) {
-		fl->fl_type = glock.type;
+	/* map 9p lock type to os lock type */
+	switch (glock.type) {
+	case P9_LOCK_TYPE_RDLCK:
+		fl->fl_type = F_RDLCK;
+		break;
+	case P9_LOCK_TYPE_WRLCK:
+		fl->fl_type = F_WRLCK;
+		break;
+	case P9_LOCK_TYPE_UNLCK:
+		fl->fl_type = F_UNLCK;
+		break;
+	}
+	if (glock.type != P9_LOCK_TYPE_UNLCK) {
 		fl->fl_start = glock.start;
 		if (glock.length == 0)
 			fl->fl_end = OFFSET_MAX;
 		else
 			fl->fl_end = glock.start + glock.length - 1;
 		fl->fl_pid = glock.proc_id;
-	} else
-		fl->fl_type = F_UNLCK;
-
+	}
 	return res;
 }
 
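A note for readers, not part of the patch: the two lock hunks above make v9fs translate explicitly between the POSIX lock types (F_RDLCK/F_WRLCK/F_UNLCK) and the 9P2000.L wire constants (P9_LOCK_TYPE_*) instead of passing the numeric values through. A minimal userspace sketch of the kind of request that reaches this code on a 9p mount (the mount point and file name below are hypothetical):

    /* Take and release a whole-file write lock via fcntl(); on a 9p
     * mount this ends up in v9fs_file_do_lock(), where F_WRLCK is now
     * mapped to P9_LOCK_TYPE_WRLCK before going on the wire. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            struct flock fl = {
                    .l_type   = F_WRLCK,   /* becomes P9_LOCK_TYPE_WRLCK */
                    .l_whence = SEEK_SET,
                    .l_start  = 0,
                    .l_len    = 0,         /* 0 = to EOF; sent as length 0 */
            };
            int fd = open("/mnt/9p/testfile", O_RDWR | O_CREAT, 0644);

            if (fd < 0 || fcntl(fd, F_SETLKW, &fl) < 0) {
                    perror("lock");
                    return 1;
            }
            fl.l_type = F_UNLCK;           /* becomes P9_LOCK_TYPE_UNLCK */
            fcntl(fd, F_SETLK, &fl);
            close(fd);
            return 0;
    }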
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8bb5507e822f..e3c03db3c788 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
 /**
  * p9mode2unixmode- convert plan9 mode bits to unix mode bits
  * @v9ses: v9fs session information
- * @mode: mode to convert
+ * @stat: p9_wstat from which mode need to be derived
+ * @rdev: major number, minor number in case of device files.
  *
  */
-
-static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+static int p9mode2unixmode(struct v9fs_session_info *v9ses,
+			   struct p9_wstat *stat, dev_t *rdev)
 {
 	int res;
+	int mode = stat->mode;
 
-	res = mode & 0777;
+	res = mode & S_IALLUGO;
+	*rdev = 0;
 
 	if ((mode & P9_DMDIR) == P9_DMDIR)
 		res |= S_IFDIR;
@@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		 && (v9ses->nodev == 0))
 		res |= S_IFIFO;
 	else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
-		 && (v9ses->nodev == 0))
-		res |= S_IFBLK;
-	else
+		 && (v9ses->nodev == 0)) {
+		char type = 0, ext[32];
+		int major = -1, minor = -1;
+
+		strncpy(ext, stat->extension, sizeof(ext));
+		sscanf(ext, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			res |= S_IFCHR;
+			break;
+		case 'b':
+			res |= S_IFBLK;
+			break;
+		default:
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				   "Unknown special type %c %s\n", type,
+				   stat->extension);
+		};
+		*rdev = MKDEV(major, minor);
+	} else
 		res |= S_IFREG;
 
 	if (v9fs_proto_dotu(v9ses)) {
@@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
 			res |= S_ISVTX;
 	}
-
 	return res;
 }
 
@@ -242,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode)
 }
 
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode)
+		    struct inode *inode, int mode, dev_t rdev)
 {
 	int err = 0;
 
 	inode_init_owner(inode, NULL, mode);
 	inode->i_blocks = 0;
-	inode->i_rdev = 0;
+	inode->i_rdev = rdev;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_mapping->a_ops = &v9fs_addr_operations;
 
@@ -335,7 +354,7 @@ error:
  *
  */
 
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
 {
 	int err;
 	struct inode *inode;
@@ -348,7 +367,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	err = v9fs_init_inode(v9ses, inode, mode);
+	err = v9fs_init_inode(v9ses, inode, mode, rdev);
 	if (err) {
 		iput(inode);
 		return ERR_PTR(err);
@@ -435,11 +454,12 @@ void v9fs_evict_inode(struct inode *inode)
 static int v9fs_test_inode(struct inode *inode, void *data)
 {
 	int umode;
+	dev_t rdev;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 	struct p9_wstat *st = (struct p9_wstat *)data;
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
 
-	umode = p9mode2unixmode(v9ses, st->mode);
+	umode = p9mode2unixmode(v9ses, st, &rdev);
 	/* don't match inode of different type */
 	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
 		return 0;
@@ -473,6 +493,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 			    struct p9_wstat *st,
 			    int new)
 {
+	dev_t rdev;
 	int retval, umode;
 	unsigned long i_ino;
 	struct inode *inode;
@@ -496,8 +517,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 	 * later.
 	 */
 	inode->i_ino = i_ino;
-	umode = p9mode2unixmode(v9ses, st->mode);
-	retval = v9fs_init_inode(v9ses, inode, umode);
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	retval = v9fs_init_inode(v9ses, inode, umode, rdev);
 	if (retval)
 		goto error;
 
@@ -532,6 +553,19 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 }
 
 /**
+ * v9fs_at_to_dotl_flags- convert Linux specific AT flags to
+ * plan 9 AT flag.
+ * @flags: flags to convert
+ */
+static int v9fs_at_to_dotl_flags(int flags)
+{
+	int rflags = 0;
+	if (flags & AT_REMOVEDIR)
+		rflags |= P9_DOTL_AT_REMOVEDIR;
+	return rflags;
+}
+
+/**
  * v9fs_remove - helper function to remove files and directories
  * @dir: directory inode that is being deleted
  * @dentry: dentry that is being deleted
@@ -558,7 +592,8 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
 		return retval;
 	}
 	if (v9fs_proto_dotl(v9ses))
-		retval = p9_client_unlinkat(dfid, dentry->d_name.name, flags);
+		retval = p9_client_unlinkat(dfid, dentry->d_name.name,
+					    v9fs_at_to_dotl_flags(flags));
 	if (retval == -EOPNOTSUPP) {
 		/* Try the one based on path */
 		v9fid = v9fs_fid_clone(dentry);
@@ -645,13 +680,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
-
+	d_instantiate(dentry, inode);
 	return ofid;
-
 error:
 	if (ofid)
 		p9_client_clunk(ofid);
@@ -792,6 +825,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 				      struct nameidata *nameidata)
 {
+	struct dentry *res;
 	struct super_block *sb;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *dfid, *fid;
@@ -823,22 +857,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 		return ERR_PTR(result);
 	}
-
-	inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	/*
+	 * Make sure we don't use a wrong inode due to parallel
+	 * unlink. For cached mode create calls request for new
+	 * inode. But with cache disabled, lookup should do this.
+	 */
+	if (v9ses->cache)
+		inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	else
+		inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		result = PTR_ERR(inode);
 		inode = NULL;
 		goto error;
 	}
-
 	result = v9fs_fid_add(dentry, fid);
 	if (result < 0)
 		goto error_iput;
-
 inst_out:
-	d_add(dentry, inode);
-	return NULL;
-
+	/*
+	 * If we had a rename on the server and a parallel lookup
+	 * for the new name, then make sure we instantiate with
+	 * the new name. ie look up for a/b, while on server somebody
+	 * moved b under k and client parallely did a lookup for
+	 * k/b.
+	 */
+	res = d_materialise_unique(dentry, inode);
+	if (!IS_ERR(res))
+		return res;
+	result = PTR_ERR(res);
 error_iput:
 	iput(inode);
 error:
@@ -1002,7 +1049,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		return PTR_ERR(st);
 
 	v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-		generic_fillattr(dentry->d_inode, stat);
+	generic_fillattr(dentry->d_inode, stat);
 
 	p9stat_free(st);
 	kfree(st);
@@ -1086,6 +1133,7 @@ void
 v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct super_block *sb)
 {
+	mode_t mode;
 	char ext[32];
 	char tag_name[14];
 	unsigned int i_nlink;
@@ -1121,31 +1169,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 			inode->i_nlink = i_nlink;
 		}
 	}
-	inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
-	if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
-		char type = 0;
-		int major = -1;
-		int minor = -1;
-
-		strncpy(ext, stat->extension, sizeof(ext));
-		sscanf(ext, "%c %u %u", &type, &major, &minor);
-		switch (type) {
-		case 'c':
-			inode->i_mode &= ~S_IFBLK;
-			inode->i_mode |= S_IFCHR;
-			break;
-		case 'b':
-			break;
-		default:
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "Unknown special type %c %s\n", type,
-				   stat->extension);
-		};
-		inode->i_rdev = MKDEV(major, minor);
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-	} else
-		inode->i_rdev = 0;
-
+	mode = stat->mode & S_IALLUGO;
+	mode |= inode->i_mode & ~S_IALLUGO;
+	inode->i_mode = mode;
 	i_size_write(inode, stat->length);
 
 	/* not real number of blocks, but 512 byte ones ... */
@@ -1411,6 +1437,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 
 int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 {
+	int umode;
+	dev_t rdev;
 	loff_t i_size;
 	struct p9_wstat *st;
 	struct v9fs_session_info *v9ses;
@@ -1419,6 +1447,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_stat(fid);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -1430,6 +1464,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	p9stat_free(st);
 	kfree(st);
 	return 0;
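A note for readers, not part of the patch: the device-file handling moved into p9mode2unixmode() above parses the plan9-style extension string that a 9P2000.u server attaches to special files, e.g. "c 5 1" for a character device with major 5, minor 1. A stand-alone sketch of that parse (the example string is made up):

    /* Parse a 9p device extension such as "b 8 0" (block device 8:0).
     * Mirrors the sscanf() call in p9mode2unixmode(); illustration only. */
    #include <stdio.h>

    int main(void)
    {
            const char *ext = "b 8 0";     /* hypothetical extension field */
            char type = 0;
            unsigned int major = 0, minor = 0;

            if (sscanf(ext, "%c %u %u", &type, &major, &minor) == 3)
                    printf("type=%c major=%u minor=%u\n", type, major, minor);
            return 0;
    }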
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 9a26dce5a99f..aded79fcd5cf 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -153,7 +153,8 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 	 * later.
 	 */
 	inode->i_ino = i_ino;
-	retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+	retval = v9fs_init_inode(v9ses, inode,
+				 st->st_mode, new_decode_dev(st->st_rdev));
 	if (retval)
 		goto error;
 
@@ -190,6 +191,58 @@ v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	return inode;
 }
 
+struct dotl_openflag_map {
+	int open_flag;
+	int dotl_flag;
+};
+
+static int v9fs_mapped_dotl_flags(int flags)
+{
+	int i;
+	int rflags = 0;
+	struct dotl_openflag_map dotl_oflag_map[] = {
+		{ O_CREAT,	P9_DOTL_CREATE },
+		{ O_EXCL,	P9_DOTL_EXCL },
+		{ O_NOCTTY,	P9_DOTL_NOCTTY },
+		{ O_TRUNC,	P9_DOTL_TRUNC },
+		{ O_APPEND,	P9_DOTL_APPEND },
+		{ O_NONBLOCK,	P9_DOTL_NONBLOCK },
+		{ O_DSYNC,	P9_DOTL_DSYNC },
+		{ FASYNC,	P9_DOTL_FASYNC },
+		{ O_DIRECT,	P9_DOTL_DIRECT },
+		{ O_LARGEFILE,	P9_DOTL_LARGEFILE },
+		{ O_DIRECTORY,	P9_DOTL_DIRECTORY },
+		{ O_NOFOLLOW,	P9_DOTL_NOFOLLOW },
+		{ O_NOATIME,	P9_DOTL_NOATIME },
+		{ O_CLOEXEC,	P9_DOTL_CLOEXEC },
+		{ O_SYNC,	P9_DOTL_SYNC},
+	};
+	for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
+		if (flags & dotl_oflag_map[i].open_flag)
+			rflags |= dotl_oflag_map[i].dotl_flag;
+	}
+	return rflags;
+}
+
+/**
+ * v9fs_open_to_dotl_flags- convert Linux specific open flags to
+ * plan 9 open flag.
+ * @flags: flags to convert
+ */
+int v9fs_open_to_dotl_flags(int flags)
+{
+	int rflags = 0;
+
+	/*
+	 * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
+	 * and P9_DOTL_NOACCESS
+	 */
+	rflags |= flags & O_ACCMODE;
+	rflags |= v9fs_mapped_dotl_flags(flags);
+
+	return rflags;
+}
+
 /**
  * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
  * @dir: directory inode that is being created
@@ -206,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	int err = 0;
 	gid_t gid;
 	int flags;
-	mode_t mode;
+	umode_t mode;
 	char *name = NULL;
 	struct file *filp;
 	struct p9_qid qid;
@@ -258,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			"Failed to get acl values in creat %d\n", err);
 		goto error;
 	}
-	err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+	err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+				    mode, gid, &qid);
 	if (err < 0) {
 		P9_DPRINTK(P9_DEBUG_VFS,
 			   "p9_client_open_dotl failed in creat %d\n",
@@ -281,10 +335,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
+	d_instantiate(dentry, inode);
 
 	/* Now set the ACL based on the default value */
 	v9fs_set_create_acl(dentry, &dacl, &pacl);
@@ -348,7 +402,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	gid_t gid;
 	char *name;
-	mode_t mode;
+	umode_t mode;
 	struct inode *inode;
 	struct p9_qid qid;
 	struct dentry *dir_dentry;
@@ -403,10 +457,10 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 				err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*
@@ -414,7 +468,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 		 * inode with stat. We need to get an inode
 		 * so that we can set the acl with dentry
 		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -540,6 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 void
 v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 {
+	mode_t mode;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
 	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -552,11 +607,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 		inode->i_uid = stat->st_uid;
 		inode->i_gid = stat->st_gid;
 		inode->i_nlink = stat->st_nlink;
-		inode->i_mode = stat->st_mode;
-		inode->i_rdev = new_decode_dev(stat->st_rdev);
 
-		if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
-			init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		mode = stat->st_mode & S_IALLUGO;
+		mode |= inode->i_mode & ~S_IALLUGO;
+		inode->i_mode = mode;
 
 		i_size_write(inode, stat->st_size);
 		inode->i_blocks = stat->st_blocks;
@@ -657,14 +711,14 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 			err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/* Not in cached mode. No need to populate inode with stat */
-		inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+		inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -751,7 +805,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	int err;
 	gid_t gid;
 	char *name;
-	mode_t mode;
+	umode_t mode;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	struct inode *inode;
@@ -810,17 +864,17 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*
 		 * Not in cached mode. No need to populate inode with stat.
 		 * socket syscall returns a fd, so we need instantiate
 		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, rdev);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -886,6 +940,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -897,6 +956,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	kfree(st);
 	return 0;
 }
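A note for readers, not part of the patch: v9fs_mapped_dotl_flags() above is a table-driven translation, needed because the numeric values of the O_* open flags are architecture-specific on Linux and so cannot be sent to the server verbatim. The same idiom in isolation, as a sketch — WIRE_APPEND and WIRE_TRUNC are hypothetical stand-ins for the protocol's P9_DOTL_* constants:

    /* Table-driven translation of host open flags to wire flags.
     * WIRE_* values are made up for illustration. */
    #include <fcntl.h>
    #include <stddef.h>
    #include <stdio.h>

    #define WIRE_APPEND 0x1
    #define WIRE_TRUNC  0x2

    struct flag_map { int host_flag; int wire_flag; };

    static int host_to_wire(int flags)
    {
            static const struct flag_map map[] = {
                    { O_APPEND, WIRE_APPEND },
                    { O_TRUNC,  WIRE_TRUNC  },
            };
            int out = 0;
            size_t i;

            for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
                    if (flags & map[i].host_flag)
                            out |= map[i].wire_flag;
            return out;
    }

    int main(void)
    {
            printf("0x%x\n", host_to_wire(O_APPEND | O_TRUNC));
            return 0;
    }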
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index feef6cdc1fd2..c70251d47ed1 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 	else
 		sb->s_d_op = &v9fs_dentry_operations;
 
-	inode = v9fs_get_inode(sb, S_IFDIR | mode);
+	inode = v9fs_get_inode(sb, S_IFDIR | mode, 0);
 	if (IS_ERR(inode)) {
 		retval = PTR_ERR(inode);
 		goto release_sb;
diff --git a/fs/Kconfig b/fs/Kconfig
index 19891aab9c6e..9fe0b349f4cd 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL
 	select TMPFS_XATTR
 	select GENERIC_ACL
 	help
-	  POSIX Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
+	  POSIX Access Control Lists (ACLs) support additional access rights
+	  for users and groups beyond the standard owner/group/world scheme,
+	  and this option selects support for ACLs specifically for tmpfs
+	  filesystems.
+
+	  If you've selected TMPFS, it's possible that you'll also need
+	  this option as there are a number of Linux distros that require
+	  POSIX ACL support under /dev for certain features to work properly.
+	  For example, some distros need this feature for ALSA-related /dev
+	  files for sound to work properly.  In short, if you're not sure,
+	  say Y.
 
 	  To learn more about Access Control Lists, visit the POSIX ACLs for
 	  Linux website <http://acl.bestbits.at/>.
 
-	  If you don't know what Access Control Lists are, say N.
-
 config TMPFS_XATTR
 	bool "Tmpfs extended attributes"
 	depends on TMPFS
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 4d433d34736f..f11e43ed907d 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -187,7 +187,7 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
  */
 static struct inode *anon_inode_mkinode(void)
 {
-	struct inode *inode = new_inode(anon_inode_mnt->mnt_sb);
+	struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
 
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 475f9c597cb7..326dc08d3e3f 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -39,27 +39,17 @@
 
 /* #define DEBUG */
 
-#ifdef DEBUG
-#define DPRINTK(fmt, args...)				\
-do {							\
-	printk(KERN_DEBUG "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
-#else
-#define DPRINTK(fmt, args...) do {} while (0)
-#endif
-
-#define AUTOFS_WARN(fmt, args...)			\
-do {							\
+#define DPRINTK(fmt, ...)				\
+	pr_debug("pid %d: %s: " fmt "\n",		\
+		current->pid, __func__, ##__VA_ARGS__)
+
+#define AUTOFS_WARN(fmt, ...)				\
 	printk(KERN_WARNING "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
+		current->pid, __func__, ##__VA_ARGS__)
 
-#define AUTOFS_ERROR(fmt, args...)			\
-do {							\
+#define AUTOFS_ERROR(fmt, ...)				\
 	printk(KERN_ERR "pid %d: %s: " fmt "\n",	\
-		current->pid, __func__, ##args);	\
-} while (0)
+		current->pid, __func__, ##__VA_ARGS__)
 
 /* Unified info structure. This is pointed to by both the dentry and
    inode structures. Each file in the filesystem has an instance of this
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 25435987d6ae..e1fbdeef85db 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
-		wq->wait_queue_token, wq->name.len, wq->name.name, type);
+		(unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
 
 	memset(&pkt,0,sizeof pkt); /* For security reasons */
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 54b8c28bebc8..720d885e8dca 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 		befs_data_stream *data = &befs_ino->i_data.ds;
 		befs_off_t len = data->size;
 
-		befs_debug(sb, "Follow long symlink");
-
-		link = kmalloc(len, GFP_NOFS);
-		if (!link) {
-			link = ERR_PTR(-ENOMEM);
-		} else if (befs_read_lsymlink(sb, data, link, len) != len) {
-			kfree(link);
-			befs_error(sb, "Failed to read entire long symlink");
+		if (len == 0) {
+			befs_error(sb, "Long symlink with illegal length");
 			link = ERR_PTR(-EIO);
 		} else {
-			link[len - 1] = '\0';
+			befs_debug(sb, "Follow long symlink");
+
+			link = kmalloc(len, GFP_NOFS);
+			if (!link) {
+				link = ERR_PTR(-ENOMEM);
+			} else if (befs_read_lsymlink(sb, data, link, len) != len) {
+				kfree(link);
+				befs_error(sb, "Failed to read entire long symlink");
+				link = ERR_PTR(-EIO);
+			} else {
+				link[len - 1] = '\0';
+			}
 		}
 	} else {
 		link = befs_ino->i_data.symlink;
diff --git a/fs/block_dev.c b/fs/block_dev.c index c62fb84944d5..95f786ec7f08 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -44,24 +44,28 @@ inline struct block_device *I_BDEV(struct inode *inode) | |||
44 | { | 44 | { |
45 | return &BDEV_I(inode)->bdev; | 45 | return &BDEV_I(inode)->bdev; |
46 | } | 46 | } |
47 | |||
48 | EXPORT_SYMBOL(I_BDEV); | 47 | EXPORT_SYMBOL(I_BDEV); |
49 | 48 | ||
50 | /* | 49 | /* |
51 | * move the inode from it's current bdi to the a new bdi. if the inode is dirty | 50 | * Move the inode from its current bdi to a new bdi. If the inode is dirty we |
52 | * we need to move it onto the dirty list of @dst so that the inode is always | 51 | * need to move it onto the dirty list of @dst so that the inode is always on |
53 | * on the right list. | 52 | * the right list. |
54 | */ | 53 | */ |
55 | static void bdev_inode_switch_bdi(struct inode *inode, | 54 | static void bdev_inode_switch_bdi(struct inode *inode, |
56 | struct backing_dev_info *dst) | 55 | struct backing_dev_info *dst) |
57 | { | 56 | { |
58 | spin_lock(&inode_wb_list_lock); | 57 | struct backing_dev_info *old = inode->i_data.backing_dev_info; |
58 | |||
59 | if (unlikely(dst == old)) /* deadlock avoidance */ | ||
60 | return; | ||
61 | bdi_lock_two(&old->wb, &dst->wb); | ||
59 | spin_lock(&inode->i_lock); | 62 | spin_lock(&inode->i_lock); |
60 | inode->i_data.backing_dev_info = dst; | 63 | inode->i_data.backing_dev_info = dst; |
61 | if (inode->i_state & I_DIRTY) | 64 | if (inode->i_state & I_DIRTY) |
62 | list_move(&inode->i_wb_list, &dst->wb.b_dirty); | 65 | list_move(&inode->i_wb_list, &dst->wb.b_dirty); |
63 | spin_unlock(&inode->i_lock); | 66 | spin_unlock(&inode->i_lock); |
64 | spin_unlock(&inode_wb_list_lock); | 67 | spin_unlock(&old->wb.list_lock); |
68 | spin_unlock(&dst->wb.list_lock); | ||
65 | } | 69 | } |
66 | 70 | ||
67 | static sector_t max_block(struct block_device *bdev) | 71 | static sector_t max_block(struct block_device *bdev) |
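With the global inode_wb_list_lock gone, bdev_inode_switch_bdi() must hold both per-bdi wb.list_lock spinlocks at once, since the inode may move from one dirty list to the other. That imposes two rules visible above: return early when source and destination are the same bdi, and take the two locks in a stable order, which bdi_lock_two() is assumed to provide. A self-contained userspace analogy of that ordering pattern:

    #include <pthread.h>

    /* Analogy for bdi_lock_two(): acquire two locks in a global
     * (address-based) order so concurrent switchers going in
     * opposite directions cannot ABBA-deadlock. */
    static void lock_two(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (a > b) {                    /* normalise the order */
                    pthread_mutex_t *tmp = a;
                    a = b;
                    b = tmp;
            }
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);          /* a != b is the caller's job */
    }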
@@ -383,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) | |||
383 | struct inode *bd_inode = filp->f_mapping->host; | 387 | struct inode *bd_inode = filp->f_mapping->host; |
384 | struct block_device *bdev = I_BDEV(bd_inode); | 388 | struct block_device *bdev = I_BDEV(bd_inode); |
385 | int error; | 389 | int error; |
390 | |||
391 | error = filemap_write_and_wait_range(filp->f_mapping, start, end); | ||
392 | if (error) | ||
393 | return error; | ||
386 | 394 | ||
387 | /* | 395 | /* |
388 | * There is no need to serialise calls to blkdev_issue_flush with | 396 | * There is no need to serialise calls to blkdev_issue_flush with |
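The added filemap_write_and_wait_range() call restores the ordering an fsync needs: first push the dirty pages in [start, end] to the device and wait for completion, and only then ask the device to drain its volatile write cache; flushing first would leave data stranded in the page cache. A condensed sketch of the resulting function, assuming the 3.x three-argument blkdev_issue_flush() signature:

    int blkdev_fsync_sketch(struct file *filp, loff_t start, loff_t end)
    {
            struct block_device *bdev = I_BDEV(filp->f_mapping->host);
            int error;

            /* step 1: data out of the page cache, waited on */
            error = filemap_write_and_wait_range(filp->f_mapping, start, end);
            if (error)
                    return error;

            /* step 2: device cache flush; harmless if unsupported */
            error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
            if (error == -EOPNOTSUPP)
                    error = 0;
            return error;
    }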
@@ -548,6 +556,7 @@ struct block_device *bdget(dev_t dev) | |||
548 | 556 | ||
549 | if (inode->i_state & I_NEW) { | 557 | if (inode->i_state & I_NEW) { |
550 | bdev->bd_contains = NULL; | 558 | bdev->bd_contains = NULL; |
559 | bdev->bd_super = NULL; | ||
551 | bdev->bd_inode = inode; | 560 | bdev->bd_inode = inode; |
552 | bdev->bd_block_size = (1 << inode->i_blkbits); | 561 | bdev->bd_block_size = (1 << inode->i_blkbits); |
553 | bdev->bd_part_count = 0; | 562 | bdev->bd_part_count = 0; |
@@ -1420,6 +1429,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1420 | WARN_ON_ONCE(bdev->bd_holders); | 1429 | WARN_ON_ONCE(bdev->bd_holders); |
1421 | sync_blockdev(bdev); | 1430 | sync_blockdev(bdev); |
1422 | kill_bdev(bdev); | 1431 | kill_bdev(bdev); |
1432 | /* ->release can cause the old bdi to disappear, | ||
1433 | * so must switch it out first | ||
1434 | */ | ||
1435 | bdev_inode_switch_bdi(bdev->bd_inode, | ||
1436 | &default_backing_dev_info); | ||
1423 | } | 1437 | } |
1424 | if (bdev->bd_contains == bdev) { | 1438 | if (bdev->bd_contains == bdev) { |
1425 | if (disk->fops->release) | 1439 | if (disk->fops->release) |
@@ -1433,8 +1447,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1433 | disk_put_part(bdev->bd_part); | 1447 | disk_put_part(bdev->bd_part); |
1434 | bdev->bd_part = NULL; | 1448 | bdev->bd_part = NULL; |
1435 | bdev->bd_disk = NULL; | 1449 | bdev->bd_disk = NULL; |
1436 | bdev_inode_switch_bdi(bdev->bd_inode, | ||
1437 | &default_backing_dev_info); | ||
1438 | if (bdev != bdev->bd_contains) | 1450 | if (bdev != bdev->bd_contains) |
1439 | victim = bdev->bd_contains; | 1451 | victim = bdev->bd_contains; |
1440 | bdev->bd_contains = NULL; | 1452 | bdev->bd_contains = NULL; |
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd25..40e6ac08c21f 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
6 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ | 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o | 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o |
11 | |||
12 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o | ||
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 65a735d8f6e4..eb159aaa5a11 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,8 +28,6 @@ | |||
28 | #include "btrfs_inode.h" | 28 | #include "btrfs_inode.h" |
29 | #include "xattr.h" | 29 | #include "xattr.h" |
30 | 30 | ||
31 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | ||
32 | |||
33 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | 31 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type) |
34 | { | 32 | { |
35 | int size; | 33 | int size; |
@@ -111,7 +109,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, | |||
111 | int ret, size = 0; | 109 | int ret, size = 0; |
112 | const char *name; | 110 | const char *name; |
113 | char *value = NULL; | 111 | char *value = NULL; |
114 | mode_t mode; | ||
115 | 112 | ||
116 | if (acl) { | 113 | if (acl) { |
117 | ret = posix_acl_valid(acl); | 114 | ret = posix_acl_valid(acl); |
@@ -122,13 +119,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, | |||
122 | 119 | ||
123 | switch (type) { | 120 | switch (type) { |
124 | case ACL_TYPE_ACCESS: | 121 | case ACL_TYPE_ACCESS: |
125 | mode = inode->i_mode; | ||
126 | name = POSIX_ACL_XATTR_ACCESS; | 122 | name = POSIX_ACL_XATTR_ACCESS; |
127 | if (acl) { | 123 | if (acl) { |
128 | ret = posix_acl_equiv_mode(acl, &mode); | 124 | ret = posix_acl_equiv_mode(acl, &inode->i_mode); |
129 | if (ret < 0) | 125 | if (ret < 0) |
130 | return ret; | 126 | return ret; |
131 | inode->i_mode = mode; | ||
132 | } | 127 | } |
133 | ret = 0; | 128 | ret = 0; |
134 | break; | 129 | break; |
@@ -222,19 +217,16 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, | |||
222 | } | 217 | } |
223 | 218 | ||
224 | if (IS_POSIXACL(dir) && acl) { | 219 | if (IS_POSIXACL(dir) && acl) { |
225 | mode_t mode = inode->i_mode; | ||
226 | |||
227 | if (S_ISDIR(inode->i_mode)) { | 220 | if (S_ISDIR(inode->i_mode)) { |
228 | ret = btrfs_set_acl(trans, inode, acl, | 221 | ret = btrfs_set_acl(trans, inode, acl, |
229 | ACL_TYPE_DEFAULT); | 222 | ACL_TYPE_DEFAULT); |
230 | if (ret) | 223 | if (ret) |
231 | goto failed; | 224 | goto failed; |
232 | } | 225 | } |
233 | ret = posix_acl_create(&acl, GFP_NOFS, &mode); | 226 | ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); |
234 | if (ret < 0) | 227 | if (ret < 0) |
235 | return ret; | 228 | return ret; |
236 | 229 | ||
237 | inode->i_mode = mode; | ||
238 | if (ret > 0) { | 230 | if (ret > 0) { |
239 | /* we need an acl */ | 231 | /* we need an acl */ |
240 | ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); | 232 | ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); |
@@ -282,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = { | |||
282 | .get = btrfs_xattr_acl_get, | 274 | .get = btrfs_xattr_acl_get, |
283 | .set = btrfs_xattr_acl_set, | 275 | .set = btrfs_xattr_acl_set, |
284 | }; | 276 | }; |
285 | |||
286 | #else /* CONFIG_BTRFS_FS_POSIX_ACL */ | ||
287 | |||
288 | int btrfs_acl_chmod(struct inode *inode) | ||
289 | { | ||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | int btrfs_init_acl(struct btrfs_trans_handle *trans, | ||
294 | struct inode *inode, struct inode *dir) | ||
295 | { | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | #endif /* CONFIG_BTRFS_FS_POSIX_ACL */ | ||
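With acl.o now built only under CONFIG_BTRFS_FS_POSIX_ACL (the Makefile hunk above), the in-file #ifdef and the C stubs at the bottom of acl.c become dead weight. The usual companion change, sketched here on the assumption it lands in a shared header, is to provide static inline no-ops for the disabled case so callers compile either way:

    #ifdef CONFIG_BTRFS_FS_POSIX_ACL
    int btrfs_init_acl(struct btrfs_trans_handle *trans,
                       struct inode *inode, struct inode *dir);
    int btrfs_acl_chmod(struct inode *inode);
    #else
    static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
                                     struct inode *inode, struct inode *dir)
    {
            return 0;                       /* ACLs compiled out: nothing to do */
    }
    static inline int btrfs_acl_chmod(struct inode *inode)
    {
            return 0;
    }
    #endif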
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca8c7bf..d9f99a16edd6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@ struct btrfs_inode { | |||
34 | */ | 34 | */ |
35 | struct btrfs_key location; | 35 | struct btrfs_key location; |
36 | 36 | ||
37 | /* Lock for counters */ | ||
38 | spinlock_t lock; | ||
39 | |||
37 | /* the extent_tree has caches of all the extent mappings to disk */ | 40 | /* the extent_tree has caches of all the extent mappings to disk */ |
38 | struct extent_map_tree extent_tree; | 41 | struct extent_map_tree extent_tree; |
39 | 42 | ||
@@ -134,8 +137,8 @@ struct btrfs_inode { | |||
134 | * items we think we'll end up using, and reserved_extents is the number | 137 | * items we think we'll end up using, and reserved_extents is the number |
135 | * of extent items we've reserved metadata for. | 138 | * of extent items we've reserved metadata for. |
136 | */ | 139 | */ |
137 | atomic_t outstanding_extents; | 140 | unsigned outstanding_extents; |
138 | atomic_t reserved_extents; | 141 | unsigned reserved_extents; |
139 | 142 | ||
140 | /* | 143 | /* |
141 | * ordered_data_close is set by truncate when a file that used | 144 | * ordered_data_close is set by truncate when a file that used |
@@ -173,7 +176,11 @@ static inline u64 btrfs_ino(struct inode *inode) | |||
173 | { | 176 | { |
174 | u64 ino = BTRFS_I(inode)->location.objectid; | 177 | u64 ino = BTRFS_I(inode)->location.objectid; |
175 | 178 | ||
176 | if (ino <= BTRFS_FIRST_FREE_OBJECTID) | 179 | /* |
180 | * !ino: btree_inode | ||
181 | * type == BTRFS_ROOT_ITEM_KEY: subvol dir | ||
182 | */ | ||
183 | if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY) | ||
177 | ino = inode->i_ino; | 184 | ino = inode->i_ino; |
178 | return ino; | 185 | return ino; |
179 | } | 186 | } |
@@ -184,4 +191,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) | |||
184 | BTRFS_I(inode)->disk_i_size = size; | 191 | BTRFS_I(inode)->disk_i_size = size; |
185 | } | 192 | } |
186 | 193 | ||
194 | static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, | ||
195 | struct inode *inode) | ||
196 | { | ||
197 | if (root == root->fs_info->tree_root || | ||
198 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | ||
199 | return true; | ||
200 | return false; | ||
201 | } | ||
202 | |||
187 | #endif | 203 | #endif |
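outstanding_extents and reserved_extents change from atomic_t to plain integers guarded by the new per-inode spinlock. Two atomics can each be consistent in isolation while their relationship is not; one lock makes compound updates such as "bump one counter and compare it against the other" race-free. A sketch of the intended usage (the reservation step is elided):

    static void account_new_extent(struct inode *inode)
    {
            struct btrfs_inode *bi = BTRFS_I(inode);

            spin_lock(&bi->lock);
            bi->outstanding_extents++;
            if (bi->outstanding_extents > bi->reserved_extents) {
                    /* ... reserve more metadata space ... */
            }
            spin_unlock(&bi->lock);
    }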
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf9..8ec5d86f1734 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
338 | u64 first_byte = disk_start; | 338 | u64 first_byte = disk_start; |
339 | struct block_device *bdev; | 339 | struct block_device *bdev; |
340 | int ret; | 340 | int ret; |
341 | int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
341 | 342 | ||
342 | WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); | 343 | WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); |
343 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 344 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
392 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 393 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
393 | BUG_ON(ret); | 394 | BUG_ON(ret); |
394 | 395 | ||
395 | ret = btrfs_csum_one_bio(root, inode, bio, start, 1); | 396 | if (!skip_sum) { |
396 | BUG_ON(ret); | 397 | ret = btrfs_csum_one_bio(root, inode, bio, |
398 | start, 1); | ||
399 | BUG_ON(ret); | ||
400 | } | ||
397 | 401 | ||
398 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); | 402 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); |
399 | BUG_ON(ret); | 403 | BUG_ON(ret); |
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
418 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 422 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
419 | BUG_ON(ret); | 423 | BUG_ON(ret); |
420 | 424 | ||
421 | ret = btrfs_csum_one_bio(root, inode, bio, start, 1); | 425 | if (!skip_sum) { |
422 | BUG_ON(ret); | 426 | ret = btrfs_csum_one_bio(root, inode, bio, start, 1); |
427 | BUG_ON(ret); | ||
428 | } | ||
423 | 429 | ||
424 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); | 430 | ret = btrfs_map_bio(root, WRITE, bio, 0, 1); |
425 | BUG_ON(ret); | 431 | BUG_ON(ret); |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2e667868e0d2..011cab3aca8d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) | |||
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | 56 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { |
57 | if (p->nodes[i] && p->locks[i]) | 57 | if (!p->nodes[i] || !p->locks[i]) |
58 | btrfs_set_lock_blocking(p->nodes[i]); | 58 | continue; |
59 | btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]); | ||
60 | if (p->locks[i] == BTRFS_READ_LOCK) | ||
61 | p->locks[i] = BTRFS_READ_LOCK_BLOCKING; | ||
62 | else if (p->locks[i] == BTRFS_WRITE_LOCK) | ||
63 | p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING; | ||
59 | } | 64 | } |
60 | } | 65 | } |
61 | 66 | ||
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) | |||
68 | * for held | 73 | * for held |
69 | */ | 74 | */ |
70 | noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | 75 | noinline void btrfs_clear_path_blocking(struct btrfs_path *p, |
71 | struct extent_buffer *held) | 76 | struct extent_buffer *held, int held_rw) |
72 | { | 77 | { |
73 | int i; | 78 | int i; |
74 | 79 | ||
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
79 | * really sure by forcing the path to blocking before we clear | 84 | * really sure by forcing the path to blocking before we clear |
80 | * the path blocking. | 85 | * the path blocking. |
81 | */ | 86 | */ |
82 | if (held) | 87 | if (held) { |
83 | btrfs_set_lock_blocking(held); | 88 | btrfs_set_lock_blocking_rw(held, held_rw); |
89 | if (held_rw == BTRFS_WRITE_LOCK) | ||
90 | held_rw = BTRFS_WRITE_LOCK_BLOCKING; | ||
91 | else if (held_rw == BTRFS_READ_LOCK) | ||
92 | held_rw = BTRFS_READ_LOCK_BLOCKING; | ||
93 | } | ||
84 | btrfs_set_path_blocking(p); | 94 | btrfs_set_path_blocking(p); |
85 | #endif | 95 | #endif |
86 | 96 | ||
87 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { | 97 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { |
88 | if (p->nodes[i] && p->locks[i]) | 98 | if (p->nodes[i] && p->locks[i]) { |
89 | btrfs_clear_lock_blocking(p->nodes[i]); | 99 | btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]); |
100 | if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING) | ||
101 | p->locks[i] = BTRFS_WRITE_LOCK; | ||
102 | else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING) | ||
103 | p->locks[i] = BTRFS_READ_LOCK; | ||
104 | } | ||
90 | } | 105 | } |
91 | 106 | ||
92 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 107 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
93 | if (held) | 108 | if (held) |
94 | btrfs_clear_lock_blocking(held); | 109 | btrfs_clear_lock_blocking_rw(held, held_rw); |
95 | #endif | 110 | #endif |
96 | } | 111 | } |
97 | 112 | ||
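btrfs_set_path_blocking() and btrfs_clear_path_blocking() now track which flavour of lock each level holds, toggling between the spinning and blocking variants of the new read/write tree locks. The mapping is symmetric and worth stating once; the constants come from the reworked locking.h:

    /* spinning -> blocking, as in btrfs_set_path_blocking() */
    static int lock_to_blocking(int rw)
    {
            if (rw == BTRFS_WRITE_LOCK)
                    return BTRFS_WRITE_LOCK_BLOCKING;
            if (rw == BTRFS_READ_LOCK)
                    return BTRFS_READ_LOCK_BLOCKING;
            return rw;              /* 0 (unlocked) or already blocking */
    }

    /* blocking -> spinning, as in btrfs_clear_path_blocking() */
    static int lock_to_spinning(int rw)
    {
            if (rw == BTRFS_WRITE_LOCK_BLOCKING)
                    return BTRFS_WRITE_LOCK;
            if (rw == BTRFS_READ_LOCK_BLOCKING)
                    return BTRFS_READ_LOCK;
            return rw;
    }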
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p) | |||
119 | if (!p->nodes[i]) | 134 | if (!p->nodes[i]) |
120 | continue; | 135 | continue; |
121 | if (p->locks[i]) { | 136 | if (p->locks[i]) { |
122 | btrfs_tree_unlock(p->nodes[i]); | 137 | btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]); |
123 | p->locks[i] = 0; | 138 | p->locks[i] = 0; |
124 | } | 139 | } |
125 | free_extent_buffer(p->nodes[i]); | 140 | free_extent_buffer(p->nodes[i]); |
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | |||
167 | return eb; | 182 | return eb; |
168 | } | 183 | } |
169 | 184 | ||
185 | /* loop around taking references on and locking the root node of the | ||
186 | * tree until you end up with a lock on the root. A locked buffer | ||
187 | * is returned, with a reference held. | ||
188 | */ | ||
189 | struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) | ||
190 | { | ||
191 | struct extent_buffer *eb; | ||
192 | |||
193 | while (1) { | ||
194 | eb = btrfs_root_node(root); | ||
195 | btrfs_tree_read_lock(eb); | ||
196 | if (eb == root->node) | ||
197 | break; | ||
198 | btrfs_tree_read_unlock(eb); | ||
199 | free_extent_buffer(eb); | ||
200 | } | ||
201 | return eb; | ||
202 | } | ||
203 | |||
170 | /* cowonly root (everything not a reference counted cow subvolume), just get | 204 | /* cowonly root (everything not a reference counted cow subvolume), just get |
171 | * put onto a simple dirty list. transaction.c walks this to make sure they | 205 | * put onto a simple dirty list. transaction.c walks this to make sure they |
172 | * get properly updated on disk. | 206 | * get properly updated on disk. |
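The retry loop in btrfs_read_lock_root_node() is needed because root->node can be swapped out (by a COW of the root) between reading the pointer and acquiring the lock; only a buffer that is still the root after locking is valid. A minimal caller sketch of the resulting contract:

    struct extent_buffer *eb = btrfs_read_lock_root_node(root);
    int level = btrfs_header_level(eb);     /* safe: eb is read locked */

    /* ... walk downward from the root ... */

    btrfs_tree_read_unlock(eb);             /* drop the lock first */
    free_extent_buffer(eb);                 /* then the reference */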
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
626 | for (i = start_slot; i < end_slot; i++) { | 660 | for (i = start_slot; i < end_slot; i++) { |
627 | int close = 1; | 661 | int close = 1; |
628 | 662 | ||
629 | if (!parent->map_token) { | ||
630 | map_extent_buffer(parent, | ||
631 | btrfs_node_key_ptr_offset(i), | ||
632 | sizeof(struct btrfs_key_ptr), | ||
633 | &parent->map_token, &parent->kaddr, | ||
634 | &parent->map_start, &parent->map_len, | ||
635 | KM_USER1); | ||
636 | } | ||
637 | btrfs_node_key(parent, &disk_key, i); | 663 | btrfs_node_key(parent, &disk_key, i); |
638 | if (!progress_passed && comp_keys(&disk_key, progress) < 0) | 664 | if (!progress_passed && comp_keys(&disk_key, progress) < 0) |
639 | continue; | 665 | continue; |
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
656 | last_block = blocknr; | 682 | last_block = blocknr; |
657 | continue; | 683 | continue; |
658 | } | 684 | } |
659 | if (parent->map_token) { | ||
660 | unmap_extent_buffer(parent, parent->map_token, | ||
661 | KM_USER1); | ||
662 | parent->map_token = NULL; | ||
663 | } | ||
664 | 685 | ||
665 | cur = btrfs_find_tree_block(root, blocknr, blocksize); | 686 | cur = btrfs_find_tree_block(root, blocknr, blocksize); |
666 | if (cur) | 687 | if (cur) |
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
701 | btrfs_tree_unlock(cur); | 722 | btrfs_tree_unlock(cur); |
702 | free_extent_buffer(cur); | 723 | free_extent_buffer(cur); |
703 | } | 724 | } |
704 | if (parent->map_token) { | ||
705 | unmap_extent_buffer(parent, parent->map_token, | ||
706 | KM_USER1); | ||
707 | parent->map_token = NULL; | ||
708 | } | ||
709 | return err; | 725 | return err; |
710 | } | 726 | } |
711 | 727 | ||
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb, | |||
746 | struct btrfs_disk_key *tmp = NULL; | 762 | struct btrfs_disk_key *tmp = NULL; |
747 | struct btrfs_disk_key unaligned; | 763 | struct btrfs_disk_key unaligned; |
748 | unsigned long offset; | 764 | unsigned long offset; |
749 | char *map_token = NULL; | ||
750 | char *kaddr = NULL; | 765 | char *kaddr = NULL; |
751 | unsigned long map_start = 0; | 766 | unsigned long map_start = 0; |
752 | unsigned long map_len = 0; | 767 | unsigned long map_len = 0; |
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb, | |||
756 | mid = (low + high) / 2; | 771 | mid = (low + high) / 2; |
757 | offset = p + mid * item_size; | 772 | offset = p + mid * item_size; |
758 | 773 | ||
759 | if (!map_token || offset < map_start || | 774 | if (!kaddr || offset < map_start || |
760 | (offset + sizeof(struct btrfs_disk_key)) > | 775 | (offset + sizeof(struct btrfs_disk_key)) > |
761 | map_start + map_len) { | 776 | map_start + map_len) { |
762 | if (map_token) { | ||
763 | unmap_extent_buffer(eb, map_token, KM_USER0); | ||
764 | map_token = NULL; | ||
765 | } | ||
766 | 777 | ||
767 | err = map_private_extent_buffer(eb, offset, | 778 | err = map_private_extent_buffer(eb, offset, |
768 | sizeof(struct btrfs_disk_key), | 779 | sizeof(struct btrfs_disk_key), |
769 | &map_token, &kaddr, | 780 | &kaddr, &map_start, &map_len); |
770 | &map_start, &map_len, KM_USER0); | ||
771 | 781 | ||
772 | if (!err) { | 782 | if (!err) { |
773 | tmp = (struct btrfs_disk_key *)(kaddr + offset - | 783 | tmp = (struct btrfs_disk_key *)(kaddr + offset - |
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb, | |||
790 | high = mid; | 800 | high = mid; |
791 | else { | 801 | else { |
792 | *slot = mid; | 802 | *slot = mid; |
793 | if (map_token) | ||
794 | unmap_extent_buffer(eb, map_token, KM_USER0); | ||
795 | return 0; | 803 | return 0; |
796 | } | 804 | } |
797 | } | 805 | } |
798 | *slot = low; | 806 | *slot = low; |
799 | if (map_token) | ||
800 | unmap_extent_buffer(eb, map_token, KM_USER0); | ||
801 | return 1; | 807 | return 1; |
802 | } | 808 | } |
803 | 809 | ||
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
890 | 896 | ||
891 | mid = path->nodes[level]; | 897 | mid = path->nodes[level]; |
892 | 898 | ||
893 | WARN_ON(!path->locks[level]); | 899 | WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK && |
900 | path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING); | ||
894 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | 901 | WARN_ON(btrfs_header_generation(mid) != trans->transid); |
895 | 902 | ||
896 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); | 903 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); |
@@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root, | |||
1228 | u32 nr; | 1235 | u32 nr; |
1229 | u32 blocksize; | 1236 | u32 blocksize; |
1230 | u32 nscan = 0; | 1237 | u32 nscan = 0; |
1231 | bool map = true; | ||
1232 | 1238 | ||
1233 | if (level != 1) | 1239 | if (level != 1) |
1234 | return; | 1240 | return; |
@@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root, | |||
1250 | 1256 | ||
1251 | nritems = btrfs_header_nritems(node); | 1257 | nritems = btrfs_header_nritems(node); |
1252 | nr = slot; | 1258 | nr = slot; |
1253 | if (node->map_token || path->skip_locking) | ||
1254 | map = false; | ||
1255 | 1259 | ||
1256 | while (1) { | 1260 | while (1) { |
1257 | if (map && !node->map_token) { | ||
1258 | unsigned long offset = btrfs_node_key_ptr_offset(nr); | ||
1259 | map_private_extent_buffer(node, offset, | ||
1260 | sizeof(struct btrfs_key_ptr), | ||
1261 | &node->map_token, | ||
1262 | &node->kaddr, | ||
1263 | &node->map_start, | ||
1264 | &node->map_len, KM_USER1); | ||
1265 | } | ||
1266 | if (direction < 0) { | 1261 | if (direction < 0) { |
1267 | if (nr == 0) | 1262 | if (nr == 0) |
1268 | break; | 1263 | break; |
@@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root, | |||
1281 | if ((search <= target && target - search <= 65536) || | 1276 | if ((search <= target && target - search <= 65536) || |
1282 | (search > target && search - target <= 65536)) { | 1277 | (search > target && search - target <= 65536)) { |
1283 | gen = btrfs_node_ptr_generation(node, nr); | 1278 | gen = btrfs_node_ptr_generation(node, nr); |
1284 | if (map && node->map_token) { | ||
1285 | unmap_extent_buffer(node, node->map_token, | ||
1286 | KM_USER1); | ||
1287 | node->map_token = NULL; | ||
1288 | } | ||
1289 | readahead_tree_block(root, search, blocksize, gen); | 1279 | readahead_tree_block(root, search, blocksize, gen); |
1290 | nread += blocksize; | 1280 | nread += blocksize; |
1291 | } | 1281 | } |
@@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root, | |||
1293 | if ((nread > 65536 || nscan > 32)) | 1283 | if ((nread > 65536 || nscan > 32)) |
1294 | break; | 1284 | break; |
1295 | } | 1285 | } |
1296 | if (map && node->map_token) { | ||
1297 | unmap_extent_buffer(node, node->map_token, KM_USER1); | ||
1298 | node->map_token = NULL; | ||
1299 | } | ||
1300 | } | 1286 | } |
1301 | 1287 | ||
1302 | /* | 1288 | /* |
@@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level, | |||
1409 | 1395 | ||
1410 | t = path->nodes[i]; | 1396 | t = path->nodes[i]; |
1411 | if (i >= lowest_unlock && i > skip_level && path->locks[i]) { | 1397 | if (i >= lowest_unlock && i > skip_level && path->locks[i]) { |
1412 | btrfs_tree_unlock(t); | 1398 | btrfs_tree_unlock_rw(t, path->locks[i]); |
1413 | path->locks[i] = 0; | 1399 | path->locks[i] = 0; |
1414 | } | 1400 | } |
1415 | } | 1401 | } |
@@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | |||
1436 | continue; | 1422 | continue; |
1437 | if (!path->locks[i]) | 1423 | if (!path->locks[i]) |
1438 | continue; | 1424 | continue; |
1439 | btrfs_tree_unlock(path->nodes[i]); | 1425 | btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); |
1440 | path->locks[i] = 0; | 1426 | path->locks[i] = 0; |
1441 | } | 1427 | } |
1442 | } | 1428 | } |
@@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1485 | * we can trust our generation number | 1471 | * we can trust our generation number |
1486 | */ | 1472 | */ |
1487 | free_extent_buffer(tmp); | 1473 | free_extent_buffer(tmp); |
1474 | btrfs_set_path_blocking(p); | ||
1475 | |||
1488 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1476 | tmp = read_tree_block(root, blocknr, blocksize, gen); |
1489 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | 1477 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { |
1490 | *eb_ret = tmp; | 1478 | *eb_ret = tmp; |
@@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1540 | static int | 1528 | static int |
1541 | setup_nodes_for_search(struct btrfs_trans_handle *trans, | 1529 | setup_nodes_for_search(struct btrfs_trans_handle *trans, |
1542 | struct btrfs_root *root, struct btrfs_path *p, | 1530 | struct btrfs_root *root, struct btrfs_path *p, |
1543 | struct extent_buffer *b, int level, int ins_len) | 1531 | struct extent_buffer *b, int level, int ins_len, |
1532 | int *write_lock_level) | ||
1544 | { | 1533 | { |
1545 | int ret; | 1534 | int ret; |
1546 | if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= | 1535 | if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= |
1547 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | 1536 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { |
1548 | int sret; | 1537 | int sret; |
1549 | 1538 | ||
1539 | if (*write_lock_level < level + 1) { | ||
1540 | *write_lock_level = level + 1; | ||
1541 | btrfs_release_path(p); | ||
1542 | goto again; | ||
1543 | } | ||
1544 | |||
1550 | sret = reada_for_balance(root, p, level); | 1545 | sret = reada_for_balance(root, p, level); |
1551 | if (sret) | 1546 | if (sret) |
1552 | goto again; | 1547 | goto again; |
1553 | 1548 | ||
1554 | btrfs_set_path_blocking(p); | 1549 | btrfs_set_path_blocking(p); |
1555 | sret = split_node(trans, root, p, level); | 1550 | sret = split_node(trans, root, p, level); |
1556 | btrfs_clear_path_blocking(p, NULL); | 1551 | btrfs_clear_path_blocking(p, NULL, 0); |
1557 | 1552 | ||
1558 | BUG_ON(sret > 0); | 1553 | BUG_ON(sret > 0); |
1559 | if (sret) { | 1554 | if (sret) { |
@@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, | |||
1565 | BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { | 1560 | BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { |
1566 | int sret; | 1561 | int sret; |
1567 | 1562 | ||
1563 | if (*write_lock_level < level + 1) { | ||
1564 | *write_lock_level = level + 1; | ||
1565 | btrfs_release_path(p); | ||
1566 | goto again; | ||
1567 | } | ||
1568 | |||
1568 | sret = reada_for_balance(root, p, level); | 1569 | sret = reada_for_balance(root, p, level); |
1569 | if (sret) | 1570 | if (sret) |
1570 | goto again; | 1571 | goto again; |
1571 | 1572 | ||
1572 | btrfs_set_path_blocking(p); | 1573 | btrfs_set_path_blocking(p); |
1573 | sret = balance_level(trans, root, p, level); | 1574 | sret = balance_level(trans, root, p, level); |
1574 | btrfs_clear_path_blocking(p, NULL); | 1575 | btrfs_clear_path_blocking(p, NULL, 0); |
1575 | 1576 | ||
1576 | if (sret) { | 1577 | if (sret) { |
1577 | ret = sret; | 1578 | ret = sret; |
@@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1615 | int err; | 1616 | int err; |
1616 | int level; | 1617 | int level; |
1617 | int lowest_unlock = 1; | 1618 | int lowest_unlock = 1; |
1619 | int root_lock; | ||
1620 | /* everything at write_lock_level or lower must be write locked */ | ||
1621 | int write_lock_level = 0; | ||
1618 | u8 lowest_level = 0; | 1622 | u8 lowest_level = 0; |
1619 | 1623 | ||
1620 | lowest_level = p->lowest_level; | 1624 | lowest_level = p->lowest_level; |
1621 | WARN_ON(lowest_level && ins_len > 0); | 1625 | WARN_ON(lowest_level && ins_len > 0); |
1622 | WARN_ON(p->nodes[0] != NULL); | 1626 | WARN_ON(p->nodes[0] != NULL); |
1623 | 1627 | ||
1624 | if (ins_len < 0) | 1628 | if (ins_len < 0) { |
1625 | lowest_unlock = 2; | 1629 | lowest_unlock = 2; |
1626 | 1630 | ||
1631 | /* when we are removing items, we might have to go up to level | ||
1632 | * two as we update tree pointers. Make sure we keep write | ||
1633 | * locks for those levels as well | ||
1634 | */ | ||
1635 | write_lock_level = 2; | ||
1636 | } else if (ins_len > 0) { | ||
1637 | /* | ||
1638 | * for inserting items, make sure we have a write lock on | ||
1639 | * level 1 so we can update keys | ||
1640 | */ | ||
1641 | write_lock_level = 1; | ||
1642 | } | ||
1643 | |||
1644 | if (!cow) | ||
1645 | write_lock_level = -1; | ||
1646 | |||
1647 | if (cow && (p->keep_locks || p->lowest_level)) | ||
1648 | write_lock_level = BTRFS_MAX_LEVEL; | ||
1649 | |||
1627 | again: | 1650 | again: |
1651 | /* | ||
1652 | * we try very hard to do read locks on the root | ||
1653 | */ | ||
1654 | root_lock = BTRFS_READ_LOCK; | ||
1655 | level = 0; | ||
1628 | if (p->search_commit_root) { | 1656 | if (p->search_commit_root) { |
1657 | /* | ||
1658 | * the commit roots are read only | ||
1659 | * so we always do read locks | ||
1660 | */ | ||
1629 | b = root->commit_root; | 1661 | b = root->commit_root; |
1630 | extent_buffer_get(b); | 1662 | extent_buffer_get(b); |
1663 | level = btrfs_header_level(b); | ||
1631 | if (!p->skip_locking) | 1664 | if (!p->skip_locking) |
1632 | btrfs_tree_lock(b); | 1665 | btrfs_tree_read_lock(b); |
1633 | } else { | 1666 | } else { |
1634 | if (p->skip_locking) | 1667 | if (p->skip_locking) { |
1635 | b = btrfs_root_node(root); | 1668 | b = btrfs_root_node(root); |
1636 | else | 1669 | level = btrfs_header_level(b); |
1637 | b = btrfs_lock_root_node(root); | 1670 | } else { |
1671 | /* we don't know the level of the root node | ||
1672 | * until we actually have it read locked | ||
1673 | */ | ||
1674 | b = btrfs_read_lock_root_node(root); | ||
1675 | level = btrfs_header_level(b); | ||
1676 | if (level <= write_lock_level) { | ||
1677 | /* whoops, must trade for write lock */ | ||
1678 | btrfs_tree_read_unlock(b); | ||
1679 | free_extent_buffer(b); | ||
1680 | b = btrfs_lock_root_node(root); | ||
1681 | root_lock = BTRFS_WRITE_LOCK; | ||
1682 | |||
1683 | /* the level might have changed, check again */ | ||
1684 | level = btrfs_header_level(b); | ||
1685 | } | ||
1686 | } | ||
1638 | } | 1687 | } |
1688 | p->nodes[level] = b; | ||
1689 | if (!p->skip_locking) | ||
1690 | p->locks[level] = root_lock; | ||
1639 | 1691 | ||
1640 | while (b) { | 1692 | while (b) { |
1641 | level = btrfs_header_level(b); | 1693 | level = btrfs_header_level(b); |
@@ -1644,10 +1696,6 @@ again: | |||
1644 | * setup the path here so we can release it under lock | 1696 | * setup the path here so we can release it under lock |
1645 | * contention with the cow code | 1697 | * contention with the cow code |
1646 | */ | 1698 | */ |
1647 | p->nodes[level] = b; | ||
1648 | if (!p->skip_locking) | ||
1649 | p->locks[level] = 1; | ||
1650 | |||
1651 | if (cow) { | 1699 | if (cow) { |
1652 | /* | 1700 | /* |
1653 | * if we don't really need to cow this block | 1701 | * if we don't really need to cow this block |
@@ -1659,6 +1707,16 @@ again: | |||
1659 | 1707 | ||
1660 | btrfs_set_path_blocking(p); | 1708 | btrfs_set_path_blocking(p); |
1661 | 1709 | ||
1710 | /* | ||
1711 | * must have write locks on this node and the | ||
1712 | * parent | ||
1713 | */ | ||
1714 | if (level + 1 > write_lock_level) { | ||
1715 | write_lock_level = level + 1; | ||
1716 | btrfs_release_path(p); | ||
1717 | goto again; | ||
1718 | } | ||
1719 | |||
1662 | err = btrfs_cow_block(trans, root, b, | 1720 | err = btrfs_cow_block(trans, root, b, |
1663 | p->nodes[level + 1], | 1721 | p->nodes[level + 1], |
1664 | p->slots[level + 1], &b); | 1722 | p->slots[level + 1], &b); |
@@ -1671,10 +1729,7 @@ cow_done: | |||
1671 | BUG_ON(!cow && ins_len); | 1729 | BUG_ON(!cow && ins_len); |
1672 | 1730 | ||
1673 | p->nodes[level] = b; | 1731 | p->nodes[level] = b; |
1674 | if (!p->skip_locking) | 1732 | btrfs_clear_path_blocking(p, NULL, 0); |
1675 | p->locks[level] = 1; | ||
1676 | |||
1677 | btrfs_clear_path_blocking(p, NULL); | ||
1678 | 1733 | ||
1679 | /* | 1734 | /* |
1680 | * we have a lock on b and as long as we aren't changing | 1735 | * we have a lock on b and as long as we aren't changing |
@@ -1700,7 +1755,7 @@ cow_done: | |||
1700 | } | 1755 | } |
1701 | p->slots[level] = slot; | 1756 | p->slots[level] = slot; |
1702 | err = setup_nodes_for_search(trans, root, p, b, level, | 1757 | err = setup_nodes_for_search(trans, root, p, b, level, |
1703 | ins_len); | 1758 | ins_len, &write_lock_level); |
1704 | if (err == -EAGAIN) | 1759 | if (err == -EAGAIN) |
1705 | goto again; | 1760 | goto again; |
1706 | if (err) { | 1761 | if (err) { |
@@ -1710,6 +1765,19 @@ cow_done: | |||
1710 | b = p->nodes[level]; | 1765 | b = p->nodes[level]; |
1711 | slot = p->slots[level]; | 1766 | slot = p->slots[level]; |
1712 | 1767 | ||
1768 | /* | ||
1769 | * slot 0 is special: if we change the key | ||
1770 | * we have to update the parent pointer, | ||
1771 | * which means we must have a write lock | ||
1772 | * on the parent | ||
1773 | */ | ||
1774 | if (slot == 0 && cow && | ||
1775 | write_lock_level < level + 1) { | ||
1776 | write_lock_level = level + 1; | ||
1777 | btrfs_release_path(p); | ||
1778 | goto again; | ||
1779 | } | ||
1780 | |||
1713 | unlock_up(p, level, lowest_unlock); | 1781 | unlock_up(p, level, lowest_unlock); |
1714 | 1782 | ||
1715 | if (level == lowest_level) { | 1783 | if (level == lowest_level) { |
@@ -1728,23 +1796,42 @@ cow_done: | |||
1728 | } | 1796 | } |
1729 | 1797 | ||
1730 | if (!p->skip_locking) { | 1798 | if (!p->skip_locking) { |
1731 | btrfs_clear_path_blocking(p, NULL); | 1799 | level = btrfs_header_level(b); |
1732 | err = btrfs_try_spin_lock(b); | 1800 | if (level <= write_lock_level) { |
1733 | 1801 | err = btrfs_try_tree_write_lock(b); | |
1734 | if (!err) { | 1802 | if (!err) { |
1735 | btrfs_set_path_blocking(p); | 1803 | btrfs_set_path_blocking(p); |
1736 | btrfs_tree_lock(b); | 1804 | btrfs_tree_lock(b); |
1737 | btrfs_clear_path_blocking(p, b); | 1805 | btrfs_clear_path_blocking(p, b, |
1806 | BTRFS_WRITE_LOCK); | ||
1807 | } | ||
1808 | p->locks[level] = BTRFS_WRITE_LOCK; | ||
1809 | } else { | ||
1810 | err = btrfs_try_tree_read_lock(b); | ||
1811 | if (!err) { | ||
1812 | btrfs_set_path_blocking(p); | ||
1813 | btrfs_tree_read_lock(b); | ||
1814 | btrfs_clear_path_blocking(p, b, | ||
1815 | BTRFS_READ_LOCK); | ||
1816 | } | ||
1817 | p->locks[level] = BTRFS_READ_LOCK; | ||
1738 | } | 1818 | } |
1819 | p->nodes[level] = b; | ||
1739 | } | 1820 | } |
1740 | } else { | 1821 | } else { |
1741 | p->slots[level] = slot; | 1822 | p->slots[level] = slot; |
1742 | if (ins_len > 0 && | 1823 | if (ins_len > 0 && |
1743 | btrfs_leaf_free_space(root, b) < ins_len) { | 1824 | btrfs_leaf_free_space(root, b) < ins_len) { |
1825 | if (write_lock_level < 1) { | ||
1826 | write_lock_level = 1; | ||
1827 | btrfs_release_path(p); | ||
1828 | goto again; | ||
1829 | } | ||
1830 | |||
1744 | btrfs_set_path_blocking(p); | 1831 | btrfs_set_path_blocking(p); |
1745 | err = split_leaf(trans, root, key, | 1832 | err = split_leaf(trans, root, key, |
1746 | p, ins_len, ret == 0); | 1833 | p, ins_len, ret == 0); |
1747 | btrfs_clear_path_blocking(p, NULL); | 1834 | btrfs_clear_path_blocking(p, NULL, 0); |
1748 | 1835 | ||
1749 | BUG_ON(err > 0); | 1836 | BUG_ON(err > 0); |
1750 | if (err) { | 1837 | if (err) { |
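Taking a child's lock follows the same two-step pattern in both the read and write arms above: try the cheap spinning acquisition first, and only when it fails mark the rest of the path blocking (so lock holders can schedule) and acquire the lock the slow way. The same shape in a self-contained userspace analogy:

    #include <pthread.h>

    /* try-then-block: note pthread trylock returns 0 on success,
     * whereas the btrfs helpers return nonzero on success; the
     * shape of the fallback is what matters here. */
    static void take_rwlock(pthread_rwlock_t *l, int write)
    {
            int busy = write ? pthread_rwlock_trywrlock(l)
                             : pthread_rwlock_tryrdlock(l);
            if (busy) {
                    /* contended: fall back to a blocking acquire */
                    if (write)
                            pthread_rwlock_wrlock(l);
                    else
                            pthread_rwlock_rdlock(l);
            }
    }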
@@ -2025,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2025 | add_root_to_dirty_list(root); | 2112 | add_root_to_dirty_list(root); |
2026 | extent_buffer_get(c); | 2113 | extent_buffer_get(c); |
2027 | path->nodes[level] = c; | 2114 | path->nodes[level] = c; |
2028 | path->locks[level] = 1; | 2115 | path->locks[level] = BTRFS_WRITE_LOCK; |
2029 | path->slots[level] = 0; | 2116 | path->slots[level] = 0; |
2030 | return 0; | 2117 | return 0; |
2031 | } | 2118 | } |
@@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2253 | if (path->slots[0] == i) | 2340 | if (path->slots[0] == i) |
2254 | push_space += data_size; | 2341 | push_space += data_size; |
2255 | 2342 | ||
2256 | if (!left->map_token) { | ||
2257 | map_extent_buffer(left, (unsigned long)item, | ||
2258 | sizeof(struct btrfs_item), | ||
2259 | &left->map_token, &left->kaddr, | ||
2260 | &left->map_start, &left->map_len, | ||
2261 | KM_USER1); | ||
2262 | } | ||
2263 | |||
2264 | this_item_size = btrfs_item_size(left, item); | 2343 | this_item_size = btrfs_item_size(left, item); |
2265 | if (this_item_size + sizeof(*item) + push_space > free_space) | 2344 | if (this_item_size + sizeof(*item) + push_space > free_space) |
2266 | break; | 2345 | break; |
@@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2271 | break; | 2350 | break; |
2272 | i--; | 2351 | i--; |
2273 | } | 2352 | } |
2274 | if (left->map_token) { | ||
2275 | unmap_extent_buffer(left, left->map_token, KM_USER1); | ||
2276 | left->map_token = NULL; | ||
2277 | } | ||
2278 | 2353 | ||
2279 | if (push_items == 0) | 2354 | if (push_items == 0) |
2280 | goto out_unlock; | 2355 | goto out_unlock; |
@@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2316 | push_space = BTRFS_LEAF_DATA_SIZE(root); | 2391 | push_space = BTRFS_LEAF_DATA_SIZE(root); |
2317 | for (i = 0; i < right_nritems; i++) { | 2392 | for (i = 0; i < right_nritems; i++) { |
2318 | item = btrfs_item_nr(right, i); | 2393 | item = btrfs_item_nr(right, i); |
2319 | if (!right->map_token) { | ||
2320 | map_extent_buffer(right, (unsigned long)item, | ||
2321 | sizeof(struct btrfs_item), | ||
2322 | &right->map_token, &right->kaddr, | ||
2323 | &right->map_start, &right->map_len, | ||
2324 | KM_USER1); | ||
2325 | } | ||
2326 | push_space -= btrfs_item_size(right, item); | 2394 | push_space -= btrfs_item_size(right, item); |
2327 | btrfs_set_item_offset(right, item, push_space); | 2395 | btrfs_set_item_offset(right, item, push_space); |
2328 | } | 2396 | } |
2329 | 2397 | ||
2330 | if (right->map_token) { | ||
2331 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
2332 | right->map_token = NULL; | ||
2333 | } | ||
2334 | left_nritems -= push_items; | 2398 | left_nritems -= push_items; |
2335 | btrfs_set_header_nritems(left, left_nritems); | 2399 | btrfs_set_header_nritems(left, left_nritems); |
2336 | 2400 | ||
@@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2467 | 2531 | ||
2468 | for (i = 0; i < nr; i++) { | 2532 | for (i = 0; i < nr; i++) { |
2469 | item = btrfs_item_nr(right, i); | 2533 | item = btrfs_item_nr(right, i); |
2470 | if (!right->map_token) { | ||
2471 | map_extent_buffer(right, (unsigned long)item, | ||
2472 | sizeof(struct btrfs_item), | ||
2473 | &right->map_token, &right->kaddr, | ||
2474 | &right->map_start, &right->map_len, | ||
2475 | KM_USER1); | ||
2476 | } | ||
2477 | 2534 | ||
2478 | if (!empty && push_items > 0) { | 2535 | if (!empty && push_items > 0) { |
2479 | if (path->slots[0] < i) | 2536 | if (path->slots[0] < i) |
@@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2496 | push_space += this_item_size + sizeof(*item); | 2553 | push_space += this_item_size + sizeof(*item); |
2497 | } | 2554 | } |
2498 | 2555 | ||
2499 | if (right->map_token) { | ||
2500 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
2501 | right->map_token = NULL; | ||
2502 | } | ||
2503 | |||
2504 | if (push_items == 0) { | 2556 | if (push_items == 0) { |
2505 | ret = 1; | 2557 | ret = 1; |
2506 | goto out; | 2558 | goto out; |
@@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2530 | u32 ioff; | 2582 | u32 ioff; |
2531 | 2583 | ||
2532 | item = btrfs_item_nr(left, i); | 2584 | item = btrfs_item_nr(left, i); |
2533 | if (!left->map_token) { | ||
2534 | map_extent_buffer(left, (unsigned long)item, | ||
2535 | sizeof(struct btrfs_item), | ||
2536 | &left->map_token, &left->kaddr, | ||
2537 | &left->map_start, &left->map_len, | ||
2538 | KM_USER1); | ||
2539 | } | ||
2540 | 2585 | ||
2541 | ioff = btrfs_item_offset(left, item); | 2586 | ioff = btrfs_item_offset(left, item); |
2542 | btrfs_set_item_offset(left, item, | 2587 | btrfs_set_item_offset(left, item, |
2543 | ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); | 2588 | ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); |
2544 | } | 2589 | } |
2545 | btrfs_set_header_nritems(left, old_left_nritems + push_items); | 2590 | btrfs_set_header_nritems(left, old_left_nritems + push_items); |
2546 | if (left->map_token) { | ||
2547 | unmap_extent_buffer(left, left->map_token, KM_USER1); | ||
2548 | left->map_token = NULL; | ||
2549 | } | ||
2550 | 2591 | ||
2551 | /* fixup right node */ | 2592 | /* fixup right node */ |
2552 | if (push_items > right_nritems) { | 2593 | if (push_items > right_nritems) { |
@@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2574 | for (i = 0; i < right_nritems; i++) { | 2615 | for (i = 0; i < right_nritems; i++) { |
2575 | item = btrfs_item_nr(right, i); | 2616 | item = btrfs_item_nr(right, i); |
2576 | 2617 | ||
2577 | if (!right->map_token) { | ||
2578 | map_extent_buffer(right, (unsigned long)item, | ||
2579 | sizeof(struct btrfs_item), | ||
2580 | &right->map_token, &right->kaddr, | ||
2581 | &right->map_start, &right->map_len, | ||
2582 | KM_USER1); | ||
2583 | } | ||
2584 | |||
2585 | push_space = push_space - btrfs_item_size(right, item); | 2618 | push_space = push_space - btrfs_item_size(right, item); |
2586 | btrfs_set_item_offset(right, item, push_space); | 2619 | btrfs_set_item_offset(right, item, push_space); |
2587 | } | 2620 | } |
2588 | if (right->map_token) { | ||
2589 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
2590 | right->map_token = NULL; | ||
2591 | } | ||
2592 | 2621 | ||
2593 | btrfs_mark_buffer_dirty(left); | 2622 | btrfs_mark_buffer_dirty(left); |
2594 | if (right_nritems) | 2623 | if (right_nritems) |
@@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, | |||
2729 | struct btrfs_item *item = btrfs_item_nr(right, i); | 2758 | struct btrfs_item *item = btrfs_item_nr(right, i); |
2730 | u32 ioff; | 2759 | u32 ioff; |
2731 | 2760 | ||
2732 | if (!right->map_token) { | ||
2733 | map_extent_buffer(right, (unsigned long)item, | ||
2734 | sizeof(struct btrfs_item), | ||
2735 | &right->map_token, &right->kaddr, | ||
2736 | &right->map_start, &right->map_len, | ||
2737 | KM_USER1); | ||
2738 | } | ||
2739 | |||
2740 | ioff = btrfs_item_offset(right, item); | 2761 | ioff = btrfs_item_offset(right, item); |
2741 | btrfs_set_item_offset(right, item, ioff + rt_data_off); | 2762 | btrfs_set_item_offset(right, item, ioff + rt_data_off); |
2742 | } | 2763 | } |
2743 | 2764 | ||
2744 | if (right->map_token) { | ||
2745 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
2746 | right->map_token = NULL; | ||
2747 | } | ||
2748 | |||
2749 | btrfs_set_header_nritems(l, mid); | 2765 | btrfs_set_header_nritems(l, mid); |
2750 | ret = 0; | 2766 | ret = 0; |
2751 | btrfs_item_key(right, &disk_key, 0); | 2767 | btrfs_item_key(right, &disk_key, 0); |
@@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
3264 | u32 ioff; | 3280 | u32 ioff; |
3265 | item = btrfs_item_nr(leaf, i); | 3281 | item = btrfs_item_nr(leaf, i); |
3266 | 3282 | ||
3267 | if (!leaf->map_token) { | ||
3268 | map_extent_buffer(leaf, (unsigned long)item, | ||
3269 | sizeof(struct btrfs_item), | ||
3270 | &leaf->map_token, &leaf->kaddr, | ||
3271 | &leaf->map_start, &leaf->map_len, | ||
3272 | KM_USER1); | ||
3273 | } | ||
3274 | |||
3275 | ioff = btrfs_item_offset(leaf, item); | 3283 | ioff = btrfs_item_offset(leaf, item); |
3276 | btrfs_set_item_offset(leaf, item, ioff + size_diff); | 3284 | btrfs_set_item_offset(leaf, item, ioff + size_diff); |
3277 | } | 3285 | } |
3278 | 3286 | ||
3279 | if (leaf->map_token) { | ||
3280 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
3281 | leaf->map_token = NULL; | ||
3282 | } | ||
3283 | |||
3284 | /* shift the data */ | 3287 | /* shift the data */ |
3285 | if (from_end) { | 3288 | if (from_end) { |
3286 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | 3289 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + |
@@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
3377 | u32 ioff; | 3380 | u32 ioff; |
3378 | item = btrfs_item_nr(leaf, i); | 3381 | item = btrfs_item_nr(leaf, i); |
3379 | 3382 | ||
3380 | if (!leaf->map_token) { | ||
3381 | map_extent_buffer(leaf, (unsigned long)item, | ||
3382 | sizeof(struct btrfs_item), | ||
3383 | &leaf->map_token, &leaf->kaddr, | ||
3384 | &leaf->map_start, &leaf->map_len, | ||
3385 | KM_USER1); | ||
3386 | } | ||
3387 | ioff = btrfs_item_offset(leaf, item); | 3383 | ioff = btrfs_item_offset(leaf, item); |
3388 | btrfs_set_item_offset(leaf, item, ioff - data_size); | 3384 | btrfs_set_item_offset(leaf, item, ioff - data_size); |
3389 | } | 3385 | } |
3390 | 3386 | ||
3391 | if (leaf->map_token) { | ||
3392 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
3393 | leaf->map_token = NULL; | ||
3394 | } | ||
3395 | |||
3396 | /* shift the data */ | 3387 | /* shift the data */ |
3397 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | 3388 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + |
3398 | data_end - data_size, btrfs_leaf_data(leaf) + | 3389 | data_end - data_size, btrfs_leaf_data(leaf) + |
@@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, | |||
3494 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | 3485 | * item0..itemN ... dataN.offset..dataN.size .. data0.size |
3495 | */ | 3486 | */ |
3496 | /* first correct the data pointers */ | 3487 | /* first correct the data pointers */ |
3497 | WARN_ON(leaf->map_token); | ||
3498 | for (i = slot; i < nritems; i++) { | 3488 | for (i = slot; i < nritems; i++) { |
3499 | u32 ioff; | 3489 | u32 ioff; |
3500 | 3490 | ||
3501 | item = btrfs_item_nr(leaf, i); | 3491 | item = btrfs_item_nr(leaf, i); |
3502 | if (!leaf->map_token) { | ||
3503 | map_extent_buffer(leaf, (unsigned long)item, | ||
3504 | sizeof(struct btrfs_item), | ||
3505 | &leaf->map_token, &leaf->kaddr, | ||
3506 | &leaf->map_start, &leaf->map_len, | ||
3507 | KM_USER1); | ||
3508 | } | ||
3509 | |||
3510 | ioff = btrfs_item_offset(leaf, item); | 3492 | ioff = btrfs_item_offset(leaf, item); |
3511 | btrfs_set_item_offset(leaf, item, ioff - total_data); | 3493 | btrfs_set_item_offset(leaf, item, ioff - total_data); |
3512 | } | 3494 | } |
3513 | if (leaf->map_token) { | ||
3514 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
3515 | leaf->map_token = NULL; | ||
3516 | } | ||
3517 | |||
3518 | /* shift the items */ | 3495 | /* shift the items */ |
3519 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), | 3496 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), |
3520 | btrfs_item_nr_offset(slot), | 3497 | btrfs_item_nr_offset(slot), |
@@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans, | |||
3608 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | 3585 | * item0..itemN ... dataN.offset..dataN.size .. data0.size |
3609 | */ | 3586 | */ |
3610 | /* first correct the data pointers */ | 3587 | /* first correct the data pointers */ |
3611 | WARN_ON(leaf->map_token); | ||
3612 | for (i = slot; i < nritems; i++) { | 3588 | for (i = slot; i < nritems; i++) { |
3613 | u32 ioff; | 3589 | u32 ioff; |
3614 | 3590 | ||
3615 | item = btrfs_item_nr(leaf, i); | 3591 | item = btrfs_item_nr(leaf, i); |
3616 | if (!leaf->map_token) { | ||
3617 | map_extent_buffer(leaf, (unsigned long)item, | ||
3618 | sizeof(struct btrfs_item), | ||
3619 | &leaf->map_token, &leaf->kaddr, | ||
3620 | &leaf->map_start, &leaf->map_len, | ||
3621 | KM_USER1); | ||
3622 | } | ||
3623 | |||
3624 | ioff = btrfs_item_offset(leaf, item); | 3592 | ioff = btrfs_item_offset(leaf, item); |
3625 | btrfs_set_item_offset(leaf, item, ioff - total_data); | 3593 | btrfs_set_item_offset(leaf, item, ioff - total_data); |
3626 | } | 3594 | } |
3627 | if (leaf->map_token) { | ||
3628 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
3629 | leaf->map_token = NULL; | ||
3630 | } | ||
3631 | |||
3632 | /* shift the items */ | 3595 | /* shift the items */ |
3633 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), | 3596 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), |
3634 | btrfs_item_nr_offset(slot), | 3597 | btrfs_item_nr_offset(slot), |
@@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3840 | u32 ioff; | 3803 | u32 ioff; |
3841 | 3804 | ||
3842 | item = btrfs_item_nr(leaf, i); | 3805 | item = btrfs_item_nr(leaf, i); |
3843 | if (!leaf->map_token) { | ||
3844 | map_extent_buffer(leaf, (unsigned long)item, | ||
3845 | sizeof(struct btrfs_item), | ||
3846 | &leaf->map_token, &leaf->kaddr, | ||
3847 | &leaf->map_start, &leaf->map_len, | ||
3848 | KM_USER1); | ||
3849 | } | ||
3850 | ioff = btrfs_item_offset(leaf, item); | 3806 | ioff = btrfs_item_offset(leaf, item); |
3851 | btrfs_set_item_offset(leaf, item, ioff + dsize); | 3807 | btrfs_set_item_offset(leaf, item, ioff + dsize); |
3852 | } | 3808 | } |
3853 | 3809 | ||
3854 | if (leaf->map_token) { | ||
3855 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
3856 | leaf->map_token = NULL; | ||
3857 | } | ||
3858 | |||
3859 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), | 3810 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), |
3860 | btrfs_item_nr_offset(slot + nr), | 3811 | btrfs_item_nr_offset(slot + nr), |
3861 | sizeof(struct btrfs_item) * | 3812 | sizeof(struct btrfs_item) * |
@@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | |||
4004 | 3955 | ||
4005 | WARN_ON(!path->keep_locks); | 3956 | WARN_ON(!path->keep_locks); |
4006 | again: | 3957 | again: |
4007 | cur = btrfs_lock_root_node(root); | 3958 | cur = btrfs_read_lock_root_node(root); |
4008 | level = btrfs_header_level(cur); | 3959 | level = btrfs_header_level(cur); |
4009 | WARN_ON(path->nodes[level]); | 3960 | WARN_ON(path->nodes[level]); |
4010 | path->nodes[level] = cur; | 3961 | path->nodes[level] = cur; |
4011 | path->locks[level] = 1; | 3962 | path->locks[level] = BTRFS_READ_LOCK; |
4012 | 3963 | ||
4013 | if (btrfs_header_generation(cur) < min_trans) { | 3964 | if (btrfs_header_generation(cur) < min_trans) { |
4014 | ret = 1; | 3965 | ret = 1; |
@@ -4098,12 +4049,12 @@ find_next_key: | |||
4098 | cur = read_node_slot(root, cur, slot); | 4049 | cur = read_node_slot(root, cur, slot); |
4099 | BUG_ON(!cur); | 4050 | BUG_ON(!cur); |
4100 | 4051 | ||
4101 | btrfs_tree_lock(cur); | 4052 | btrfs_tree_read_lock(cur); |
4102 | 4053 | ||
4103 | path->locks[level - 1] = 1; | 4054 | path->locks[level - 1] = BTRFS_READ_LOCK; |
4104 | path->nodes[level - 1] = cur; | 4055 | path->nodes[level - 1] = cur; |
4105 | unlock_up(path, level, 1); | 4056 | unlock_up(path, level, 1); |
4106 | btrfs_clear_path_blocking(path, NULL); | 4057 | btrfs_clear_path_blocking(path, NULL, 0); |
4107 | } | 4058 | } |
4108 | out: | 4059 | out: |
4109 | if (ret == 0) | 4060 | if (ret == 0) |
@@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4218 | u32 nritems; | 4169 | u32 nritems; |
4219 | int ret; | 4170 | int ret; |
4220 | int old_spinning = path->leave_spinning; | 4171 | int old_spinning = path->leave_spinning; |
4221 | int force_blocking = 0; | 4172 | int next_rw_lock = 0; |
4222 | 4173 | ||
4223 | nritems = btrfs_header_nritems(path->nodes[0]); | 4174 | nritems = btrfs_header_nritems(path->nodes[0]); |
4224 | if (nritems == 0) | 4175 | if (nritems == 0) |
4225 | return 1; | 4176 | return 1; |
4226 | 4177 | ||
4227 | /* | ||
4228 | * we take the blocks in an order that upsets lockdep. Using | ||
4229 | * blocking mode is the only way around it. | ||
4230 | */ | ||
4231 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
4232 | force_blocking = 1; | ||
4233 | #endif | ||
4234 | |||
4235 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | 4178 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); |
4236 | again: | 4179 | again: |
4237 | level = 1; | 4180 | level = 1; |
4238 | next = NULL; | 4181 | next = NULL; |
4182 | next_rw_lock = 0; | ||
4239 | btrfs_release_path(path); | 4183 | btrfs_release_path(path); |
4240 | 4184 | ||
4241 | path->keep_locks = 1; | 4185 | path->keep_locks = 1; |
4242 | 4186 | path->leave_spinning = 1; | |
4243 | if (!force_blocking) | ||
4244 | path->leave_spinning = 1; | ||
4245 | 4187 | ||
4246 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4188 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4247 | path->keep_locks = 0; | 4189 | path->keep_locks = 0; |
@@ -4281,11 +4223,12 @@ again: | |||
4281 | } | 4223 | } |
4282 | 4224 | ||
4283 | if (next) { | 4225 | if (next) { |
4284 | btrfs_tree_unlock(next); | 4226 | btrfs_tree_unlock_rw(next, next_rw_lock); |
4285 | free_extent_buffer(next); | 4227 | free_extent_buffer(next); |
4286 | } | 4228 | } |
4287 | 4229 | ||
4288 | next = c; | 4230 | next = c; |
4231 | next_rw_lock = path->locks[level]; | ||
4289 | ret = read_block_for_search(NULL, root, path, &next, level, | 4232 | ret = read_block_for_search(NULL, root, path, &next, level, |
4290 | slot, &key); | 4233 | slot, &key); |
4291 | if (ret == -EAGAIN) | 4234 | if (ret == -EAGAIN) |
@@ -4297,15 +4240,14 @@ again: | |||
4297 | } | 4240 | } |
4298 | 4241 | ||
4299 | if (!path->skip_locking) { | 4242 | if (!path->skip_locking) { |
4300 | ret = btrfs_try_spin_lock(next); | 4243 | ret = btrfs_try_tree_read_lock(next); |
4301 | if (!ret) { | 4244 | if (!ret) { |
4302 | btrfs_set_path_blocking(path); | 4245 | btrfs_set_path_blocking(path); |
4303 | btrfs_tree_lock(next); | 4246 | btrfs_tree_read_lock(next); |
4304 | if (!force_blocking) | 4247 | btrfs_clear_path_blocking(path, next, |
4305 | btrfs_clear_path_blocking(path, next); | 4248 | BTRFS_READ_LOCK); |
4306 | } | 4249 | } |
4307 | if (force_blocking) | 4250 | next_rw_lock = BTRFS_READ_LOCK; |
4308 | btrfs_set_lock_blocking(next); | ||
4309 | } | 4251 | } |
4310 | break; | 4252 | break; |
4311 | } | 4253 | } |
@@ -4314,14 +4256,13 @@ again: | |||
4314 | level--; | 4256 | level--; |
4315 | c = path->nodes[level]; | 4257 | c = path->nodes[level]; |
4316 | if (path->locks[level]) | 4258 | if (path->locks[level]) |
4317 | btrfs_tree_unlock(c); | 4259 | btrfs_tree_unlock_rw(c, path->locks[level]); |
4318 | 4260 | ||
4319 | free_extent_buffer(c); | 4261 | free_extent_buffer(c); |
4320 | path->nodes[level] = next; | 4262 | path->nodes[level] = next; |
4321 | path->slots[level] = 0; | 4263 | path->slots[level] = 0; |
4322 | if (!path->skip_locking) | 4264 | if (!path->skip_locking) |
4323 | path->locks[level] = 1; | 4265 | path->locks[level] = next_rw_lock; |
4324 | |||
4325 | if (!level) | 4266 | if (!level) |
4326 | break; | 4267 | break; |
4327 | 4268 | ||
@@ -4336,16 +4277,14 @@ again: | |||
4336 | } | 4277 | } |
4337 | 4278 | ||
4338 | if (!path->skip_locking) { | 4279 | if (!path->skip_locking) { |
4339 | btrfs_assert_tree_locked(path->nodes[level]); | 4280 | ret = btrfs_try_tree_read_lock(next); |
4340 | ret = btrfs_try_spin_lock(next); | ||
4341 | if (!ret) { | 4281 | if (!ret) { |
4342 | btrfs_set_path_blocking(path); | 4282 | btrfs_set_path_blocking(path); |
4343 | btrfs_tree_lock(next); | 4283 | btrfs_tree_read_lock(next); |
4344 | if (!force_blocking) | 4284 | btrfs_clear_path_blocking(path, next, |
4345 | btrfs_clear_path_blocking(path, next); | 4285 | BTRFS_READ_LOCK); |
4346 | } | 4286 | } |
4347 | if (force_blocking) | 4287 | next_rw_lock = BTRFS_READ_LOCK; |
4348 | btrfs_set_lock_blocking(next); | ||
4349 | } | 4288 | } |
4350 | } | 4289 | } |
4351 | ret = 0; | 4290 | ret = 0; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fe9287b06496..03912c5c6f49 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -755,6 +755,8 @@ struct btrfs_space_info { | |||
755 | chunks for this space */ | 755 | chunks for this space */ |
756 | unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ | 756 | unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ |
757 | 757 | ||
758 | unsigned int flush:1; /* set if we are trying to make space */ | ||
759 | |||
758 | unsigned int force_alloc; /* set if we need to force a chunk | 760 | unsigned int force_alloc; /* set if we need to force a chunk |
759 | alloc for this space */ | 761 | alloc for this space */ |
760 | 762 | ||
@@ -764,7 +766,7 @@ struct btrfs_space_info { | |||
764 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; | 766 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
765 | spinlock_t lock; | 767 | spinlock_t lock; |
766 | struct rw_semaphore groups_sem; | 768 | struct rw_semaphore groups_sem; |
767 | atomic_t caching_threads; | 769 | wait_queue_head_t wait; |
768 | }; | 770 | }; |
769 | 771 | ||
770 | struct btrfs_block_rsv { | 772 | struct btrfs_block_rsv { |
@@ -824,6 +826,7 @@ struct btrfs_caching_control { | |||
824 | struct list_head list; | 826 | struct list_head list; |
825 | struct mutex mutex; | 827 | struct mutex mutex; |
826 | wait_queue_head_t wait; | 828 | wait_queue_head_t wait; |
829 | struct btrfs_work work; | ||
827 | struct btrfs_block_group_cache *block_group; | 830 | struct btrfs_block_group_cache *block_group; |
828 | u64 progress; | 831 | u64 progress; |
829 | atomic_t count; | 832 | atomic_t count; |
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info { | |||
1032 | struct btrfs_workers endio_write_workers; | 1035 | struct btrfs_workers endio_write_workers; |
1033 | struct btrfs_workers endio_freespace_worker; | 1036 | struct btrfs_workers endio_freespace_worker; |
1034 | struct btrfs_workers submit_workers; | 1037 | struct btrfs_workers submit_workers; |
1038 | struct btrfs_workers caching_workers; | ||
1039 | |||
1035 | /* | 1040 | /* |
1036 | * fixup workers take dirty pages that didn't properly go through | 1041 | * fixup workers take dirty pages that didn't properly go through |
1037 | * the cow mechanism and make them safe to write. It happens | 1042 | * the cow mechanism and make them safe to write. It happens |
@@ -1410,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); | |||
1410 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ | 1415 | #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ |
1411 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ | 1416 | static inline u##bits btrfs_##name(struct extent_buffer *eb) \ |
1412 | { \ | 1417 | { \ |
1413 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1418 | type *p = page_address(eb->first_page); \ |
1414 | u##bits res = le##bits##_to_cpu(p->member); \ | 1419 | u##bits res = le##bits##_to_cpu(p->member); \ |
1415 | kunmap_atomic(p, KM_USER0); \ | ||
1416 | return res; \ | 1420 | return res; \ |
1417 | } \ | 1421 | } \ |
1418 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ | 1422 | static inline void btrfs_set_##name(struct extent_buffer *eb, \ |
1419 | u##bits val) \ | 1423 | u##bits val) \ |
1420 | { \ | 1424 | { \ |
1421 | type *p = kmap_atomic(eb->first_page, KM_USER0); \ | 1425 | type *p = page_address(eb->first_page); \ |
1422 | p->member = cpu_to_le##bits(val); \ | 1426 | p->member = cpu_to_le##bits(val); \ |
1423 | kunmap_atomic(p, KM_USER0); \ | ||
1424 | } | 1427 | } |
1425 | 1428 | ||
1426 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ | 1429 | #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ |
@@ -2128,7 +2131,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) | |||
2128 | 2131 | ||
2129 | /* extent-tree.c */ | 2132 | /* extent-tree.c */ |
2130 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 2133 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
2131 | int num_items) | 2134 | unsigned num_items) |
2132 | { | 2135 | { |
2133 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 2136 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * |
2134 | 3 * num_items; | 2137 | 3 * num_items; |
@@ -2222,9 +2225,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode); | |||
2222 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2225 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2223 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2226 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
2224 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | 2227 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
2225 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2226 | struct btrfs_root *root, | ||
2227 | int num_items); | ||
2228 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 2228 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
2229 | struct btrfs_root *root); | 2229 | struct btrfs_root *root); |
2230 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | 2230 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
@@ -2330,7 +2330,7 @@ struct btrfs_path *btrfs_alloc_path(void); | |||
2330 | void btrfs_free_path(struct btrfs_path *p); | 2330 | void btrfs_free_path(struct btrfs_path *p); |
2331 | void btrfs_set_path_blocking(struct btrfs_path *p); | 2331 | void btrfs_set_path_blocking(struct btrfs_path *p); |
2332 | void btrfs_clear_path_blocking(struct btrfs_path *p, | 2332 | void btrfs_clear_path_blocking(struct btrfs_path *p, |
2333 | struct extent_buffer *held); | 2333 | struct extent_buffer *held, int held_rw); |
2334 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); | 2334 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); |
2335 | 2335 | ||
2336 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2336 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
@@ -2365,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2365 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2365 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2366 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2366 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2367 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2367 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2368 | int btrfs_drop_snapshot(struct btrfs_root *root, | 2368 | void btrfs_drop_snapshot(struct btrfs_root *root, |
2369 | struct btrfs_block_rsv *block_rsv, int update_ref); | 2369 | struct btrfs_block_rsv *block_rsv, int update_ref); |
2370 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2370 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2371 | struct btrfs_root *root, | 2371 | struct btrfs_root *root, |
2372 | struct extent_buffer *node, | 2372 | struct extent_buffer *node, |
@@ -2404,8 +2404,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
2404 | btrfs_root_item *item, struct btrfs_key *key); | 2404 | btrfs_root_item *item, struct btrfs_key *key); |
2405 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2405 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
2406 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | 2406 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); |
2407 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2407 | void btrfs_set_root_node(struct btrfs_root_item *item, |
2408 | struct extent_buffer *node); | 2408 | struct extent_buffer *node); |
2409 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); | 2409 | void btrfs_check_and_init_root_item(struct btrfs_root_item *item); |
2410 | 2410 | ||
2411 | /* dir-item.c */ | 2411 | /* dir-item.c */ |
@@ -2521,6 +2521,14 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag | |||
2521 | #define PageChecked PageFsMisc | 2521 | #define PageChecked PageFsMisc |
2522 | #endif | 2522 | #endif |
2523 | 2523 | ||
2524 | /* This forces readahead on a given range of bytes in an inode */ | ||
2525 | static inline void btrfs_force_ra(struct address_space *mapping, | ||
2526 | struct file_ra_state *ra, struct file *file, | ||
2527 | pgoff_t offset, unsigned long req_size) | ||
2528 | { | ||
2529 | page_cache_sync_readahead(mapping, ra, file, offset, req_size); | ||
2530 | } | ||
2531 | |||
2524 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); | 2532 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); |
2525 | int btrfs_set_inode_index(struct inode *dir, u64 *index); | 2533 | int btrfs_set_inode_index(struct inode *dir, u64 *index); |
2526 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | 2534 | int btrfs_unlink_inode(struct btrfs_trans_handle *trans, |
@@ -2549,9 +2557,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
2549 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2557 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2550 | size_t size, struct bio *bio, unsigned long bio_flags); | 2558 | size_t size, struct bio *bio, unsigned long bio_flags); |
2551 | 2559 | ||
2552 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
2553 | struct file_ra_state *ra, struct file *file, | ||
2554 | pgoff_t offset, pgoff_t last_index); | ||
2555 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2560 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2556 | int btrfs_readpage(struct file *file, struct page *page); | 2561 | int btrfs_readpage(struct file *file, struct page *page); |
2557 | void btrfs_evict_inode(struct inode *inode); | 2562 | void btrfs_evict_inode(struct inode *inode); |
@@ -2646,12 +2651,21 @@ do { \ | |||
2646 | /* acl.c */ | 2651 | /* acl.c */ |
2647 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2652 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
2648 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type); | 2653 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type); |
2649 | #else | ||
2650 | #define btrfs_get_acl NULL | ||
2651 | #endif | ||
2652 | int btrfs_init_acl(struct btrfs_trans_handle *trans, | 2654 | int btrfs_init_acl(struct btrfs_trans_handle *trans, |
2653 | struct inode *inode, struct inode *dir); | 2655 | struct inode *inode, struct inode *dir); |
2654 | int btrfs_acl_chmod(struct inode *inode); | 2656 | int btrfs_acl_chmod(struct inode *inode); |
2657 | #else | ||
2658 | #define btrfs_get_acl NULL | ||
2659 | static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, | ||
2660 | struct inode *inode, struct inode *dir) | ||
2661 | { | ||
2662 | return 0; | ||
2663 | } | ||
2664 | static inline int btrfs_acl_chmod(struct inode *inode) | ||
2665 | { | ||
2666 | return 0; | ||
2667 | } | ||
2668 | #endif | ||
2655 | 2669 | ||
2656 | /* relocation.c */ | 2670 | /* relocation.c */ |
2657 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); | 2671 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); |
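The ACL hunk above moves btrfs_init_acl() and btrfs_acl_chmod() behind the CONFIG_BTRFS_FS_POSIX_ACL switch and gives them inline no-op stubs, where before only btrfs_get_acl had a fallback. A hypothetical caller (illustration only, not a function from this diff) shows what that buys:

    /* builds identically with or without CONFIG_BTRFS_FS_POSIX_ACL;
     * no #ifdef needed at the call site */
    static int example_new_inode(struct btrfs_trans_handle *trans,
                                 struct inode *inode, struct inode *dir)
    {
            int err;

            /* resolves to the inline stub (constant 0) when ACL
             * support is configured out */
            err = btrfs_init_acl(trans, inode, dir);
            if (err)
                    return err;
            return btrfs_acl_chmod(inode);
    }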
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 98c68e658a9b..b52c672f4c18 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, | |||
735 | } | 735 | } |
736 | 736 | ||
737 | /* reset all the locked nodes in the path to spinning locks. */ | 737 | /* reset all the locked nodes in the path to spinning locks. */ |
738 | btrfs_clear_path_blocking(path, NULL); | 738 | btrfs_clear_path_blocking(path, NULL, 0); |
739 | 739 | ||
740 | /* insert the keys of the items */ | 740 | /* insert the keys of the items */ |
741 | ret = setup_items_for_insert(trans, root, path, keys, data_size, | 741 | ret = setup_items_for_insert(trans, root, path, keys, data_size, |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 8d27af4bd8b9..7083d08b2a21 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
26 | #include <linux/list.h> | 26 | #include <linux/list.h> |
27 | #include <linux/wait.h> | 27 | #include <linux/wait.h> |
28 | #include <asm/atomic.h> | 28 | #include <linux/atomic.h> |
29 | 29 | ||
30 | #include "ctree.h" | 30 | #include "ctree.h" |
31 | 31 | ||
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 685f2593c4f0..31d84e78129b 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
89 | data_size = sizeof(*dir_item) + name_len + data_len; | 89 | data_size = sizeof(*dir_item) + name_len + data_len; |
90 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 90 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, |
91 | name, name_len); | 91 | name, name_len); |
92 | /* | 92 | if (IS_ERR(dir_item)) |
93 | * FIXME: at some point we should handle xattr's that are larger than | 93 | return PTR_ERR(dir_item); |
94 | * what we can fit in our leaf. We set location to NULL b/c we arent | ||
95 | * pointing at anything else, that will change if we store the xattr | ||
96 | * data in a separate inode. | ||
97 | */ | ||
98 | BUG_ON(IS_ERR(dir_item)); | ||
99 | memset(&location, 0, sizeof(location)); | 94 | memset(&location, 0, sizeof(location)); |
100 | 95 | ||
101 | leaf = path->nodes[0]; | 96 | leaf = path->nodes[0]; |
@@ -203,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
203 | struct btrfs_key key; | 198 | struct btrfs_key key; |
204 | int ins_len = mod < 0 ? -1 : 0; | 199 | int ins_len = mod < 0 ? -1 : 0; |
205 | int cow = mod != 0; | 200 | int cow = mod != 0; |
206 | struct btrfs_key found_key; | ||
207 | struct extent_buffer *leaf; | ||
208 | 201 | ||
209 | key.objectid = dir; | 202 | key.objectid = dir; |
210 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 203 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); |
@@ -214,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
214 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 207 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
215 | if (ret < 0) | 208 | if (ret < 0) |
216 | return ERR_PTR(ret); | 209 | return ERR_PTR(ret); |
217 | if (ret > 0) { | 210 | if (ret > 0) |
218 | if (path->slots[0] == 0) | ||
219 | return NULL; | ||
220 | path->slots[0]--; | ||
221 | } | ||
222 | |||
223 | leaf = path->nodes[0]; | ||
224 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
225 | |||
226 | if (found_key.objectid != dir || | ||
227 | btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || | ||
228 | found_key.offset != key.offset) | ||
229 | return NULL; | 211 | return NULL; |
230 | 212 | ||
231 | return btrfs_match_dir_item_name(root, path, name, name_len); | 213 | return btrfs_match_dir_item_name(root, path, name, name_len); |
@@ -320,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
320 | struct btrfs_key key; | 302 | struct btrfs_key key; |
321 | int ins_len = mod < 0 ? -1 : 0; | 303 | int ins_len = mod < 0 ? -1 : 0; |
322 | int cow = mod != 0; | 304 | int cow = mod != 0; |
323 | struct btrfs_key found_key; | ||
324 | struct extent_buffer *leaf; | ||
325 | 305 | ||
326 | key.objectid = dir; | 306 | key.objectid = dir; |
327 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 307 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); |
@@ -329,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
329 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 309 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
330 | if (ret < 0) | 310 | if (ret < 0) |
331 | return ERR_PTR(ret); | 311 | return ERR_PTR(ret); |
332 | if (ret > 0) { | 312 | if (ret > 0) |
333 | if (path->slots[0] == 0) | ||
334 | return NULL; | ||
335 | path->slots[0]--; | ||
336 | } | ||
337 | |||
338 | leaf = path->nodes[0]; | ||
339 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | ||
340 | |||
341 | if (found_key.objectid != dir || | ||
342 | btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || | ||
343 | found_key.offset != key.offset) | ||
344 | return NULL; | 313 | return NULL; |
345 | 314 | ||
346 | return btrfs_match_dir_item_name(root, path, name, name_len); | 315 | return btrfs_match_dir_item_name(root, path, name, name_len); |
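Both lookup hunks above replace BUG_ON(IS_ERR(...)) with error propagation via pointer-encoded errnos. The idiom in isolation, with hypothetical names ('foo_lookup' is not from this diff; ERR_PTR/IS_ERR/PTR_ERR are the real <linux/err.h> macros):

    #include <linux/err.h>

    struct foo { int key; };

    /* callee: encode a negative errno in the returned pointer */
    static struct foo *foo_lookup(struct foo *table, int key)
    {
            if (!table)
                    return ERR_PTR(-ENOENT);
            return table;
    }

    /* caller: test and decode instead of crashing on BUG_ON() */
    static int foo_user(struct foo *table, int key)
    {
            struct foo *f = foo_lookup(table, key);

            if (IS_ERR(f))
                    return PTR_ERR(f);      /* propagate the errno */
            return f->key;
    }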
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b231ae13b269..07b3ac662e19 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -100,38 +100,83 @@ struct async_submit_bio { | |||
100 | struct btrfs_work work; | 100 | struct btrfs_work work; |
101 | }; | 101 | }; |
102 | 102 | ||
103 | /* These are used to set the lockdep class on the extent buffer locks. | 103 | /* |
104 | * The class is set by the readpage_end_io_hook after the buffer has | 104 | * Lockdep class keys for extent_buffer->lock's in this root. For a given |
105 | * passed csum validation but before the pages are unlocked. | 105 | * eb, the lockdep key is determined by the btrfs_root it belongs to and |
106 | * the level the eb occupies in the tree. | ||
107 | * | ||
108 | * Different roots are used for different purposes and may nest inside each | ||
109 | * other, so they require separate keysets. As lockdep keys should be | ||
110 | * static, assign keysets according to the purpose of the root as indicated | ||
111 | * by btrfs_root->objectid. This ensures that all special purpose roots | ||
112 | * have separate keysets. | ||
106 | * | 113 | * |
107 | * The lockdep class is also set by btrfs_init_new_buffer on freshly | 114 | * Lock-nesting across peer nodes is always done with the immediate parent |
108 | * allocated blocks. | 115 | * node locked thus preventing deadlock. As lockdep doesn't know this, use |
116 | * subclass to avoid triggering lockdep warning in such cases. | ||
109 | * | 117 | * |
110 | * The class is based on the level in the tree block, which allows lockdep | 118 | * The key is set by the readpage_end_io_hook after the buffer has passed |
111 | * to know that lower nodes nest inside the locks of higher nodes. | 119 | * csum validation but before the pages are unlocked. It is also set by |
120 | * btrfs_init_new_buffer on freshly allocated blocks. | ||
112 | * | 121 | * |
113 | * We also add a check to make sure the highest level of the tree is | 122 | * We also add a check to make sure the highest level of the tree is the |
114 | * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this | 123 | * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code |
115 | * code needs update as well. | 124 | * needs update as well. |
116 | */ | 125 | */ |
117 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 126 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
118 | # if BTRFS_MAX_LEVEL != 8 | 127 | # if BTRFS_MAX_LEVEL != 8 |
119 | # error | 128 | # error |
120 | # endif | 129 | # endif |
121 | static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; | 130 | |
122 | static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | 131 | static struct btrfs_lockdep_keyset { |
123 | /* leaf */ | 132 | u64 id; /* root objectid */ |
124 | "btrfs-extent-00", | 133 | const char *name_stem; /* lock name stem */ |
125 | "btrfs-extent-01", | 134 | char names[BTRFS_MAX_LEVEL + 1][20]; |
126 | "btrfs-extent-02", | 135 | struct lock_class_key keys[BTRFS_MAX_LEVEL + 1]; |
127 | "btrfs-extent-03", | 136 | } btrfs_lockdep_keysets[] = { |
128 | "btrfs-extent-04", | 137 | { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" }, |
129 | "btrfs-extent-05", | 138 | { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" }, |
130 | "btrfs-extent-06", | 139 | { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" }, |
131 | "btrfs-extent-07", | 140 | { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, |
132 | /* highest possible level */ | 141 | { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, |
133 | "btrfs-extent-08", | 142 | { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, |
143 | { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, | ||
144 | { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, | ||
145 | { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, | ||
146 | { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, | ||
147 | { .id = 0, .name_stem = "tree" }, | ||
134 | }; | 148 | }; |
149 | |||
150 | void __init btrfs_init_lockdep(void) | ||
151 | { | ||
152 | int i, j; | ||
153 | |||
154 | /* initialize lockdep class names */ | ||
155 | for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) { | ||
156 | struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i]; | ||
157 | |||
158 | for (j = 0; j < ARRAY_SIZE(ks->names); j++) | ||
159 | snprintf(ks->names[j], sizeof(ks->names[j]), | ||
160 | "btrfs-%s-%02d", ks->name_stem, j); | ||
161 | } | ||
162 | } | ||
163 | |||
164 | void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, | ||
165 | int level) | ||
166 | { | ||
167 | struct btrfs_lockdep_keyset *ks; | ||
168 | |||
169 | BUG_ON(level >= ARRAY_SIZE(ks->keys)); | ||
170 | |||
171 | /* find the matching keyset, id 0 is the default entry */ | ||
172 | for (ks = btrfs_lockdep_keysets; ks->id; ks++) | ||
173 | if (ks->id == objectid) | ||
174 | break; | ||
175 | |||
176 | lockdep_set_class_and_name(&eb->lock, | ||
177 | &ks->keys[level], ks->names[level]); | ||
178 | } | ||
179 | |||
135 | #endif | 180 | #endif |
136 | 181 | ||
137 | /* | 182 | /* |
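Taken together with btrfs_init_lockdep() above, the keyset table is consumed like this (condensed from the call sites later in this diff; "btrfs-extent-02" is what the snprintf format would produce for a level-2 extent-tree lock):

    /* once, early in module init: pre-format the static lock names */
    btrfs_init_lockdep();

    /* per validated buffer: the class is keyed by (owner root, level) */
    btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
                                   eb, btrfs_header_level(eb));

Lockdep keeps pointers to both the key and the name, which is why the keyset embeds fixed-size static arrays rather than building strings at lookup time.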
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
217 | unsigned long len; | 262 | unsigned long len; |
218 | unsigned long cur_len; | 263 | unsigned long cur_len; |
219 | unsigned long offset = BTRFS_CSUM_SIZE; | 264 | unsigned long offset = BTRFS_CSUM_SIZE; |
220 | char *map_token = NULL; | ||
221 | char *kaddr; | 265 | char *kaddr; |
222 | unsigned long map_start; | 266 | unsigned long map_start; |
223 | unsigned long map_len; | 267 | unsigned long map_len; |
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
228 | len = buf->len - offset; | 272 | len = buf->len - offset; |
229 | while (len > 0) { | 273 | while (len > 0) { |
230 | err = map_private_extent_buffer(buf, offset, 32, | 274 | err = map_private_extent_buffer(buf, offset, 32, |
231 | &map_token, &kaddr, | 275 | &kaddr, &map_start, &map_len); |
232 | &map_start, &map_len, KM_USER0); | ||
233 | if (err) | 276 | if (err) |
234 | return 1; | 277 | return 1; |
235 | cur_len = min(len, map_len - (offset - map_start)); | 278 | cur_len = min(len, map_len - (offset - map_start)); |
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
237 | crc, cur_len); | 280 | crc, cur_len); |
238 | len -= cur_len; | 281 | len -= cur_len; |
239 | offset += cur_len; | 282 | offset += cur_len; |
240 | unmap_extent_buffer(buf, map_token, KM_USER0); | ||
241 | } | 283 | } |
242 | if (csum_size > sizeof(inline_result)) { | 284 | if (csum_size > sizeof(inline_result)) { |
243 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); | 285 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); |
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
494 | return 0; | 536 | return 0; |
495 | } | 537 | } |
496 | 538 | ||
497 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
498 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | ||
499 | { | ||
500 | lockdep_set_class_and_name(&eb->lock, | ||
501 | &btrfs_eb_class[level], | ||
502 | btrfs_eb_name[level]); | ||
503 | } | ||
504 | #endif | ||
505 | |||
506 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 539 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
507 | struct extent_state *state) | 540 | struct extent_state *state) |
508 | { | 541 | { |
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
553 | } | 586 | } |
554 | found_level = btrfs_header_level(eb); | 587 | found_level = btrfs_header_level(eb); |
555 | 588 | ||
556 | btrfs_set_buffer_lockdep_class(eb, found_level); | 589 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), |
590 | eb, found_level); | ||
557 | 591 | ||
558 | ret = csum_tree_block(root, eb, 1); | 592 | ret = csum_tree_block(root, eb, 1); |
559 | if (ret) { | 593 | if (ret) { |
@@ -1598,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1598 | goto fail_bdi; | 1632 | goto fail_bdi; |
1599 | } | 1633 | } |
1600 | 1634 | ||
1601 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | 1635 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
1602 | 1636 | ||
1603 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1637 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1604 | INIT_LIST_HEAD(&fs_info->trans_list); | 1638 | INIT_LIST_HEAD(&fs_info->trans_list); |
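The one-line change above swaps open-coded masking of i_mapping->flags for the pagemap accessor. A usage sketch (mapping_set_gfp_mask()/mapping_gfp_mask() are the include/linux/pagemap.h helpers; btree_inode, index and page stand in for the caller's context):

    /* pin the btree inode's page-cache allocations to GFP_NOFS so
     * they can never recurse back into the filesystem */
    mapping_set_gfp_mask(btree_inode->i_mapping, GFP_NOFS);

    /* page-cache helpers later pick the stored mask back up */
    page = find_or_create_page(btree_inode->i_mapping, index,
                               mapping_gfp_mask(btree_inode->i_mapping));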
@@ -1802,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1802 | fs_info->thread_pool_size), | 1836 | fs_info->thread_pool_size), |
1803 | &fs_info->generic_worker); | 1837 | &fs_info->generic_worker); |
1804 | 1838 | ||
1839 | btrfs_init_workers(&fs_info->caching_workers, "cache", | ||
1840 | 2, &fs_info->generic_worker); | ||
1841 | |||
1805 | /* a higher idle thresh on the submit workers makes it much more | 1842 | /* a higher idle thresh on the submit workers makes it much more |
1806 | * likely that bios will be send down in a sane order to the | 1843 | * likely that bios will be send down in a sane order to the |
1807 | * devices | 1844 | * devices |
@@ -1855,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1855 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1892 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1856 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); | 1893 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); |
1857 | btrfs_start_workers(&fs_info->delayed_workers, 1); | 1894 | btrfs_start_workers(&fs_info->delayed_workers, 1); |
1895 | btrfs_start_workers(&fs_info->caching_workers, 1); | ||
1858 | 1896 | ||
1859 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1897 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1860 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1898 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -2112,6 +2150,7 @@ fail_sb_buffer: | |||
2112 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2150 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2113 | btrfs_stop_workers(&fs_info->submit_workers); | 2151 | btrfs_stop_workers(&fs_info->submit_workers); |
2114 | btrfs_stop_workers(&fs_info->delayed_workers); | 2152 | btrfs_stop_workers(&fs_info->delayed_workers); |
2153 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2115 | fail_alloc: | 2154 | fail_alloc: |
2116 | kfree(fs_info->delayed_root); | 2155 | kfree(fs_info->delayed_root); |
2117 | fail_iput: | 2156 | fail_iput: |
@@ -2577,6 +2616,7 @@ int close_ctree(struct btrfs_root *root) | |||
2577 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2616 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2578 | btrfs_stop_workers(&fs_info->submit_workers); | 2617 | btrfs_stop_workers(&fs_info->submit_workers); |
2579 | btrfs_stop_workers(&fs_info->delayed_workers); | 2618 | btrfs_stop_workers(&fs_info->delayed_workers); |
2619 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2580 | 2620 | ||
2581 | btrfs_close_devices(fs_info->fs_devices); | 2621 | btrfs_close_devices(fs_info->fs_devices); |
2582 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2622 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0b610a67aae..bec3ea4bd67f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page); | |||
87 | 87 | ||
88 | 88 | ||
89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
90 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); | 90 | void btrfs_init_lockdep(void); |
91 | void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
92 | struct extent_buffer *eb, int level); | ||
91 | #else | 93 | #else |
92 | static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, | 94 | static inline void btrfs_init_lockdep(void) |
93 | int level) | 95 | { } |
96 | static inline void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
97 | struct extent_buffer *eb, int level) | ||
94 | { | 98 | { |
95 | } | 99 | } |
96 | #endif | 100 | #endif |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456fdb60..f5be06a2462f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
320 | return total_added; | 320 | return total_added; |
321 | } | 321 | } |
322 | 322 | ||
323 | static int caching_kthread(void *data) | 323 | static noinline void caching_thread(struct btrfs_work *work) |
324 | { | 324 | { |
325 | struct btrfs_block_group_cache *block_group = data; | 325 | struct btrfs_block_group_cache *block_group; |
326 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 326 | struct btrfs_fs_info *fs_info; |
327 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; | 327 | struct btrfs_caching_control *caching_ctl; |
328 | struct btrfs_root *extent_root = fs_info->extent_root; | 328 | struct btrfs_root *extent_root; |
329 | struct btrfs_path *path; | 329 | struct btrfs_path *path; |
330 | struct extent_buffer *leaf; | 330 | struct extent_buffer *leaf; |
331 | struct btrfs_key key; | 331 | struct btrfs_key key; |
@@ -334,9 +334,14 @@ static int caching_kthread(void *data) | |||
334 | u32 nritems; | 334 | u32 nritems; |
335 | int ret = 0; | 335 | int ret = 0; |
336 | 336 | ||
337 | caching_ctl = container_of(work, struct btrfs_caching_control, work); | ||
338 | block_group = caching_ctl->block_group; | ||
339 | fs_info = block_group->fs_info; | ||
340 | extent_root = fs_info->extent_root; | ||
341 | |||
337 | path = btrfs_alloc_path(); | 342 | path = btrfs_alloc_path(); |
338 | if (!path) | 343 | if (!path) |
339 | return -ENOMEM; | 344 | goto out; |
340 | 345 | ||
341 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 346 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
342 | 347 | ||
@@ -433,13 +438,11 @@ err: | |||
433 | free_excluded_extents(extent_root, block_group); | 438 | free_excluded_extents(extent_root, block_group); |
434 | 439 | ||
435 | mutex_unlock(&caching_ctl->mutex); | 440 | mutex_unlock(&caching_ctl->mutex); |
441 | out: | ||
436 | wake_up(&caching_ctl->wait); | 442 | wake_up(&caching_ctl->wait); |
437 | 443 | ||
438 | put_caching_control(caching_ctl); | 444 | put_caching_control(caching_ctl); |
439 | atomic_dec(&block_group->space_info->caching_threads); | ||
440 | btrfs_put_block_group(block_group); | 445 | btrfs_put_block_group(block_group); |
441 | |||
442 | return 0; | ||
443 | } | 446 | } |
444 | 447 | ||
445 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 448 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
449 | { | 452 | { |
450 | struct btrfs_fs_info *fs_info = cache->fs_info; | 453 | struct btrfs_fs_info *fs_info = cache->fs_info; |
451 | struct btrfs_caching_control *caching_ctl; | 454 | struct btrfs_caching_control *caching_ctl; |
452 | struct task_struct *tsk; | ||
453 | int ret = 0; | 455 | int ret = 0; |
454 | 456 | ||
455 | smp_mb(); | 457 | smp_mb(); |
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
501 | caching_ctl->progress = cache->key.objectid; | 503 | caching_ctl->progress = cache->key.objectid; |
502 | /* one for caching kthread, one for caching block group list */ | 504 | /* one for caching kthread, one for caching block group list */ |
503 | atomic_set(&caching_ctl->count, 2); | 505 | atomic_set(&caching_ctl->count, 2); |
506 | caching_ctl->work.func = caching_thread; | ||
504 | 507 | ||
505 | spin_lock(&cache->lock); | 508 | spin_lock(&cache->lock); |
506 | if (cache->cached != BTRFS_CACHE_NO) { | 509 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
516 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 519 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
517 | up_write(&fs_info->extent_commit_sem); | 520 | up_write(&fs_info->extent_commit_sem); |
518 | 521 | ||
519 | atomic_inc(&cache->space_info->caching_threads); | ||
520 | btrfs_get_block_group(cache); | 522 | btrfs_get_block_group(cache); |
521 | 523 | ||
522 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 524 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); |
523 | cache->key.objectid); | ||
524 | if (IS_ERR(tsk)) { | ||
525 | ret = PTR_ERR(tsk); | ||
526 | printk(KERN_ERR "error running thread %d\n", ret); | ||
527 | BUG(); | ||
528 | } | ||
529 | 525 | ||
530 | return ret; | 526 | return ret; |
531 | } | 527 | } |
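The hunks above turn the per-block-group caching kthread into a btrfs_work item. The shape of the conversion, condensed (all names from this diff):

    /* the work item is embedded in the caching control, so the
     * callback recovers its context with container_of() instead of
     * a kthread data pointer */
    static noinline void caching_thread(struct btrfs_work *work)
    {
            struct btrfs_caching_control *caching_ctl =
                    container_of(work, struct btrfs_caching_control, work);

            /* ... scan the extent tree for caching_ctl->block_group ... */
    }

    /* producer side: queueing cannot fail, so the old
     * kthread_run()/IS_ERR()/BUG() dance disappears */
    caching_ctl->work.func = caching_thread;
    btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);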
@@ -667,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
667 | struct btrfs_path *path; | 663 | struct btrfs_path *path; |
668 | 664 | ||
669 | path = btrfs_alloc_path(); | 665 | path = btrfs_alloc_path(); |
670 | BUG_ON(!path); | 666 | if (!path) |
667 | return -ENOMEM; | ||
668 | |||
671 | key.objectid = start; | 669 | key.objectid = start; |
672 | key.offset = len; | 670 | key.offset = len; |
673 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 671 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); |
@@ -1784,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1784 | 1782 | ||
1785 | 1783 | ||
1786 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1784 | for (i = 0; i < multi->num_stripes; i++, stripe++) { |
1785 | if (!stripe->dev->can_discard) | ||
1786 | continue; | ||
1787 | |||
1787 | ret = btrfs_issue_discard(stripe->dev->bdev, | 1788 | ret = btrfs_issue_discard(stripe->dev->bdev, |
1788 | stripe->physical, | 1789 | stripe->physical, |
1789 | stripe->length); | 1790 | stripe->length); |
@@ -1791,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1791 | discarded_bytes += stripe->length; | 1792 | discarded_bytes += stripe->length; |
1792 | else if (ret != -EOPNOTSUPP) | 1793 | else if (ret != -EOPNOTSUPP) |
1793 | break; | 1794 | break; |
1795 | |||
1796 | /* | ||
1797 | * Just in case we get back EOPNOTSUPP for some reason, | ||
1798 | * ignore the return value so we don't screw up | ||
1799 | * people calling discard_extent. | ||
1800 | */ | ||
1801 | ret = 0; | ||
1794 | } | 1802 | } |
1795 | kfree(multi); | 1803 | kfree(multi); |
1796 | } | 1804 | } |
1797 | if (discarded_bytes && ret == -EOPNOTSUPP) | ||
1798 | ret = 0; | ||
1799 | 1805 | ||
1800 | if (actual_bytes) | 1806 | if (actual_bytes) |
1801 | *actual_bytes = discarded_bytes; | 1807 | *actual_bytes = discarded_bytes; |
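Reassembled from the right-hand column, the stripe loop in btrfs_discard_extent() now reads:

    for (i = 0; i < multi->num_stripes; i++, stripe++) {
            if (!stripe->dev->can_discard)
                    continue;               /* device cannot discard */

            ret = btrfs_issue_discard(stripe->dev->bdev,
                                      stripe->physical, stripe->length);
            if (!ret)
                    discarded_bytes += stripe->length;
            else if (ret != -EOPNOTSUPP)
                    break;                  /* a real error stops the loop */

            /* forget a per-stripe EOPNOTSUPP: one odd device must not
             * fail the whole extent discard */
            ret = 0;
    }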
@@ -2932,9 +2938,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2932 | found->full = 0; | 2938 | found->full = 0; |
2933 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; | 2939 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
2934 | found->chunk_alloc = 0; | 2940 | found->chunk_alloc = 0; |
2941 | found->flush = 0; | ||
2942 | init_waitqueue_head(&found->wait); | ||
2935 | *space_info = found; | 2943 | *space_info = found; |
2936 | list_add_rcu(&found->list, &info->space_info); | 2944 | list_add_rcu(&found->list, &info->space_info); |
2937 | atomic_set(&found->caching_threads, 0); | ||
2938 | return 0; | 2945 | return 0; |
2939 | } | 2946 | } |
2940 | 2947 | ||
@@ -3275,6 +3282,9 @@ again: | |||
3275 | } | 3282 | } |
3276 | 3283 | ||
3277 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3284 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3285 | if (ret < 0 && ret != -ENOSPC) | ||
3286 | goto out; | ||
3287 | |||
3278 | spin_lock(&space_info->lock); | 3288 | spin_lock(&space_info->lock); |
3279 | if (ret) | 3289 | if (ret) |
3280 | space_info->full = 1; | 3290 | space_info->full = 1; |
@@ -3284,6 +3294,7 @@ again: | |||
3284 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | 3294 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; |
3285 | space_info->chunk_alloc = 0; | 3295 | space_info->chunk_alloc = 0; |
3286 | spin_unlock(&space_info->lock); | 3296 | spin_unlock(&space_info->lock); |
3297 | out: | ||
3287 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3298 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3288 | return ret; | 3299 | return ret; |
3289 | } | 3300 | } |
@@ -3314,6 +3325,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3314 | if (reserved == 0) | 3325 | if (reserved == 0) |
3315 | return 0; | 3326 | return 0; |
3316 | 3327 | ||
3328 | smp_mb(); | ||
3329 | if (root->fs_info->delalloc_bytes == 0) { | ||
3330 | if (trans) | ||
3331 | return 0; | ||
3332 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3333 | return 0; | ||
3334 | } | ||
3335 | |||
3317 | max_reclaim = min(reserved, to_reclaim); | 3336 | max_reclaim = min(reserved, to_reclaim); |
3318 | 3337 | ||
3319 | while (loops < 1024) { | 3338 | while (loops < 1024) { |
@@ -3356,6 +3375,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3356 | } | 3375 | } |
3357 | 3376 | ||
3358 | } | 3377 | } |
3378 | if (reclaimed >= to_reclaim && !trans) | ||
3379 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3359 | return reclaimed >= to_reclaim; | 3380 | return reclaimed >= to_reclaim; |
3360 | } | 3381 | } |
3361 | 3382 | ||
@@ -3380,15 +3401,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | |||
3380 | u64 num_bytes = orig_bytes; | 3401 | u64 num_bytes = orig_bytes; |
3381 | int retries = 0; | 3402 | int retries = 0; |
3382 | int ret = 0; | 3403 | int ret = 0; |
3383 | bool reserved = false; | ||
3384 | bool committed = false; | 3404 | bool committed = false; |
3405 | bool flushing = false; | ||
3385 | 3406 | ||
3386 | again: | 3407 | again: |
3387 | ret = -ENOSPC; | 3408 | ret = 0; |
3388 | if (reserved) | ||
3389 | num_bytes = 0; | ||
3390 | |||
3391 | spin_lock(&space_info->lock); | 3409 | spin_lock(&space_info->lock); |
3410 | /* | ||
3411 | * We only want to wait if somebody other than us is flushing and we are | ||
3412 | * actually alloed to flush. | ||
3413 | */ | ||
3414 | while (flush && !flushing && space_info->flush) { | ||
3415 | spin_unlock(&space_info->lock); | ||
3416 | /* | ||
3417 | * If we have a trans handle we can't wait because the flusher | ||
3418 | * may have to commit the transaction, which would mean we would | ||
3419 | * deadlock since we are waiting for the flusher to finish, but | ||
3420 | * hold the current transaction open. | ||
3421 | */ | ||
3422 | if (trans) | ||
3423 | return -EAGAIN; | ||
3424 | ret = wait_event_interruptible(space_info->wait, | ||
3425 | !space_info->flush); | ||
3426 | /* Must have been interrupted, return */ | ||
3427 | if (ret) | ||
3428 | return -EINTR; | ||
3429 | |||
3430 | spin_lock(&space_info->lock); | ||
3431 | } | ||
3432 | |||
3433 | ret = -ENOSPC; | ||
3392 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3434 | unused = space_info->bytes_used + space_info->bytes_reserved + |
3393 | space_info->bytes_pinned + space_info->bytes_readonly + | 3435 | space_info->bytes_pinned + space_info->bytes_readonly + |
3394 | space_info->bytes_may_use; | 3436 | space_info->bytes_may_use; |
@@ -3403,8 +3445,7 @@ again: | |||
3403 | if (unused <= space_info->total_bytes) { | 3445 | if (unused <= space_info->total_bytes) { |
3404 | unused = space_info->total_bytes - unused; | 3446 | unused = space_info->total_bytes - unused; |
3405 | if (unused >= num_bytes) { | 3447 | if (unused >= num_bytes) { |
3406 | if (!reserved) | 3448 | space_info->bytes_reserved += orig_bytes; |
3407 | space_info->bytes_reserved += orig_bytes; | ||
3408 | ret = 0; | 3449 | ret = 0; |
3409 | } else { | 3450 | } else { |
3410 | /* | 3451 | /* |
@@ -3429,17 +3470,14 @@ again: | |||
3429 | * to reclaim space we can actually use it instead of somebody else | 3470 | * to reclaim space we can actually use it instead of somebody else |
3430 | * stealing it from us. | 3471 | * stealing it from us. |
3431 | */ | 3472 | */ |
3432 | if (ret && !reserved) { | 3473 | if (ret && flush) { |
3433 | space_info->bytes_reserved += orig_bytes; | 3474 | flushing = true; |
3434 | reserved = true; | 3475 | space_info->flush = 1; |
3435 | } | 3476 | } |
3436 | 3477 | ||
3437 | spin_unlock(&space_info->lock); | 3478 | spin_unlock(&space_info->lock); |
3438 | 3479 | ||
3439 | if (!ret) | 3480 | if (!ret || !flush) |
3440 | return 0; | ||
3441 | |||
3442 | if (!flush) | ||
3443 | goto out; | 3481 | goto out; |
3444 | 3482 | ||
3445 | /* | 3483 | /* |
@@ -3447,11 +3485,11 @@ again: | |||
3447 | * metadata until after the IO is completed. | 3485 | * metadata until after the IO is completed. |
3448 | */ | 3486 | */ |
3449 | ret = shrink_delalloc(trans, root, num_bytes, 1); | 3487 | ret = shrink_delalloc(trans, root, num_bytes, 1); |
3450 | if (ret > 0) | 3488 | if (ret < 0) |
3451 | return 0; | ||
3452 | else if (ret < 0) | ||
3453 | goto out; | 3489 | goto out; |
3454 | 3490 | ||
3491 | ret = 0; | ||
3492 | |||
3455 | /* | 3493 | /* |
3456 | * So if we were overcommitted it's possible that somebody else flushed | 3494 | * So if we were overcommitted it's possible that somebody else flushed |
3457 | * out enough space and we simply didn't have enough space to reclaim, | 3495 | * out enough space and we simply didn't have enough space to reclaim, |
@@ -3462,11 +3500,11 @@ again: | |||
3462 | goto again; | 3500 | goto again; |
3463 | } | 3501 | } |
3464 | 3502 | ||
3465 | spin_lock(&space_info->lock); | ||
3466 | /* | 3503 | /* |
3467 | * Not enough space to be reclaimed, don't bother committing the | 3504 | * Not enough space to be reclaimed, don't bother committing the |
3468 | * transaction. | 3505 | * transaction. |
3469 | */ | 3506 | */ |
3507 | spin_lock(&space_info->lock); | ||
3470 | if (space_info->bytes_pinned < orig_bytes) | 3508 | if (space_info->bytes_pinned < orig_bytes) |
3471 | ret = -ENOSPC; | 3509 | ret = -ENOSPC; |
3472 | spin_unlock(&space_info->lock); | 3510 | spin_unlock(&space_info->lock); |
@@ -3474,10 +3512,13 @@ again: | |||
3474 | goto out; | 3512 | goto out; |
3475 | 3513 | ||
3476 | ret = -EAGAIN; | 3514 | ret = -EAGAIN; |
3477 | if (trans || committed) | 3515 | if (trans) |
3478 | goto out; | 3516 | goto out; |
3479 | 3517 | ||
3480 | ret = -ENOSPC; | 3518 | ret = -ENOSPC; |
3519 | if (committed) | ||
3520 | goto out; | ||
3521 | |||
3481 | trans = btrfs_join_transaction(root); | 3522 | trans = btrfs_join_transaction(root); |
3482 | if (IS_ERR(trans)) | 3523 | if (IS_ERR(trans)) |
3483 | goto out; | 3524 | goto out; |
@@ -3489,12 +3530,12 @@ again: | |||
3489 | } | 3530 | } |
3490 | 3531 | ||
3491 | out: | 3532 | out: |
3492 | if (reserved) { | 3533 | if (flushing) { |
3493 | spin_lock(&space_info->lock); | 3534 | spin_lock(&space_info->lock); |
3494 | space_info->bytes_reserved -= orig_bytes; | 3535 | space_info->flush = 0; |
3536 | wake_up_all(&space_info->wait); | ||
3495 | spin_unlock(&space_info->lock); | 3537 | spin_unlock(&space_info->lock); |
3496 | } | 3538 | } |
3497 | |||
3498 | return ret; | 3539 | return ret; |
3499 | } | 3540 | } |
3500 | 3541 | ||
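Reduced to its moving parts, the single-flusher protocol that reserve_metadata_bytes() gains above works like this (fields and helpers as in the hunks; schematic, error paths elided):

    spin_lock(&space_info->lock);
    /* wait only if somebody else is flushing and we may flush too */
    while (flush && !flushing && space_info->flush) {
            spin_unlock(&space_info->lock);
            if (trans)              /* the flusher may need to commit:
                                       waiting with a handle held
                                       would deadlock */
                    return -EAGAIN;
            if (wait_event_interruptible(space_info->wait,
                                         !space_info->flush))
                    return -EINTR;
            spin_lock(&space_info->lock);
    }
    /* ... on a failed reservation, become the one flusher ... */
    if (ret && flush) {
            flushing = true;
            space_info->flush = 1;
    }
    spin_unlock(&space_info->lock);

    /* ... and on every exit path, hand the baton back */
    if (flushing) {
            spin_lock(&space_info->lock);
            space_info->flush = 0;
            wake_up_all(&space_info->wait);
            spin_unlock(&space_info->lock);
    }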
@@ -3704,7 +3745,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3704 | if (commit_trans) { | 3745 | if (commit_trans) { |
3705 | if (trans) | 3746 | if (trans) |
3706 | return -EAGAIN; | 3747 | return -EAGAIN; |
3707 | |||
3708 | trans = btrfs_join_transaction(root); | 3748 | trans = btrfs_join_transaction(root); |
3709 | BUG_ON(IS_ERR(trans)); | 3749 | BUG_ON(IS_ERR(trans)); |
3710 | ret = btrfs_commit_transaction(trans, root); | 3750 | ret = btrfs_commit_transaction(trans, root); |
@@ -3874,26 +3914,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3874 | return 0; | 3914 | return 0; |
3875 | } | 3915 | } |
3876 | 3916 | ||
3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3878 | struct btrfs_root *root, | ||
3879 | int num_items) | ||
3880 | { | ||
3881 | u64 num_bytes; | ||
3882 | int ret; | ||
3883 | |||
3884 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3885 | return 0; | ||
3886 | |||
3887 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
3888 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3889 | num_bytes); | ||
3890 | if (!ret) { | ||
3891 | trans->bytes_reserved += num_bytes; | ||
3892 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3893 | } | ||
3894 | return ret; | ||
3895 | } | ||
3896 | |||
3897 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 3917 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
3898 | struct btrfs_root *root) | 3918 | struct btrfs_root *root) |
3899 | { | 3919 | { |
@@ -3944,6 +3964,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3944 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3964 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3945 | } | 3965 | } |
3946 | 3966 | ||
3967 | static unsigned drop_outstanding_extent(struct inode *inode) | ||
3968 | { | ||
3969 | unsigned dropped_extents = 0; | ||
3970 | |||
3971 | spin_lock(&BTRFS_I(inode)->lock); | ||
3972 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | ||
3973 | BTRFS_I(inode)->outstanding_extents--; | ||
3974 | |||
3975 | /* | ||
3976 | * If we have at least as many outstanding extents as reserved | ||
3977 | * extents then we need to leave the reserved extents count alone. | ||
3978 | */ | ||
3979 | if (BTRFS_I(inode)->outstanding_extents >= | ||
3980 | BTRFS_I(inode)->reserved_extents) | ||
3981 | goto out; | ||
3982 | |||
3983 | dropped_extents = BTRFS_I(inode)->reserved_extents - | ||
3984 | BTRFS_I(inode)->outstanding_extents; | ||
3985 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | ||
3986 | out: | ||
3987 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3988 | return dropped_extents; | ||
3989 | } | ||
3990 | |||
3947 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 3991 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) |
3948 | { | 3992 | { |
3949 | return num_bytes >>= 3; | 3993 | return num_bytes >>= 3; |
@@ -3953,9 +3997,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3953 | { | 3997 | { |
3954 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3998 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3955 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3999 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3956 | u64 to_reserve; | 4000 | u64 to_reserve = 0; |
3957 | int nr_extents; | 4001 | unsigned nr_extents = 0; |
3958 | int reserved_extents; | ||
3959 | int ret; | 4002 | int ret; |
3960 | 4003 | ||
3961 | if (btrfs_transaction_in_commit(root->fs_info)) | 4004 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -3963,66 +4006,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3963 | 4006 | ||
3964 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4007 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3965 | 4008 | ||
3966 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 4009 | spin_lock(&BTRFS_I(inode)->lock); |
3967 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | 4010 | BTRFS_I(inode)->outstanding_extents++; |
4011 | |||
4012 | if (BTRFS_I(inode)->outstanding_extents > | ||
4013 | BTRFS_I(inode)->reserved_extents) { | ||
4014 | nr_extents = BTRFS_I(inode)->outstanding_extents - | ||
4015 | BTRFS_I(inode)->reserved_extents; | ||
4016 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3968 | 4017 | ||
3969 | if (nr_extents > reserved_extents) { | ||
3970 | nr_extents -= reserved_extents; | ||
3971 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4018 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
3972 | } else { | ||
3973 | nr_extents = 0; | ||
3974 | to_reserve = 0; | ||
3975 | } | 4019 | } |
4020 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3976 | 4021 | ||
3977 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4022 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3978 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4023 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3979 | if (ret) | 4024 | if (ret) { |
4025 | unsigned dropped; | ||
4026 | /* | ||
4027 | * We don't need the return value since our reservation failed, | ||
4028 | * we just need to clean up our counter. | ||
4029 | */ | ||
4030 | dropped = drop_outstanding_extent(inode); | ||
4031 | WARN_ON(dropped > 1); | ||
3980 | return ret; | 4032 | return ret; |
3981 | 4033 | } | |
3982 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); | ||
3983 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3984 | 4034 | ||
3985 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4035 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3986 | 4036 | ||
3987 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3988 | shrink_delalloc(NULL, root, to_reserve, 0); | ||
3989 | |||
3990 | return 0; | 4037 | return 0; |
3991 | } | 4038 | } |
3992 | 4039 | ||
3993 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4040 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
3994 | { | 4041 | { |
3995 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4042 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3996 | u64 to_free; | 4043 | u64 to_free = 0; |
3997 | int nr_extents; | 4044 | unsigned dropped; |
3998 | int reserved_extents; | ||
3999 | 4045 | ||
4000 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4046 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4001 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4047 | dropped = drop_outstanding_extent(inode); |
4002 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4003 | |||
4004 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | ||
4005 | do { | ||
4006 | int old, new; | ||
4007 | |||
4008 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
4009 | if (nr_extents >= reserved_extents) { | ||
4010 | nr_extents = 0; | ||
4011 | break; | ||
4012 | } | ||
4013 | old = reserved_extents; | ||
4014 | nr_extents = reserved_extents - nr_extents; | ||
4015 | new = reserved_extents - nr_extents; | ||
4016 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4017 | reserved_extents, new); | ||
4018 | if (likely(old == reserved_extents)) | ||
4019 | break; | ||
4020 | reserved_extents = old; | ||
4021 | } while (1); | ||
4022 | 4048 | ||
4023 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4049 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4024 | if (nr_extents > 0) | 4050 | if (dropped > 0) |
4025 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); | 4051 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4026 | 4052 | ||
4027 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4053 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4028 | to_free); | 4054 | to_free); |
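The delalloc hunks above replace atomics plus a cmpxchg loop with two plain counters serialized by BTRFS_I(inode)->lock. The pairing that matters, condensed from the right-hand column:

    /* reserve side: bump outstanding, reserve only the shortfall */
    spin_lock(&BTRFS_I(inode)->lock);
    BTRFS_I(inode)->outstanding_extents++;
    if (BTRFS_I(inode)->outstanding_extents >
        BTRFS_I(inode)->reserved_extents) {
            nr_extents = BTRFS_I(inode)->outstanding_extents -
                         BTRFS_I(inode)->reserved_extents;
            BTRFS_I(inode)->reserved_extents += nr_extents;
    }
    spin_unlock(&BTRFS_I(inode)->lock);

    /* release side (and failed reservations): reconcile the counters
     * and return however many reserved extents to give back */
    dropped = drop_outstanding_extent(inode);
    if (dropped > 0)
            to_free += btrfs_calc_trans_metadata_size(root, dropped);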
@@ -4444,7 +4470,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4444 | printk(KERN_ERR "umm, got %d back from search" | 4470 | printk(KERN_ERR "umm, got %d back from search" |
4445 | ", was looking for %llu\n", ret, | 4471 | ", was looking for %llu\n", ret, |
4446 | (unsigned long long)bytenr); | 4472 | (unsigned long long)bytenr); |
4447 | btrfs_print_leaf(extent_root, path->nodes[0]); | 4473 | if (ret > 0) |
4474 | btrfs_print_leaf(extent_root, | ||
4475 | path->nodes[0]); | ||
4448 | } | 4476 | } |
4449 | BUG_ON(ret); | 4477 | BUG_ON(ret); |
4450 | extent_slot = path->slots[0]; | 4478 | extent_slot = path->slots[0]; |
@@ -4990,14 +5018,10 @@ have_block_group: | |||
4990 | } | 5018 | } |
4991 | 5019 | ||
4992 | /* | 5020 | /* |
4993 | * We only want to start kthread caching if we are at | 5021 | * The caching workers are limited to 2 threads, so we |
4994 | * the point where we will wait for caching to make | 5022 | * can queue as much work as we care to. |
4995 | * progress, or if our ideal search is over and we've | ||
4996 | * found somebody to start caching. | ||
4997 | */ | 5023 | */ |
4998 | if (loop > LOOP_CACHING_NOWAIT || | 5024 | if (loop > LOOP_FIND_IDEAL) { |
4999 | (loop > LOOP_FIND_IDEAL && | ||
5000 | atomic_read(&space_info->caching_threads) < 2)) { | ||
5001 | ret = cache_block_group(block_group, trans, | 5025 | ret = cache_block_group(block_group, trans, |
5002 | orig_root, 0); | 5026 | orig_root, 0); |
5003 | BUG_ON(ret); | 5027 | BUG_ON(ret); |
@@ -5065,7 +5089,9 @@ have_block_group: | |||
5065 | * group it does point to and try again | 5089 | * group it does point to and try again |
5066 | */ | 5090 | */ |
5067 | if (!last_ptr_loop && last_ptr->block_group && | 5091 | if (!last_ptr_loop && last_ptr->block_group && |
5068 | last_ptr->block_group != block_group) { | 5092 | last_ptr->block_group != block_group && |
5093 | index <= | ||
5094 | get_block_group_index(last_ptr->block_group)) { | ||
5069 | 5095 | ||
5070 | btrfs_put_block_group(block_group); | 5096 | btrfs_put_block_group(block_group); |
5071 | block_group = last_ptr->block_group; | 5097 | block_group = last_ptr->block_group; |
@@ -5219,8 +5245,7 @@ loop: | |||
5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5245 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
5220 | found_uncached_bg = false; | 5246 | found_uncached_bg = false; |
5221 | loop++; | 5247 | loop++; |
5222 | if (!ideal_cache_percent && | 5248 | if (!ideal_cache_percent) |
5223 | atomic_read(&space_info->caching_threads)) | ||
5224 | goto search; | 5249 | goto search; |
5225 | 5250 | ||
5226 | /* | 5251 | /* |
@@ -5494,7 +5519,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
5494 | u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); | 5519 | u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); |
5495 | 5520 | ||
5496 | path = btrfs_alloc_path(); | 5521 | path = btrfs_alloc_path(); |
5497 | BUG_ON(!path); | 5522 | if (!path) |
5523 | return -ENOMEM; | ||
5498 | 5524 | ||
5499 | path->leave_spinning = 1; | 5525 | path->leave_spinning = 1; |
5500 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 5526 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
@@ -5623,7 +5649,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
5623 | if (!buf) | 5649 | if (!buf) |
5624 | return ERR_PTR(-ENOMEM); | 5650 | return ERR_PTR(-ENOMEM); |
5625 | btrfs_set_header_generation(buf, trans->transid); | 5651 | btrfs_set_header_generation(buf, trans->transid); |
5626 | btrfs_set_buffer_lockdep_class(buf, level); | 5652 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); |
5627 | btrfs_tree_lock(buf); | 5653 | btrfs_tree_lock(buf); |
5628 | clean_tree_block(trans, root, buf); | 5654 | clean_tree_block(trans, root, buf); |
5629 | 5655 | ||
@@ -5910,7 +5936,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5910 | return 1; | 5936 | return 1; |
5911 | 5937 | ||
5912 | if (path->locks[level] && !wc->keep_locks) { | 5938 | if (path->locks[level] && !wc->keep_locks) { |
5913 | btrfs_tree_unlock(eb); | 5939 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5914 | path->locks[level] = 0; | 5940 | path->locks[level] = 0; |
5915 | } | 5941 | } |
5916 | return 0; | 5942 | return 0; |
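This and the following hunks rely on path->locks[level] now carrying a lock type rather than a plain boolean. A sketch of the helper they switch to, with the constants assumed from the reader/writer locking rework in fs/btrfs/locking.h:

    #define BTRFS_WRITE_LOCK          1
    #define BTRFS_READ_LOCK           2
    #define BTRFS_WRITE_LOCK_BLOCKING 3
    #define BTRFS_READ_LOCK_BLOCKING  4

    /* drop whichever kind of lock the path recorded for this level */
    static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
    {
            if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
                    btrfs_tree_unlock(eb);
            else if (rw == BTRFS_READ_LOCK_BLOCKING)
                    btrfs_tree_read_unlock_blocking(eb);
            else if (rw == BTRFS_READ_LOCK)
                    btrfs_tree_read_unlock(eb);
            else
                    BUG();
    }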
@@ -5934,7 +5960,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5934 | * keep the tree lock | 5960 | * keep the tree lock |
5935 | */ | 5961 | */ |
5936 | if (path->locks[level] && level > 0) { | 5962 | if (path->locks[level] && level > 0) { |
5937 | btrfs_tree_unlock(eb); | 5963 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5938 | path->locks[level] = 0; | 5964 | path->locks[level] = 0; |
5939 | } | 5965 | } |
5940 | return 0; | 5966 | return 0; |
@@ -6047,7 +6073,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6047 | BUG_ON(level != btrfs_header_level(next)); | 6073 | BUG_ON(level != btrfs_header_level(next)); |
6048 | path->nodes[level] = next; | 6074 | path->nodes[level] = next; |
6049 | path->slots[level] = 0; | 6075 | path->slots[level] = 0; |
6050 | path->locks[level] = 1; | 6076 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6051 | wc->level = level; | 6077 | wc->level = level; |
6052 | if (wc->level == 1) | 6078 | if (wc->level == 1) |
6053 | wc->reada_slot = 0; | 6079 | wc->reada_slot = 0; |
@@ -6118,7 +6144,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6118 | BUG_ON(level == 0); | 6144 | BUG_ON(level == 0); |
6119 | btrfs_tree_lock(eb); | 6145 | btrfs_tree_lock(eb); |
6120 | btrfs_set_lock_blocking(eb); | 6146 | btrfs_set_lock_blocking(eb); |
6121 | path->locks[level] = 1; | 6147 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6122 | 6148 | ||
6123 | ret = btrfs_lookup_extent_info(trans, root, | 6149 | ret = btrfs_lookup_extent_info(trans, root, |
6124 | eb->start, eb->len, | 6150 | eb->start, eb->len, |
@@ -6127,8 +6153,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6127 | BUG_ON(ret); | 6153 | BUG_ON(ret); |
6128 | BUG_ON(wc->refs[level] == 0); | 6154 | BUG_ON(wc->refs[level] == 0); |
6129 | if (wc->refs[level] == 1) { | 6155 | if (wc->refs[level] == 1) { |
6130 | btrfs_tree_unlock(eb); | 6156 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
6131 | path->locks[level] = 0; | ||
6132 | return 1; | 6157 | return 1; |
6133 | } | 6158 | } |
6134 | } | 6159 | } |
@@ -6150,7 +6175,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6150 | btrfs_header_generation(eb) == trans->transid) { | 6175 | btrfs_header_generation(eb) == trans->transid) { |
6151 | btrfs_tree_lock(eb); | 6176 | btrfs_tree_lock(eb); |
6152 | btrfs_set_lock_blocking(eb); | 6177 | btrfs_set_lock_blocking(eb); |
6153 | path->locks[level] = 1; | 6178 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6154 | } | 6179 | } |
6155 | clean_tree_block(trans, root, eb); | 6180 | clean_tree_block(trans, root, eb); |
6156 | } | 6181 | } |
@@ -6229,7 +6254,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6229 | return 0; | 6254 | return 0; |
6230 | 6255 | ||
6231 | if (path->locks[level]) { | 6256 | if (path->locks[level]) { |
6232 | btrfs_tree_unlock(path->nodes[level]); | 6257 | btrfs_tree_unlock_rw(path->nodes[level], |
6258 | path->locks[level]); | ||
6233 | path->locks[level] = 0; | 6259 | path->locks[level] = 0; |
6234 | } | 6260 | } |
6235 | free_extent_buffer(path->nodes[level]); | 6261 | free_extent_buffer(path->nodes[level]); |
@@ -6251,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6251 | * also make sure backrefs for the shared block and all lower level | 6277 | * also make sure backrefs for the shared block and all lower level |
6252 | * blocks are properly updated. | 6278 | * blocks are properly updated. |
6253 | */ | 6279 | */ |
6254 | int btrfs_drop_snapshot(struct btrfs_root *root, | 6280 | void btrfs_drop_snapshot(struct btrfs_root *root, |
6255 | struct btrfs_block_rsv *block_rsv, int update_ref) | 6281 | struct btrfs_block_rsv *block_rsv, int update_ref) |
6256 | { | 6282 | { |
6257 | struct btrfs_path *path; | 6283 | struct btrfs_path *path; |
6258 | struct btrfs_trans_handle *trans; | 6284 | struct btrfs_trans_handle *trans; |
@@ -6265,10 +6291,17 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6265 | int level; | 6291 | int level; |
6266 | 6292 | ||
6267 | path = btrfs_alloc_path(); | 6293 | path = btrfs_alloc_path(); |
6268 | BUG_ON(!path); | 6294 | if (!path) { |
6295 | err = -ENOMEM; | ||
6296 | goto out; | ||
6297 | } | ||
6269 | 6298 | ||
6270 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 6299 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
6271 | BUG_ON(!wc); | 6300 | if (!wc) { |
6301 | btrfs_free_path(path); | ||
6302 | err = -ENOMEM; | ||
6303 | goto out; | ||
6304 | } | ||
6272 | 6305 | ||
6273 | trans = btrfs_start_transaction(tree_root, 0); | 6306 | trans = btrfs_start_transaction(tree_root, 0); |
6274 | BUG_ON(IS_ERR(trans)); | 6307 | BUG_ON(IS_ERR(trans)); |
@@ -6281,7 +6314,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6281 | path->nodes[level] = btrfs_lock_root_node(root); | 6314 | path->nodes[level] = btrfs_lock_root_node(root); |
6282 | btrfs_set_lock_blocking(path->nodes[level]); | 6315 | btrfs_set_lock_blocking(path->nodes[level]); |
6283 | path->slots[level] = 0; | 6316 | path->slots[level] = 0; |
6284 | path->locks[level] = 1; | 6317 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6285 | memset(&wc->update_progress, 0, | 6318 | memset(&wc->update_progress, 0, |
6286 | sizeof(wc->update_progress)); | 6319 | sizeof(wc->update_progress)); |
6287 | } else { | 6320 | } else { |
@@ -6296,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6296 | path->lowest_level = 0; | 6329 | path->lowest_level = 0; |
6297 | if (ret < 0) { | 6330 | if (ret < 0) { |
6298 | err = ret; | 6331 | err = ret; |
6299 | goto out; | 6332 | goto out_free; |
6300 | } | 6333 | } |
6301 | WARN_ON(ret > 0); | 6334 | WARN_ON(ret > 0); |
6302 | 6335 | ||
@@ -6403,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6403 | free_extent_buffer(root->commit_root); | 6436 | free_extent_buffer(root->commit_root); |
6404 | kfree(root); | 6437 | kfree(root); |
6405 | } | 6438 | } |
6406 | out: | 6439 | out_free: |
6407 | btrfs_end_transaction_throttle(trans, tree_root); | 6440 | btrfs_end_transaction_throttle(trans, tree_root); |
6408 | kfree(wc); | 6441 | kfree(wc); |
6409 | btrfs_free_path(path); | 6442 | btrfs_free_path(path); |
6410 | return err; | 6443 | out: |
6444 | if (err) | ||
6445 | btrfs_std_error(root->fs_info, err); | ||
6446 | return; | ||
6411 | } | 6447 | } |
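Since btrfs_drop_snapshot() now returns void, a failure is reported through the generic error handler instead of being passed to callers that ignored it anyway. The macro of this era looks roughly like this (an assumption based on fs/btrfs/ctree.h, not part of this diff):

    /* record a filesystem error together with the call site */
    #define btrfs_std_error(fs_info, errno)                         \
    do {                                                            \
            if ((errno))                                            \
                    __btrfs_std_error((fs_info), __func__,          \
                                      __LINE__, (errno));           \
    } while (0)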
6412 | 6448 | ||
6413 | /* | 6449 | /* |
@@ -6449,7 +6485,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6449 | level = btrfs_header_level(node); | 6485 | level = btrfs_header_level(node); |
6450 | path->nodes[level] = node; | 6486 | path->nodes[level] = node; |
6451 | path->slots[level] = 0; | 6487 | path->slots[level] = 0; |
6452 | path->locks[level] = 1; | 6488 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6453 | 6489 | ||
6454 | wc->refs[parent_level] = 1; | 6490 | wc->refs[parent_level] = 1; |
6455 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 6491 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
@@ -6524,30 +6560,48 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
6524 | return flags; | 6560 | return flags; |
6525 | } | 6561 | } |
6526 | 6562 | ||
6527 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) | 6563 | static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) |
6528 | { | 6564 | { |
6529 | struct btrfs_space_info *sinfo = cache->space_info; | 6565 | struct btrfs_space_info *sinfo = cache->space_info; |
6530 | u64 num_bytes; | 6566 | u64 num_bytes; |
6567 | u64 min_allocable_bytes; | ||
6531 | int ret = -ENOSPC; | 6568 | int ret = -ENOSPC; |
6532 | 6569 | ||
6533 | if (cache->ro) | 6570 | |
6534 | return 0; | 6571 | /* |
6572 | * We still need some metadata and system space for chunk ||
6573 | * allocation in a few corner cases, so refuse the transition ||
6574 | * unless the caller forces the block group read-only. ||
6575 | */ | ||
6576 | if ((sinfo->flags & | ||
6577 | (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && | ||
6578 | !force) | ||
6579 | min_allocable_bytes = 1 * 1024 * 1024; | ||
6580 | else | ||
6581 | min_allocable_bytes = 0; | ||
6535 | 6582 | ||
6536 | spin_lock(&sinfo->lock); | 6583 | spin_lock(&sinfo->lock); |
6537 | spin_lock(&cache->lock); | 6584 | spin_lock(&cache->lock); |
6585 | |||
6586 | if (cache->ro) { | ||
6587 | ret = 0; | ||
6588 | goto out; | ||
6589 | } | ||
6590 | |||
6538 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | 6591 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - |
6539 | cache->bytes_super - btrfs_block_group_used(&cache->item); | 6592 | cache->bytes_super - btrfs_block_group_used(&cache->item); |
6540 | 6593 | ||
6541 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6594 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
6542 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6595 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
6543 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { | 6596 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= |
6597 | sinfo->total_bytes) { | ||
6544 | sinfo->bytes_readonly += num_bytes; | 6598 | sinfo->bytes_readonly += num_bytes; |
6545 | sinfo->bytes_reserved += cache->reserved_pinned; | 6599 | sinfo->bytes_reserved += cache->reserved_pinned; |
6546 | cache->reserved_pinned = 0; | 6600 | cache->reserved_pinned = 0; |
6547 | cache->ro = 1; | 6601 | cache->ro = 1; |
6548 | ret = 0; | 6602 | ret = 0; |
6549 | } | 6603 | } |
6550 | 6604 | out: | |
6551 | spin_unlock(&cache->lock); | 6605 | spin_unlock(&cache->lock); |
6552 | spin_unlock(&sinfo->lock); | 6606 | spin_unlock(&sinfo->lock); |
6553 | return ret; | 6607 | return ret; |
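Restated, the transition to read-only is allowed only when every committed or reserved byte plus the new min_allocable_bytes cushion still fits in the space_info. A hypothetical helper spelling out the same test:

    /* hypothetical restatement of the check above; all values in bytes */
    static bool ro_transition_fits(struct btrfs_space_info *s, u64 num_bytes,
                                   u64 reserved_pinned, u64 min_allocable)
    {
            return s->bytes_used + s->bytes_reserved + s->bytes_pinned +
                   s->bytes_may_use + s->bytes_readonly + reserved_pinned +
                   num_bytes + min_allocable <= s->total_bytes;
    }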
@@ -6571,7 +6625,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6571 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 6625 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6572 | CHUNK_ALLOC_FORCE); | 6626 | CHUNK_ALLOC_FORCE); |
6573 | 6627 | ||
6574 | ret = set_block_group_ro(cache); | 6628 | ret = set_block_group_ro(cache, 0); |
6575 | if (!ret) | 6629 | if (!ret) |
6576 | goto out; | 6630 | goto out; |
6577 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 6631 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
@@ -6579,7 +6633,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6579 | CHUNK_ALLOC_FORCE); | 6633 | CHUNK_ALLOC_FORCE); |
6580 | if (ret < 0) | 6634 | if (ret < 0) |
6581 | goto out; | 6635 | goto out; |
6582 | ret = set_block_group_ro(cache); | 6636 | ret = set_block_group_ro(cache, 0); |
6583 | out: | 6637 | out: |
6584 | btrfs_end_transaction(trans, root); | 6638 | btrfs_end_transaction(trans, root); |
6585 | return ret; | 6639 | return ret; |
@@ -6680,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6680 | struct btrfs_space_info *space_info; | 6734 | struct btrfs_space_info *space_info; |
6681 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 6735 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6682 | struct btrfs_device *device; | 6736 | struct btrfs_device *device; |
6737 | u64 min_free; | ||
6738 | u64 dev_min = 1; | ||
6739 | u64 dev_nr = 0; | ||
6740 | int index; | ||
6683 | int full = 0; | 6741 | int full = 0; |
6684 | int ret = 0; | 6742 | int ret = 0; |
6685 | 6743 | ||
@@ -6689,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6689 | if (!block_group) | 6747 | if (!block_group) |
6690 | return -1; | 6748 | return -1; |
6691 | 6749 | ||
6750 | min_free = btrfs_block_group_used(&block_group->item); | ||
6751 | |||
6692 | /* no bytes used, we're good */ | 6752 | /* no bytes used, we're good */ |
6693 | if (!btrfs_block_group_used(&block_group->item)) | 6753 | if (!min_free) |
6694 | goto out; | 6754 | goto out; |
6695 | 6755 | ||
6696 | space_info = block_group->space_info; | 6756 | space_info = block_group->space_info; |
@@ -6706,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6706 | * all of the extents from this block group. If we can, we're good | 6766 | * all of the extents from this block group. If we can, we're good |
6707 | */ | 6767 | */ |
6708 | if ((space_info->total_bytes != block_group->key.offset) && | 6768 | if ((space_info->total_bytes != block_group->key.offset) && |
6709 | (space_info->bytes_used + space_info->bytes_reserved + | 6769 | (space_info->bytes_used + space_info->bytes_reserved + |
6710 | space_info->bytes_pinned + space_info->bytes_readonly + | 6770 | space_info->bytes_pinned + space_info->bytes_readonly + |
6711 | btrfs_block_group_used(&block_group->item) < | 6771 | min_free < space_info->total_bytes)) { |
6712 | space_info->total_bytes)) { | ||
6713 | spin_unlock(&space_info->lock); | 6772 | spin_unlock(&space_info->lock); |
6714 | goto out; | 6773 | goto out; |
6715 | } | 6774 | } |
@@ -6726,9 +6785,31 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6726 | if (full) | 6785 | if (full) |
6727 | goto out; | 6786 | goto out; |
6728 | 6787 | ||
6788 | /* | ||
6789 | * index: | ||
6790 | * 0: raid10 | ||
6791 | * 1: raid1 | ||
6792 | * 2: dup | ||
6793 | * 3: raid0 | ||
6794 | * 4: single | ||
6795 | */ | ||
6796 | index = get_block_group_index(block_group); | ||
6797 | if (index == 0) { | ||
6798 | dev_min = 4; | ||
6799 | /* Divide by 2 */ | ||
6800 | min_free >>= 1; | ||
6801 | } else if (index == 1) { | ||
6802 | dev_min = 2; | ||
6803 | } else if (index == 2) { | ||
6804 | /* Multiply by 2 */ | ||
6805 | min_free <<= 1; | ||
6806 | } else if (index == 3) { | ||
6807 | dev_min = fs_devices->rw_devices; | ||
6808 | do_div(min_free, dev_min); | ||
6809 | } | ||
6810 | |||
6729 | mutex_lock(&root->fs_info->chunk_mutex); | 6811 | mutex_lock(&root->fs_info->chunk_mutex); |
6730 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 6812 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
6731 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
6732 | u64 dev_offset; | 6813 | u64 dev_offset; |
6733 | 6814 | ||
6734 | /* | 6815 | /* |
@@ -6739,7 +6820,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
6739 | ret = find_free_dev_extent(NULL, device, min_free, | 6820 | ret = find_free_dev_extent(NULL, device, min_free, |
6740 | &dev_offset, NULL); | 6821 | &dev_offset, NULL); |
6741 | if (!ret) | 6822 | if (!ret) |
6823 | dev_nr++; | ||
6824 | |||
6825 | if (dev_nr >= dev_min) | ||
6742 | break; | 6826 | break; |
6827 | |||
6743 | ret = -1; | 6828 | ret = -1; |
6744 | } | 6829 | } |
6745 | } | 6830 | } |
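Worked examples of the per-profile requirements computed above, with min_free starting as the bytes used by the block group:

    /* raid10, 2 GiB used:  dev_min = 4, min_free = 1 GiB per device
     * raid1,  2 GiB used:  dev_min = 2, min_free = 2 GiB per device
     * dup,    2 GiB used:  dev_min = 1, min_free = 4 GiB on one device
     * raid0, 4 rw devices, 2 GiB used: dev_min = 4, min_free = 512 MiB
     *
     * relocation is considered possible only when find_free_dev_extent()
     * succeeds on at least dev_min devices.
     */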
@@ -7016,7 +7101,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7016 | 7101 | ||
7017 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7102 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7018 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7103 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7019 | set_block_group_ro(cache); | 7104 | set_block_group_ro(cache, 1); |
7020 | } | 7105 | } |
7021 | 7106 | ||
7022 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | 7107 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { |
@@ -7030,9 +7115,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7030 | * mirrored block groups. | 7115 | * mirrored block groups. |
7031 | */ | 7116 | */ |
7032 | list_for_each_entry(cache, &space_info->block_groups[3], list) | 7117 | list_for_each_entry(cache, &space_info->block_groups[3], list) |
7033 | set_block_group_ro(cache); | 7118 | set_block_group_ro(cache, 1); |
7034 | list_for_each_entry(cache, &space_info->block_groups[4], list) | 7119 | list_for_each_entry(cache, &space_info->block_groups[4], list) |
7035 | set_block_group_ro(cache); | 7120 | set_block_group_ro(cache, 1); |
7036 | } | 7121 | } |
7037 | 7122 | ||
7038 | init_global_block_rsv(info); | 7123 | init_global_block_rsv(info); |
@@ -7162,11 +7247,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7162 | spin_unlock(&cluster->refill_lock); | 7247 | spin_unlock(&cluster->refill_lock); |
7163 | 7248 | ||
7164 | path = btrfs_alloc_path(); | 7249 | path = btrfs_alloc_path(); |
7165 | BUG_ON(!path); | 7250 | if (!path) { |
7251 | ret = -ENOMEM; | ||
7252 | goto out; | ||
7253 | } | ||
7166 | 7254 | ||
7167 | inode = lookup_free_space_inode(root, block_group, path); | 7255 | inode = lookup_free_space_inode(root, block_group, path); |
7168 | if (!IS_ERR(inode)) { | 7256 | if (!IS_ERR(inode)) { |
7169 | btrfs_orphan_add(trans, inode); | 7257 | ret = btrfs_orphan_add(trans, inode); |
7258 | BUG_ON(ret); | ||
7170 | clear_nlink(inode); | 7259 | clear_nlink(inode); |
7171 | /* One for the block groups ref */ | 7260 | /* One for the block groups ref */ |
7172 | spin_lock(&block_group->lock); | 7261 | spin_lock(&block_group->lock); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7055d11c1efd..d418164a35f1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | |||
254 | * | 254 | * |
255 | * This should be called with the tree lock held. | 255 | * This should be called with the tree lock held. |
256 | */ | 256 | */ |
257 | static int merge_state(struct extent_io_tree *tree, | 257 | static void merge_state(struct extent_io_tree *tree, |
258 | struct extent_state *state) | 258 | struct extent_state *state) |
259 | { | 259 | { |
260 | struct extent_state *other; | 260 | struct extent_state *other; |
261 | struct rb_node *other_node; | 261 | struct rb_node *other_node; |
262 | 262 | ||
263 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 263 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
264 | return 0; | 264 | return; |
265 | 265 | ||
266 | other_node = rb_prev(&state->rb_node); | 266 | other_node = rb_prev(&state->rb_node); |
267 | if (other_node) { | 267 | if (other_node) { |
@@ -281,26 +281,19 @@ static int merge_state(struct extent_io_tree *tree, | |||
281 | if (other->start == state->end + 1 && | 281 | if (other->start == state->end + 1 && |
282 | other->state == state->state) { | 282 | other->state == state->state) { |
283 | merge_cb(tree, state, other); | 283 | merge_cb(tree, state, other); |
284 | other->start = state->start; | 284 | state->end = other->end; |
285 | state->tree = NULL; | 285 | other->tree = NULL; |
286 | rb_erase(&state->rb_node, &tree->state); | 286 | rb_erase(&other->rb_node, &tree->state); |
287 | free_extent_state(state); | 287 | free_extent_state(other); |
288 | state = NULL; | ||
289 | } | 288 | } |
290 | } | 289 | } |
291 | |||
292 | return 0; | ||
293 | } | 290 | } |
294 | 291 | ||
295 | static int set_state_cb(struct extent_io_tree *tree, | 292 | static void set_state_cb(struct extent_io_tree *tree, |
296 | struct extent_state *state, int *bits) | 293 | struct extent_state *state, int *bits) |
297 | { | 294 | { |
298 | if (tree->ops && tree->ops->set_bit_hook) { | 295 | if (tree->ops && tree->ops->set_bit_hook) |
299 | return tree->ops->set_bit_hook(tree->mapping->host, | 296 | tree->ops->set_bit_hook(tree->mapping->host, state, bits); |
300 | state, bits); | ||
301 | } | ||
302 | |||
303 | return 0; | ||
304 | } | 297 | } |
305 | 298 | ||
306 | static void clear_state_cb(struct extent_io_tree *tree, | 299 | static void clear_state_cb(struct extent_io_tree *tree, |
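Note the direction change in merge_state(): merging with the successor now grows 'state' and frees 'other', rather than freeing 'state' itself. That matters because callers may have just stashed 'state' in a cached_state pointer; a sketch of the pattern the new direction keeps safe (see set_extent_bit() below):

    set_state_bits(tree, state, &bits);
    cache_state(state, cached_state);  /* takes a reference on 'state' */
    merge_state(tree, state);          /* must not free 'state' here */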
@@ -310,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
310 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 303 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
311 | } | 304 | } |
312 | 305 | ||
306 | static void set_state_bits(struct extent_io_tree *tree, | ||
307 | struct extent_state *state, int *bits); | ||
308 | |||
313 | /* | 309 | /* |
314 | * insert an extent_state struct into the tree. 'bits' are set on the | 310 | * insert an extent_state struct into the tree. 'bits' are set on the |
315 | * struct before it is inserted. | 311 | * struct before it is inserted. |
@@ -325,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
325 | int *bits) | 321 | int *bits) |
326 | { | 322 | { |
327 | struct rb_node *node; | 323 | struct rb_node *node; |
328 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
329 | int ret; | ||
330 | 324 | ||
331 | if (end < start) { | 325 | if (end < start) { |
332 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 326 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
@@ -336,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
336 | } | 330 | } |
337 | state->start = start; | 331 | state->start = start; |
338 | state->end = end; | 332 | state->end = end; |
339 | ret = set_state_cb(tree, state, bits); | ||
340 | if (ret) | ||
341 | return ret; | ||
342 | 333 | ||
343 | if (bits_to_set & EXTENT_DIRTY) | 334 | set_state_bits(tree, state, bits); |
344 | tree->dirty_bytes += end - start + 1; | 335 | |
345 | state->state |= bits_to_set; | ||
346 | node = tree_insert(&tree->state, end, &state->rb_node); | 336 | node = tree_insert(&tree->state, end, &state->rb_node); |
347 | if (node) { | 337 | if (node) { |
348 | struct extent_state *found; | 338 | struct extent_state *found; |
@@ -351,7 +341,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
351 | "%llu %llu\n", (unsigned long long)found->start, | 341 | "%llu %llu\n", (unsigned long long)found->start, |
352 | (unsigned long long)found->end, | 342 | (unsigned long long)found->end, |
353 | (unsigned long long)start, (unsigned long long)end); | 343 | (unsigned long long)start, (unsigned long long)end); |
354 | free_extent_state(state); | ||
355 | return -EEXIST; | 344 | return -EEXIST; |
356 | } | 345 | } |
357 | state->tree = tree; | 346 | state->tree = tree; |
@@ -359,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree, | |||
359 | return 0; | 348 | return 0; |
360 | } | 349 | } |
361 | 350 | ||
362 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | 351 | static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, |
363 | u64 split) | 352 | u64 split) |
364 | { | 353 | { |
365 | if (tree->ops && tree->ops->split_extent_hook) | 354 | if (tree->ops && tree->ops->split_extent_hook) |
366 | return tree->ops->split_extent_hook(tree->mapping->host, | 355 | tree->ops->split_extent_hook(tree->mapping->host, orig, split); |
367 | orig, split); | ||
368 | return 0; | ||
369 | } | 356 | } |
370 | 357 | ||
371 | /* | 358 | /* |
@@ -500,7 +487,8 @@ again: | |||
500 | cached_state = NULL; | 487 | cached_state = NULL; |
501 | } | 488 | } |
502 | 489 | ||
503 | if (cached && cached->tree && cached->start == start) { | 490 | if (cached && cached->tree && cached->start <= start && |
491 | cached->end > start) { | ||
504 | if (clear) | 492 | if (clear) |
505 | atomic_dec(&cached->refs); | 493 | atomic_dec(&cached->refs); |
506 | state = cached; | 494 | state = cached; |
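The relaxed test reuses a cached extent_state whenever it still covers 'start'; merges can grow a state beyond its original bounds, so requiring an exact start match threw away perfectly good cache hits. A hypothetical helper restating the new condition:

    /* hypothetical: is the cached state still usable for 'start'? */
    static inline int cached_state_covers(struct extent_state *cached,
                                          u64 start)
    {
            return cached->tree && cached->start <= start &&
                   cached->end > start;
    }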
@@ -660,34 +648,25 @@ again: | |||
660 | if (start > end) | 648 | if (start > end) |
661 | break; | 649 | break; |
662 | 650 | ||
663 | if (need_resched()) { | 651 | cond_resched_lock(&tree->lock); |
664 | spin_unlock(&tree->lock); | ||
665 | cond_resched(); | ||
666 | spin_lock(&tree->lock); | ||
667 | } | ||
668 | } | 652 | } |
669 | out: | 653 | out: |
670 | spin_unlock(&tree->lock); | 654 | spin_unlock(&tree->lock); |
671 | return 0; | 655 | return 0; |
672 | } | 656 | } |
673 | 657 | ||
674 | static int set_state_bits(struct extent_io_tree *tree, | 658 | static void set_state_bits(struct extent_io_tree *tree, |
675 | struct extent_state *state, | 659 | struct extent_state *state, |
676 | int *bits) | 660 | int *bits) |
677 | { | 661 | { |
678 | int ret; | ||
679 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | 662 | int bits_to_set = *bits & ~EXTENT_CTLBITS; |
680 | 663 | ||
681 | ret = set_state_cb(tree, state, bits); | 664 | set_state_cb(tree, state, bits); |
682 | if (ret) | ||
683 | return ret; | ||
684 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 665 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
685 | u64 range = state->end - state->start + 1; | 666 | u64 range = state->end - state->start + 1; |
686 | tree->dirty_bytes += range; | 667 | tree->dirty_bytes += range; |
687 | } | 668 | } |
688 | state->state |= bits_to_set; | 669 | state->state |= bits_to_set; |
689 | |||
690 | return 0; | ||
691 | } | 670 | } |
692 | 671 | ||
693 | static void cache_state(struct extent_state *state, | 672 | static void cache_state(struct extent_state *state, |
@@ -742,7 +721,8 @@ again: | |||
742 | spin_lock(&tree->lock); | 721 | spin_lock(&tree->lock); |
743 | if (cached_state && *cached_state) { | 722 | if (cached_state && *cached_state) { |
744 | state = *cached_state; | 723 | state = *cached_state; |
745 | if (state->start == start && state->tree) { | 724 | if (state->start <= start && state->end > start && |
725 | state->tree) { | ||
746 | node = &state->rb_node; | 726 | node = &state->rb_node; |
747 | goto hit_next; | 727 | goto hit_next; |
748 | } | 728 | } |
@@ -779,17 +759,15 @@ hit_next: | |||
779 | goto out; | 759 | goto out; |
780 | } | 760 | } |
781 | 761 | ||
782 | err = set_state_bits(tree, state, &bits); | 762 | set_state_bits(tree, state, &bits); |
783 | if (err) | ||
784 | goto out; | ||
785 | 763 | ||
786 | next_node = rb_next(node); | ||
787 | cache_state(state, cached_state); | 764 | cache_state(state, cached_state); |
788 | merge_state(tree, state); | 765 | merge_state(tree, state); |
789 | if (last_end == (u64)-1) | 766 | if (last_end == (u64)-1) |
790 | goto out; | 767 | goto out; |
791 | 768 | ||
792 | start = last_end + 1; | 769 | start = last_end + 1; |
770 | next_node = rb_next(&state->rb_node); | ||
793 | if (next_node && start < end && prealloc && !need_resched()) { | 771 | if (next_node && start < end && prealloc && !need_resched()) { |
794 | state = rb_entry(next_node, struct extent_state, | 772 | state = rb_entry(next_node, struct extent_state, |
795 | rb_node); | 773 | rb_node); |
@@ -830,9 +808,7 @@ hit_next: | |||
830 | if (err) | 808 | if (err) |
831 | goto out; | 809 | goto out; |
832 | if (state->end <= end) { | 810 | if (state->end <= end) { |
833 | err = set_state_bits(tree, state, &bits); | 811 | set_state_bits(tree, state, &bits); |
834 | if (err) | ||
835 | goto out; | ||
836 | cache_state(state, cached_state); | 812 | cache_state(state, cached_state); |
837 | merge_state(tree, state); | 813 | merge_state(tree, state); |
838 | if (last_end == (u64)-1) | 814 | if (last_end == (u64)-1) |
@@ -862,7 +838,6 @@ hit_next: | |||
862 | * Avoid to free 'prealloc' if it can be merged with | 838 | * Avoid to free 'prealloc' if it can be merged with |
863 | * the later extent. | 839 | * the later extent. |
864 | */ | 840 | */ |
865 | atomic_inc(&prealloc->refs); | ||
866 | err = insert_state(tree, prealloc, start, this_end, | 841 | err = insert_state(tree, prealloc, start, this_end, |
867 | &bits); | 842 | &bits); |
868 | BUG_ON(err == -EEXIST); | 843 | BUG_ON(err == -EEXIST); |
@@ -872,7 +847,6 @@ hit_next: | |||
872 | goto out; | 847 | goto out; |
873 | } | 848 | } |
874 | cache_state(prealloc, cached_state); | 849 | cache_state(prealloc, cached_state); |
875 | free_extent_state(prealloc); | ||
876 | prealloc = NULL; | 850 | prealloc = NULL; |
877 | start = this_end + 1; | 851 | start = this_end + 1; |
878 | goto search_again; | 852 | goto search_again; |
@@ -895,11 +869,7 @@ hit_next: | |||
895 | err = split_state(tree, state, prealloc, end + 1); | 869 | err = split_state(tree, state, prealloc, end + 1); |
896 | BUG_ON(err == -EEXIST); | 870 | BUG_ON(err == -EEXIST); |
897 | 871 | ||
898 | err = set_state_bits(tree, prealloc, &bits); | 872 | set_state_bits(tree, prealloc, &bits); |
899 | if (err) { | ||
900 | prealloc = NULL; | ||
901 | goto out; | ||
902 | } | ||
903 | cache_state(prealloc, cached_state); | 873 | cache_state(prealloc, cached_state); |
904 | merge_state(tree, prealloc); | 874 | merge_state(tree, prealloc); |
905 | prealloc = NULL; | 875 | prealloc = NULL; |
@@ -1061,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
1061 | return 0; | 1031 | return 0; |
1062 | } | 1032 | } |
1063 | 1033 | ||
1064 | /* | ||
1065 | * find the first offset in the io tree with 'bits' set. zero is | ||
1066 | * returned if we find something, and *start_ret and *end_ret are | ||
1067 | * set to reflect the state struct that was found. | ||
1068 | * | ||
1069 | * If nothing was found, 1 is returned, < 0 on error | ||
1070 | */ | ||
1071 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
1072 | u64 *start_ret, u64 *end_ret, int bits) | ||
1073 | { | ||
1074 | struct rb_node *node; | ||
1075 | struct extent_state *state; | ||
1076 | int ret = 1; | ||
1077 | |||
1078 | spin_lock(&tree->lock); | ||
1079 | /* | ||
1080 | * this search will find all the extents that end after | ||
1081 | * our range starts. | ||
1082 | */ | ||
1083 | node = tree_search(tree, start); | ||
1084 | if (!node) | ||
1085 | goto out; | ||
1086 | |||
1087 | while (1) { | ||
1088 | state = rb_entry(node, struct extent_state, rb_node); | ||
1089 | if (state->end >= start && (state->state & bits)) { | ||
1090 | *start_ret = state->start; | ||
1091 | *end_ret = state->end; | ||
1092 | ret = 0; | ||
1093 | break; | ||
1094 | } | ||
1095 | node = rb_next(node); | ||
1096 | if (!node) | ||
1097 | break; | ||
1098 | } | ||
1099 | out: | ||
1100 | spin_unlock(&tree->lock); | ||
1101 | return ret; | ||
1102 | } | ||
1103 | |||
1104 | /* find the first state struct with 'bits' set after 'start', and | 1034 | /* find the first state struct with 'bits' set after 'start', and |
1105 | * return it. tree->lock must be held. NULL will be returned if | 1035 | * return it. tree->lock must be held. NULL will be returned if |
1106 | * nothing was found after 'start' | 1036 | * nothing was found after 'start' |
@@ -1133,6 +1063,30 @@ out: | |||
1133 | } | 1063 | } |
1134 | 1064 | ||
1135 | /* | 1065 | /* |
1066 | * find the first offset in the io tree with 'bits' set. zero is | ||
1067 | * returned if we find something, and *start_ret and *end_ret are | ||
1068 | * set to reflect the state struct that was found. | ||
1069 | * | ||
1070 | * If nothing was found, 1 is returned, < 0 on error | ||
1071 | */ | ||
1072 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | ||
1073 | u64 *start_ret, u64 *end_ret, int bits) | ||
1074 | { | ||
1075 | struct extent_state *state; | ||
1076 | int ret = 1; | ||
1077 | |||
1078 | spin_lock(&tree->lock); | ||
1079 | state = find_first_extent_bit_state(tree, start, bits); | ||
1080 | if (state) { | ||
1081 | *start_ret = state->start; | ||
1082 | *end_ret = state->end; | ||
1083 | ret = 0; | ||
1084 | } | ||
1085 | spin_unlock(&tree->lock); | ||
1086 | return ret; | ||
1087 | } | ||
1088 | |||
1089 | /* | ||
1136 | * find a contiguous range of bytes in the file marked as delalloc, not | 1090 | * find a contiguous range of bytes in the file marked as delalloc, not |
1137 | * more than 'max_bytes'. start and end are used to return the range, | 1091 | * more than 'max_bytes'. start and end are used to return the range, |
1138 | * | 1092 | * |
@@ -1564,7 +1518,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1564 | int bitset = 0; | 1518 | int bitset = 0; |
1565 | 1519 | ||
1566 | spin_lock(&tree->lock); | 1520 | spin_lock(&tree->lock); |
1567 | if (cached && cached->tree && cached->start == start) | 1521 | if (cached && cached->tree && cached->start <= start && |
1522 | cached->end > start) | ||
1568 | node = &cached->rb_node; | 1523 | node = &cached->rb_node; |
1569 | else | 1524 | else |
1570 | node = tree_search(tree, start); | 1525 | node = tree_search(tree, start); |
@@ -2432,6 +2387,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2432 | pgoff_t index; | 2387 | pgoff_t index; |
2433 | pgoff_t end; /* Inclusive */ | 2388 | pgoff_t end; /* Inclusive */ |
2434 | int scanned = 0; | 2389 | int scanned = 0; |
2390 | int tag; | ||
2435 | 2391 | ||
2436 | pagevec_init(&pvec, 0); | 2392 | pagevec_init(&pvec, 0); |
2437 | if (wbc->range_cyclic) { | 2393 | if (wbc->range_cyclic) { |
@@ -2442,11 +2398,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2442 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2398 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2443 | scanned = 1; | 2399 | scanned = 1; |
2444 | } | 2400 | } |
2401 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2402 | tag = PAGECACHE_TAG_TOWRITE; | ||
2403 | else | ||
2404 | tag = PAGECACHE_TAG_DIRTY; | ||
2445 | retry: | 2405 | retry: |
2406 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2407 | tag_pages_for_writeback(mapping, index, end); | ||
2446 | while (!done && !nr_to_write_done && (index <= end) && | 2408 | while (!done && !nr_to_write_done && (index <= end) && |
2447 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2409 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2448 | PAGECACHE_TAG_DIRTY, min(end - index, | 2410 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
2449 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
2450 | unsigned i; | 2411 | unsigned i; |
2451 | 2412 | ||
2452 | scanned = 1; | 2413 | scanned = 1; |
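For WB_SYNC_ALL the walk now snapshots the dirty pages up front by retagging them TOWRITE, so pages redirtied during the sweep cannot livelock it. This mirrors the generic pattern in mm/page-writeback.c's write_cache_pages(); roughly:

    if (wbc->sync_mode == WB_SYNC_ALL)
            tag_pages_for_writeback(mapping, index, end); /* DIRTY -> TOWRITE */
    while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                                          PAGEVEC_SIZE))) {
            /* only pages tagged before the walk began are visited */
    }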
@@ -2541,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2541 | struct writeback_control *wbc) | 2502 | struct writeback_control *wbc) |
2542 | { | 2503 | { |
2543 | int ret; | 2504 | int ret; |
2544 | struct address_space *mapping = page->mapping; | ||
2545 | struct extent_page_data epd = { | 2505 | struct extent_page_data epd = { |
2546 | .bio = NULL, | 2506 | .bio = NULL, |
2547 | .tree = tree, | 2507 | .tree = tree, |
@@ -2549,18 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2549 | .extent_locked = 0, | 2509 | .extent_locked = 0, |
2550 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 2510 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
2551 | }; | 2511 | }; |
2552 | struct writeback_control wbc_writepages = { | ||
2553 | .sync_mode = wbc->sync_mode, | ||
2554 | .older_than_this = NULL, | ||
2555 | .nr_to_write = 64, | ||
2556 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | ||
2557 | .range_end = (loff_t)-1, | ||
2558 | }; | ||
2559 | 2512 | ||
2560 | ret = __extent_writepage(page, wbc, &epd); | 2513 | ret = __extent_writepage(page, wbc, &epd); |
2561 | 2514 | ||
2562 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | ||
2563 | __extent_writepage, &epd, flush_write_bio); | ||
2564 | flush_epd_write_bio(&epd); | 2515 | flush_epd_write_bio(&epd); |
2565 | return ret; | 2516 | return ret; |
2566 | } | 2517 | } |
@@ -2584,7 +2535,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2584 | }; | 2535 | }; |
2585 | struct writeback_control wbc_writepages = { | 2536 | struct writeback_control wbc_writepages = { |
2586 | .sync_mode = mode, | 2537 | .sync_mode = mode, |
2587 | .older_than_this = NULL, | ||
2588 | .nr_to_write = nr_pages * 2, | 2538 | .nr_to_write = nr_pages * 2, |
2589 | .range_start = start, | 2539 | .range_start = start, |
2590 | .range_end = end + 1, | 2540 | .range_end = end + 1, |
@@ -3022,8 +2972,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3022 | return NULL; | 2972 | return NULL; |
3023 | eb->start = start; | 2973 | eb->start = start; |
3024 | eb->len = len; | 2974 | eb->len = len; |
3025 | spin_lock_init(&eb->lock); | 2975 | rwlock_init(&eb->lock); |
3026 | init_waitqueue_head(&eb->lock_wq); | 2976 | atomic_set(&eb->write_locks, 0); |
2977 | atomic_set(&eb->read_locks, 0); | ||
2978 | atomic_set(&eb->blocking_readers, 0); | ||
2979 | atomic_set(&eb->blocking_writers, 0); | ||
2980 | atomic_set(&eb->spinning_readers, 0); | ||
2981 | atomic_set(&eb->spinning_writers, 0); | ||
2982 | init_waitqueue_head(&eb->write_lock_wq); | ||
2983 | init_waitqueue_head(&eb->read_lock_wq); | ||
3027 | 2984 | ||
3028 | #if LEAK_DEBUG | 2985 | #if LEAK_DEBUG |
3029 | spin_lock_irqsave(&leak_lock, flags); | 2986 | spin_lock_irqsave(&leak_lock, flags); |
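The single spinlock becomes a rwlock plus counters that let holders move between spinning and blocking modes. Roughly what the matching fs/btrfs/locking.c rework does on the read side (a sketch, not part of this diff):

    /* take a spinning read lock, waiting out any blocking writer */
    void btrfs_tree_read_lock(struct extent_buffer *eb)
    {
    again:
            wait_event(eb->write_lock_wq,
                       atomic_read(&eb->blocking_writers) == 0);
            read_lock(&eb->lock);
            if (atomic_read(&eb->blocking_writers)) {
                    /* a writer went blocking after the check; retry */
                    read_unlock(&eb->lock);
                    goto again;
            }
            atomic_inc(&eb->read_locks);
            atomic_inc(&eb->spinning_readers);
    }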
@@ -3119,7 +3076,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3119 | i = 0; | 3076 | i = 0; |
3120 | } | 3077 | } |
3121 | for (; i < num_pages; i++, index++) { | 3078 | for (; i < num_pages; i++, index++) { |
3122 | p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); | 3079 | p = find_or_create_page(mapping, index, GFP_NOFS); |
3123 | if (!p) { | 3080 | if (!p) { |
3124 | WARN_ON(1); | 3081 | WARN_ON(1); |
3125 | goto free_eb; | 3082 | goto free_eb; |
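Dropping __GFP_HIGHMEM here is what makes the kmap_atomic() removals later in this file safe: lowmem pages keep a permanent kernel mapping, so page_address() is always valid. The resulting access pattern, sketched:

    /* lowmem-only pages: no kmap/kunmap bracketing required */
    p = find_or_create_page(mapping, index, GFP_NOFS);
    kaddr = page_address(p);           /* permanently mapped */
    memcpy(dst, kaddr + offset, cur);  /* no kunmap needed afterwards */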
@@ -3266,6 +3223,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3266 | return was_dirty; | 3223 | return was_dirty; |
3267 | } | 3224 | } |
3268 | 3225 | ||
3226 | static int __eb_straddles_pages(u64 start, u64 len) | ||
3227 | { | ||
3228 | if (len < PAGE_CACHE_SIZE) | ||
3229 | return 1; | ||
3230 | if (start & (PAGE_CACHE_SIZE - 1)) | ||
3231 | return 1; | ||
3232 | if ((start + len) & (PAGE_CACHE_SIZE - 1)) | ||
3233 | return 1; | ||
3234 | return 0; | ||
3235 | } | ||
3236 | |||
3237 | static int eb_straddles_pages(struct extent_buffer *eb) | ||
3238 | { | ||
3239 | return __eb_straddles_pages(eb->start, eb->len); | ||
3240 | } | ||
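A few concrete cases, assuming PAGE_CACHE_SIZE == 4096; only buffers made of whole, aligned pages may skip the per-range EXTENT_UPTODATE tracking below:

    /* __eb_straddles_pages(start, len) with 4 KiB pages:
     *   (0x4000, 0x4000) -> 0   aligned start, whole pages
     *   (0x4000, 0x0800) -> 1   len smaller than one page
     *   (0x4800, 0x4000) -> 1   start not page aligned
     */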
3241 | |||
3269 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3242 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
3270 | struct extent_buffer *eb, | 3243 | struct extent_buffer *eb, |
3271 | struct extent_state **cached_state) | 3244 | struct extent_state **cached_state) |
@@ -3277,8 +3250,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3277 | num_pages = num_extent_pages(eb->start, eb->len); | 3250 | num_pages = num_extent_pages(eb->start, eb->len); |
3278 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3251 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3279 | 3252 | ||
3280 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3253 | if (eb_straddles_pages(eb)) { |
3281 | cached_state, GFP_NOFS); | 3254 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3255 | cached_state, GFP_NOFS); | ||
3256 | } | ||
3282 | for (i = 0; i < num_pages; i++) { | 3257 | for (i = 0; i < num_pages; i++) { |
3283 | page = extent_buffer_page(eb, i); | 3258 | page = extent_buffer_page(eb, i); |
3284 | if (page) | 3259 | if (page) |
@@ -3296,8 +3271,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3296 | 3271 | ||
3297 | num_pages = num_extent_pages(eb->start, eb->len); | 3272 | num_pages = num_extent_pages(eb->start, eb->len); |
3298 | 3273 | ||
3299 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3274 | if (eb_straddles_pages(eb)) { |
3300 | NULL, GFP_NOFS); | 3275 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3276 | NULL, GFP_NOFS); | ||
3277 | } | ||
3301 | for (i = 0; i < num_pages; i++) { | 3278 | for (i = 0; i < num_pages; i++) { |
3302 | page = extent_buffer_page(eb, i); | 3279 | page = extent_buffer_page(eb, i); |
3303 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3280 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
@@ -3320,9 +3297,12 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3320 | int uptodate; | 3297 | int uptodate; |
3321 | unsigned long index; | 3298 | unsigned long index; |
3322 | 3299 | ||
3323 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); | 3300 | if (__eb_straddles_pages(start, end - start + 1)) { |
3324 | if (ret) | 3301 | ret = test_range_bit(tree, start, end, |
3325 | return 1; | 3302 | EXTENT_UPTODATE, 1, NULL); |
3303 | if (ret) | ||
3304 | return 1; | ||
3305 | } | ||
3326 | while (start <= end) { | 3306 | while (start <= end) { |
3327 | index = start >> PAGE_CACHE_SHIFT; | 3307 | index = start >> PAGE_CACHE_SHIFT; |
3328 | page = find_get_page(tree->mapping, index); | 3308 | page = find_get_page(tree->mapping, index); |
@@ -3350,10 +3330,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3350 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3330 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3351 | return 1; | 3331 | return 1; |
3352 | 3332 | ||
3353 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3333 | if (eb_straddles_pages(eb)) { |
3354 | EXTENT_UPTODATE, 1, cached_state); | 3334 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3355 | if (ret) | 3335 | EXTENT_UPTODATE, 1, cached_state); |
3356 | return ret; | 3336 | if (ret) |
3337 | return ret; | ||
3338 | } | ||
3357 | 3339 | ||
3358 | num_pages = num_extent_pages(eb->start, eb->len); | 3340 | num_pages = num_extent_pages(eb->start, eb->len); |
3359 | for (i = 0; i < num_pages; i++) { | 3341 | for (i = 0; i < num_pages; i++) { |
@@ -3386,9 +3368,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3386 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3368 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3387 | return 0; | 3369 | return 0; |
3388 | 3370 | ||
3389 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3371 | if (eb_straddles_pages(eb)) { |
3390 | EXTENT_UPTODATE, 1, NULL)) { | 3372 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3391 | return 0; | 3373 | EXTENT_UPTODATE, 1, NULL)) { |
3374 | return 0; | ||
3375 | } | ||
3392 | } | 3376 | } |
3393 | 3377 | ||
3394 | if (start) { | 3378 | if (start) { |
@@ -3492,9 +3476,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3492 | page = extent_buffer_page(eb, i); | 3476 | page = extent_buffer_page(eb, i); |
3493 | 3477 | ||
3494 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3478 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3495 | kaddr = kmap_atomic(page, KM_USER1); | 3479 | kaddr = page_address(page); |
3496 | memcpy(dst, kaddr + offset, cur); | 3480 | memcpy(dst, kaddr + offset, cur); |
3497 | kunmap_atomic(kaddr, KM_USER1); | ||
3498 | 3481 | ||
3499 | dst += cur; | 3482 | dst += cur; |
3500 | len -= cur; | 3483 | len -= cur; |
@@ -3504,9 +3487,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3504 | } | 3487 | } |
3505 | 3488 | ||
3506 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | 3489 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, |
3507 | unsigned long min_len, char **token, char **map, | 3490 | unsigned long min_len, char **map, |
3508 | unsigned long *map_start, | 3491 | unsigned long *map_start, |
3509 | unsigned long *map_len, int km) | 3492 | unsigned long *map_len) |
3510 | { | 3493 | { |
3511 | size_t offset = start & (PAGE_CACHE_SIZE - 1); | 3494 | size_t offset = start & (PAGE_CACHE_SIZE - 1); |
3512 | char *kaddr; | 3495 | char *kaddr; |
@@ -3536,42 +3519,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3536 | } | 3519 | } |
3537 | 3520 | ||
3538 | p = extent_buffer_page(eb, i); | 3521 | p = extent_buffer_page(eb, i); |
3539 | kaddr = kmap_atomic(p, km); | 3522 | kaddr = page_address(p); |
3540 | *token = kaddr; | ||
3541 | *map = kaddr + offset; | 3523 | *map = kaddr + offset; |
3542 | *map_len = PAGE_CACHE_SIZE - offset; | 3524 | *map_len = PAGE_CACHE_SIZE - offset; |
3543 | return 0; | 3525 | return 0; |
3544 | } | 3526 | } |
3545 | 3527 | ||
3546 | int map_extent_buffer(struct extent_buffer *eb, unsigned long start, | ||
3547 | unsigned long min_len, | ||
3548 | char **token, char **map, | ||
3549 | unsigned long *map_start, | ||
3550 | unsigned long *map_len, int km) | ||
3551 | { | ||
3552 | int err; | ||
3553 | int save = 0; | ||
3554 | if (eb->map_token) { | ||
3555 | unmap_extent_buffer(eb, eb->map_token, km); | ||
3556 | eb->map_token = NULL; | ||
3557 | save = 1; | ||
3558 | } | ||
3559 | err = map_private_extent_buffer(eb, start, min_len, token, map, | ||
3560 | map_start, map_len, km); | ||
3561 | if (!err && save) { | ||
3562 | eb->map_token = *token; | ||
3563 | eb->kaddr = *map; | ||
3564 | eb->map_start = *map_start; | ||
3565 | eb->map_len = *map_len; | ||
3566 | } | ||
3567 | return err; | ||
3568 | } | ||
3569 | |||
3570 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) | ||
3571 | { | ||
3572 | kunmap_atomic(token, km); | ||
3573 | } | ||
3574 | |||
3575 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | 3528 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, |
3576 | unsigned long start, | 3529 | unsigned long start, |
3577 | unsigned long len) | 3530 | unsigned long len) |
@@ -3595,9 +3548,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
3595 | 3548 | ||
3596 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3549 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3597 | 3550 | ||
3598 | kaddr = kmap_atomic(page, KM_USER0); | 3551 | kaddr = page_address(page); |
3599 | ret = memcmp(ptr, kaddr + offset, cur); | 3552 | ret = memcmp(ptr, kaddr + offset, cur); |
3600 | kunmap_atomic(kaddr, KM_USER0); | ||
3601 | if (ret) | 3553 | if (ret) |
3602 | break; | 3554 | break; |
3603 | 3555 | ||
@@ -3630,9 +3582,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | |||
3630 | WARN_ON(!PageUptodate(page)); | 3582 | WARN_ON(!PageUptodate(page)); |
3631 | 3583 | ||
3632 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3584 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3633 | kaddr = kmap_atomic(page, KM_USER1); | 3585 | kaddr = page_address(page); |
3634 | memcpy(kaddr + offset, src, cur); | 3586 | memcpy(kaddr + offset, src, cur); |
3635 | kunmap_atomic(kaddr, KM_USER1); | ||
3636 | 3587 | ||
3637 | src += cur; | 3588 | src += cur; |
3638 | len -= cur; | 3589 | len -= cur; |
@@ -3661,9 +3612,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, | |||
3661 | WARN_ON(!PageUptodate(page)); | 3612 | WARN_ON(!PageUptodate(page)); |
3662 | 3613 | ||
3663 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3614 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3664 | kaddr = kmap_atomic(page, KM_USER0); | 3615 | kaddr = page_address(page); |
3665 | memset(kaddr + offset, c, cur); | 3616 | memset(kaddr + offset, c, cur); |
3666 | kunmap_atomic(kaddr, KM_USER0); | ||
3667 | 3617 | ||
3668 | len -= cur; | 3618 | len -= cur; |
3669 | offset = 0; | 3619 | offset = 0; |
@@ -3694,9 +3644,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | |||
3694 | 3644 | ||
3695 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | 3645 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); |
3696 | 3646 | ||
3697 | kaddr = kmap_atomic(page, KM_USER0); | 3647 | kaddr = page_address(page); |
3698 | read_extent_buffer(src, kaddr + offset, src_offset, cur); | 3648 | read_extent_buffer(src, kaddr + offset, src_offset, cur); |
3699 | kunmap_atomic(kaddr, KM_USER0); | ||
3700 | 3649 | ||
3701 | src_offset += cur; | 3650 | src_offset += cur; |
3702 | len -= cur; | 3651 | len -= cur; |
@@ -3709,20 +3658,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
3709 | unsigned long dst_off, unsigned long src_off, | 3658 | unsigned long dst_off, unsigned long src_off, |
3710 | unsigned long len) | 3659 | unsigned long len) |
3711 | { | 3660 | { |
3712 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3661 | char *dst_kaddr = page_address(dst_page); |
3713 | if (dst_page == src_page) { | 3662 | if (dst_page == src_page) { |
3714 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); | 3663 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); |
3715 | } else { | 3664 | } else { |
3716 | char *src_kaddr = kmap_atomic(src_page, KM_USER1); | 3665 | char *src_kaddr = page_address(src_page); |
3717 | char *p = dst_kaddr + dst_off + len; | 3666 | char *p = dst_kaddr + dst_off + len; |
3718 | char *s = src_kaddr + src_off + len; | 3667 | char *s = src_kaddr + src_off + len; |
3719 | 3668 | ||
3720 | while (len--) | 3669 | while (len--) |
3721 | *--p = *--s; | 3670 | *--p = *--s; |
3722 | |||
3723 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3724 | } | 3671 | } |
3725 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3726 | } | 3672 | } |
3727 | 3673 | ||
3728 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | 3674 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) |
@@ -3735,20 +3681,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
3735 | unsigned long dst_off, unsigned long src_off, | 3681 | unsigned long dst_off, unsigned long src_off, |
3736 | unsigned long len) | 3682 | unsigned long len) |
3737 | { | 3683 | { |
3738 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3684 | char *dst_kaddr = page_address(dst_page); |
3739 | char *src_kaddr; | 3685 | char *src_kaddr; |
3740 | 3686 | ||
3741 | if (dst_page != src_page) { | 3687 | if (dst_page != src_page) { |
3742 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 3688 | src_kaddr = page_address(src_page); |
3743 | } else { | 3689 | } else { |
3744 | src_kaddr = dst_kaddr; | 3690 | src_kaddr = dst_kaddr; |
3745 | BUG_ON(areas_overlap(src_off, dst_off, len)); | 3691 | BUG_ON(areas_overlap(src_off, dst_off, len)); |
3746 | } | 3692 | } |
3747 | 3693 | ||
3748 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 3694 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
3749 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3750 | if (dst_page != src_page) | ||
3751 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3752 | } | 3695 | } |
3753 | 3696 | ||
3754 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | 3697 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a11a92ee2d30..7b2f0c3e7929 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,15 +76,15 @@ struct extent_io_ops { | |||
76 | struct extent_state *state); | 76 | struct extent_state *state); |
77 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 77 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
78 | struct extent_state *state, int uptodate); | 78 | struct extent_state *state, int uptodate); |
79 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, | 79 | void (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
80 | int *bits); | 80 | int *bits); |
81 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 81 | void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
82 | int *bits); | 82 | int *bits); |
83 | int (*merge_extent_hook)(struct inode *inode, | 83 | void (*merge_extent_hook)(struct inode *inode, |
84 | struct extent_state *new, | 84 | struct extent_state *new, |
85 | struct extent_state *other); | 85 | struct extent_state *other); |
86 | int (*split_extent_hook)(struct inode *inode, | 86 | void (*split_extent_hook)(struct inode *inode, |
87 | struct extent_state *orig, u64 split); | 87 | struct extent_state *orig, u64 split); |
88 | int (*write_cache_pages_lock_hook)(struct page *page); | 88 | int (*write_cache_pages_lock_hook)(struct page *page); |
89 | }; | 89 | }; |
90 | 90 | ||
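The matching hook implementations in fs/btrfs/inode.c turn void as well; since the hooks can no longer fail, there is nothing for set_extent_bit() to unwind. A hypothetical sketch of the shape:

    static void btrfs_set_bit_hook(struct inode *inode,
                                   struct extent_state *state, int *bits)
    {
            if (*bits & EXTENT_DELALLOC) {
                    /* update per-inode and per-fs delalloc accounting */
            }
    }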
@@ -108,8 +108,6 @@ struct extent_state { | |||
108 | wait_queue_head_t wq; | 108 | wait_queue_head_t wq; |
109 | atomic_t refs; | 109 | atomic_t refs; |
110 | unsigned long state; | 110 | unsigned long state; |
111 | u64 split_start; | ||
112 | u64 split_end; | ||
113 | 111 | ||
114 | /* for use by the FS */ | 112 | /* for use by the FS */ |
115 | u64 private; | 113 | u64 private; |
@@ -120,8 +118,6 @@ struct extent_state { | |||
120 | struct extent_buffer { | 118 | struct extent_buffer { |
121 | u64 start; | 119 | u64 start; |
122 | unsigned long len; | 120 | unsigned long len; |
123 | char *map_token; | ||
124 | char *kaddr; | ||
125 | unsigned long map_start; | 121 | unsigned long map_start; |
126 | unsigned long map_len; | 122 | unsigned long map_len; |
127 | struct page *first_page; | 123 | struct page *first_page; |
@@ -130,14 +126,26 @@ struct extent_buffer { | |||
130 | struct rcu_head rcu_head; | 126 | struct rcu_head rcu_head; |
131 | atomic_t refs; | 127 | atomic_t refs; |
132 | 128 | ||
133 | /* the spinlock is used to protect most operations */ | 129 | /* counts of lock holders and waiters on the extent buffer */
134 | spinlock_t lock; | 130 | atomic_t write_locks; |
131 | atomic_t read_locks; | ||
132 | atomic_t blocking_writers; | ||
133 | atomic_t blocking_readers; | ||
134 | atomic_t spinning_readers; | ||
135 | atomic_t spinning_writers; | ||
136 | |||
137 | /* protects write locks */ | ||
138 | rwlock_t lock; | ||
135 | 139 | ||
136 | /* | 140 | /* readers use lock_wq while they wait for the write |
137 | * when we keep the lock held while blocking, waiters go onto | 141 | * lock holders to unlock |
138 | * the wq | ||
139 | */ | 142 | */ |
140 | wait_queue_head_t lock_wq; | 143 | wait_queue_head_t write_lock_wq; |
144 | |||
145 | /* writers use read_lock_wq while they wait for readers | ||
146 | * to unlock | ||
147 | */ | ||
148 | wait_queue_head_t read_lock_wq; | ||
141 | }; | 149 | }; |
142 | 150 | ||
143 | static inline void extent_set_compress_type(unsigned long *bio_flags, | 151 | static inline void extent_set_compress_type(unsigned long *bio_flags, |
@@ -279,15 +287,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
279 | int extent_buffer_uptodate(struct extent_io_tree *tree, | 287 | int extent_buffer_uptodate(struct extent_io_tree *tree, |
280 | struct extent_buffer *eb, | 288 | struct extent_buffer *eb, |
281 | struct extent_state *cached_state); | 289 | struct extent_state *cached_state); |
282 | int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, | ||
283 | unsigned long min_len, char **token, char **map, | ||
284 | unsigned long *map_start, | ||
285 | unsigned long *map_len, int km); | ||
286 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | 290 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, |
287 | unsigned long min_len, char **token, char **map, | 291 | unsigned long min_len, char **map, |
288 | unsigned long *map_start, | 292 | unsigned long *map_start, |
289 | unsigned long *map_len, int km); | 293 | unsigned long *map_len); |
290 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | ||
291 | int extent_range_uptodate(struct extent_io_tree *tree, | 294 | int extent_range_uptodate(struct extent_io_tree *tree, |
292 | u64 start, u64 end); | 295 | u64 start, u64 end); |
293 | int extent_clear_unlock_delalloc(struct inode *inode, | 296 | int extent_clear_unlock_delalloc(struct inode *inode, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2d0410344ea3..7c97b3301459 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
183 | return 0; | 183 | return 0; |
184 | } | 184 | } |
185 | 185 | ||
186 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | 186 | static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) |
187 | { | 187 | { |
188 | int ret = 0; | ||
189 | struct extent_map *merge = NULL; | 188 | struct extent_map *merge = NULL; |
190 | struct rb_node *rb; | 189 | struct rb_node *rb; |
191 | struct extent_map *em; | ||
192 | |||
193 | write_lock(&tree->lock); | ||
194 | em = lookup_extent_mapping(tree, start, len); | ||
195 | |||
196 | WARN_ON(!em || em->start != start); | ||
197 | |||
198 | if (!em) | ||
199 | goto out; | ||
200 | |||
201 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
202 | 190 | ||
203 | if (em->start != 0) { | 191 | if (em->start != 0) { |
204 | rb = rb_prev(&em->rb_node); | 192 | rb = rb_prev(&em->rb_node); |
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | |||
225 | merge->in_tree = 0; | 213 | merge->in_tree = 0; |
226 | free_extent_map(merge); | 214 | free_extent_map(merge); |
227 | } | 215 | } |
216 | } | ||
217 | |||
218 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
219 | { | ||
220 | int ret = 0; | ||
221 | struct extent_map *em; | ||
222 | |||
223 | write_lock(&tree->lock); | ||
224 | em = lookup_extent_mapping(tree, start, len); | ||
225 | |||
226 | WARN_ON(!em || em->start != start); | ||
227 | |||
228 | if (!em) | ||
229 | goto out; | ||
230 | |||
231 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
232 | |||
233 | try_merge_map(tree, em); | ||
228 | 234 | ||
229 | free_extent_map(em); | 235 | free_extent_map(em); |
230 | out: | 236 | out: |
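[Editor's note] unpin_extent_cache() and add_extent_mapping() previously open-coded the same neighbour-merging walk; the hunks here factor it into try_merge_map(). Below is a simplified model of the merge step over a sorted doubly linked list — the kernel walks the rb-tree and also checks block contiguity and flags, which this sketch omits.

#include <stdlib.h>

struct em {
    unsigned long long start, len;
    struct em *prev, *next;
};

/* contiguity only; the kernel's mergable_maps() also compares block
 * addresses and flags */
static int mergable(const struct em *a, const struct em *b)
{
    return a->start + a->len == b->start;
}

static void try_merge(struct em *em)
{
    struct em *m = em->prev;

    if (m && mergable(m, em)) {         /* absorb the predecessor */
        em->start = m->start;
        em->len += m->len;
        em->prev = m->prev;
        if (m->prev)
            m->prev->next = em;
        free(m);
    }
    m = em->next;
    if (m && mergable(em, m)) {         /* absorb the successor */
        em->len += m->len;
        em->next = m->next;
        if (m->next)
            m->next->prev = em;
        free(m);
    }
}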
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
247 | struct extent_map *em) | 253 | struct extent_map *em) |
248 | { | 254 | { |
249 | int ret = 0; | 255 | int ret = 0; |
250 | struct extent_map *merge = NULL; | ||
251 | struct rb_node *rb; | 256 | struct rb_node *rb; |
252 | struct extent_map *exist; | 257 | struct extent_map *exist; |
253 | 258 | ||
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
263 | goto out; | 268 | goto out; |
264 | } | 269 | } |
265 | atomic_inc(&em->refs); | 270 | atomic_inc(&em->refs); |
266 | if (em->start != 0) { | 271 | |
267 | rb = rb_prev(&em->rb_node); | 272 | try_merge_map(tree, em); |
268 | if (rb) | ||
269 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
270 | if (rb && mergable_maps(merge, em)) { | ||
271 | em->start = merge->start; | ||
272 | em->len += merge->len; | ||
273 | em->block_len += merge->block_len; | ||
274 | em->block_start = merge->block_start; | ||
275 | merge->in_tree = 0; | ||
276 | rb_erase(&merge->rb_node, &tree->map); | ||
277 | free_extent_map(merge); | ||
278 | } | ||
279 | } | ||
280 | rb = rb_next(&em->rb_node); | ||
281 | if (rb) | ||
282 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
283 | if (rb && mergable_maps(em, merge)) { | ||
284 | em->len += merge->len; | ||
285 | em->block_len += merge->len; | ||
286 | rb_erase(&merge->rb_node, &tree->map); | ||
287 | merge->in_tree = 0; | ||
288 | free_extent_map(merge); | ||
289 | } | ||
290 | out: | 273 | out: |
291 | return ret; | 274 | return ret; |
292 | } | 275 | } |
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len) | |||
299 | return start + len; | 282 | return start + len; |
300 | } | 283 | } |
301 | 284 | ||
302 | /** | 285 | struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, |
303 | * lookup_extent_mapping - lookup extent_map | 286 | u64 start, u64 len, int strict) |
304 | * @tree: tree to lookup in | ||
305 | * @start: byte offset to start the search | ||
306 | * @len: length of the lookup range | ||
307 | * | ||
308 | * Find and return the first extent_map struct in @tree that intersects the | ||
309 | * [start, len] range. There may be additional objects in the tree that | ||
310 | * intersect, so check the object returned carefully to make sure that no | ||
311 | * additional lookups are needed. | ||
312 | */ | ||
313 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
314 | u64 start, u64 len) | ||
315 | { | 287 | { |
316 | struct extent_map *em; | 288 | struct extent_map *em; |
317 | struct rb_node *rb_node; | 289 | struct rb_node *rb_node; |
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
320 | u64 end = range_end(start, len); | 292 | u64 end = range_end(start, len); |
321 | 293 | ||
322 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 294 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
323 | if (!rb_node && prev) { | ||
324 | em = rb_entry(prev, struct extent_map, rb_node); | ||
325 | if (end > em->start && start < extent_map_end(em)) | ||
326 | goto found; | ||
327 | } | ||
328 | if (!rb_node && next) { | ||
329 | em = rb_entry(next, struct extent_map, rb_node); | ||
330 | if (end > em->start && start < extent_map_end(em)) | ||
331 | goto found; | ||
332 | } | ||
333 | if (!rb_node) { | 295 | if (!rb_node) { |
334 | em = NULL; | 296 | if (prev) |
335 | goto out; | 297 | rb_node = prev; |
336 | } | 298 | else if (next) |
337 | if (IS_ERR(rb_node)) { | 299 | rb_node = next; |
338 | em = ERR_CAST(rb_node); | 300 | else |
339 | goto out; | 301 | return NULL; |
340 | } | 302 | } |
303 | |||
341 | em = rb_entry(rb_node, struct extent_map, rb_node); | 304 | em = rb_entry(rb_node, struct extent_map, rb_node); |
342 | if (end > em->start && start < extent_map_end(em)) | ||
343 | goto found; | ||
344 | 305 | ||
345 | em = NULL; | 306 | if (strict && !(end > em->start && start < extent_map_end(em))) |
346 | goto out; | 307 | return NULL; |
347 | 308 | ||
348 | found: | ||
349 | atomic_inc(&em->refs); | 309 | atomic_inc(&em->refs); |
350 | out: | ||
351 | return em; | 310 | return em; |
352 | } | 311 | } |
353 | 312 | ||
354 | /** | 313 | /** |
314 | * lookup_extent_mapping - lookup extent_map | ||
315 | * @tree: tree to lookup in | ||
316 | * @start: byte offset to start the search | ||
317 | * @len: length of the lookup range | ||
318 | * | ||
319 | * Find and return the first extent_map struct in @tree that intersects the | ||
320 | * [start, len] range. There may be additional objects in the tree that | ||
321 | * intersect, so check the object returned carefully to make sure that no | ||
322 | * additional lookups are needed. | ||
323 | */ | ||
324 | struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | ||
325 | u64 start, u64 len) | ||
326 | { | ||
327 | return __lookup_extent_mapping(tree, start, len, 1); | ||
328 | } | ||
329 | |||
330 | /** | ||
355 | * search_extent_mapping - find a nearby extent map | 331 | * search_extent_mapping - find a nearby extent map |
356 | * @tree: tree to lookup in | 332 | * @tree: tree to lookup in |
357 | * @start: byte offset to start the search | 333 | * @start: byte offset to start the search |
@@ -365,38 +341,7 @@ out: | |||
365 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | 341 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, |
366 | u64 start, u64 len) | 342 | u64 start, u64 len) |
367 | { | 343 | { |
368 | struct extent_map *em; | 344 | return __lookup_extent_mapping(tree, start, len, 0); |
369 | struct rb_node *rb_node; | ||
370 | struct rb_node *prev = NULL; | ||
371 | struct rb_node *next = NULL; | ||
372 | |||
373 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
374 | if (!rb_node && prev) { | ||
375 | em = rb_entry(prev, struct extent_map, rb_node); | ||
376 | goto found; | ||
377 | } | ||
378 | if (!rb_node && next) { | ||
379 | em = rb_entry(next, struct extent_map, rb_node); | ||
380 | goto found; | ||
381 | } | ||
382 | if (!rb_node) { | ||
383 | em = NULL; | ||
384 | goto out; | ||
385 | } | ||
386 | if (IS_ERR(rb_node)) { | ||
387 | em = ERR_CAST(rb_node); | ||
388 | goto out; | ||
389 | } | ||
390 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
391 | goto found; | ||
392 | |||
393 | em = NULL; | ||
394 | goto out; | ||
395 | |||
396 | found: | ||
397 | atomic_inc(&em->refs); | ||
398 | out: | ||
399 | return em; | ||
400 | } | 345 | } |
401 | 346 | ||
402 | /** | 347 | /** |
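[Editor's note] lookup_extent_mapping() and search_extent_mapping() differed only in whether the candidate had to genuinely intersect the requested range, so the hunks above fold both into __lookup_extent_mapping() with a strict flag and keep the public functions as one-line wrappers. A standalone sketch of the same consolidation over a sorted array (a stand-in for the rb-tree; names are illustrative):

#include <stddef.h>

struct range { unsigned long long start, len; };

static struct range *__lookup(struct range *v, size_t n,
                              unsigned long long start,
                              unsigned long long len, int strict)
{
    unsigned long long end = start + len;
    size_t i;

    for (i = 0; i < n; i++) {
        struct range *r = &v[i];

        /* skip entries that end at or before 'start' */
        if (r->start + r->len <= start)
            continue;
        /* strict callers insist on a real overlap */
        if (strict && !(end > r->start && start < r->start + r->len))
            return NULL;
        return r;
    }
    return NULL;
}

struct range *lookup_range(struct range *v, size_t n,
                           unsigned long long start, unsigned long long len)
{
    return __lookup(v, n, start, len, 1);   /* must intersect */
}

struct range *search_range(struct range *v, size_t n,
                           unsigned long long start, unsigned long long len)
{
    return __lookup(v, n, start, len, 0);   /* nearest is fine */
}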
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 90d4ee52cd45..a1cb7821becd 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -177,6 +177,17 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
177 | 177 | ||
178 | WARN_ON(bio->bi_vcnt <= 0); | 178 | WARN_ON(bio->bi_vcnt <= 0); |
179 | 179 | ||
180 | /* | ||
181 | * the free space stuff is only read when it hasn't been | ||
182 | * updated in the current transaction. So, we can safely | ||
183 | * read from the commit root and sidestep a nasty deadlock | ||
184 | * between reading the free space cache and updating the csum tree. | ||
185 | */ | ||
186 | if (btrfs_is_free_space_inode(root, inode)) { | ||
187 | path->search_commit_root = 1; | ||
188 | path->skip_locking = 1; | ||
189 | } | ||
190 | |||
180 | disk_bytenr = (u64)bio->bi_sector << 9; | 191 | disk_bytenr = (u64)bio->bi_sector << 9; |
181 | if (dio) | 192 | if (dio) |
182 | offset = logical_offset; | 193 | offset = logical_offset; |
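[Editor's note] The comment added above captures the deadlock-avoidance idea: the free space inode's checksums are looked up in the commit root, which no writer mutates, so the reader needs neither tree locks nor a view of the live tree. A toy model of reading from a frozen committed snapshot instead of the locked live copy — purely illustrative, assuming C11 atomics and a single committer publishing the pointer:

#include <stdatomic.h>

struct snap { /* frozen, read-only tree state */ int data; };

struct tree {
    struct snap *live;                /* mutated under the tree locks */
    _Atomic(struct snap *) commit;    /* last committed, never changes */
};

static const struct snap *read_committed(struct tree *t)
{
    /* no lock needed: the snapshot is immutable once published */
    return atomic_load_explicit(&t->commit, memory_order_acquire);
}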
@@ -282,7 +293,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
282 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); | 293 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); |
283 | 294 | ||
284 | path = btrfs_alloc_path(); | 295 | path = btrfs_alloc_path(); |
285 | BUG_ON(!path); | 296 | if (!path) |
297 | return -ENOMEM; | ||
286 | 298 | ||
287 | if (search_commit) { | 299 | if (search_commit) { |
288 | path->skip_locking = 1; | 300 | path->skip_locking = 1; |
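[Editor's note] This hunk is one instance of a conversion repeated throughout the series: btrfs_alloc_path() failures now return -ENOMEM to the caller instead of BUG()ing the machine. A minimal userspace sketch of the pattern:

#include <errno.h>
#include <stdlib.h>

struct path { int slots[8]; };

static int lookup_csums(void)
{
    struct path *path = calloc(1, sizeof(*path));

    if (!path)          /* was: BUG_ON(!path); */
        return -ENOMEM; /* kernel style: negative errno to the caller */

    /* ... perform the search ... */

    free(path);
    return 0;
}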
@@ -664,15 +676,13 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
664 | struct btrfs_sector_sum *sector_sum; | 676 | struct btrfs_sector_sum *sector_sum; |
665 | u32 nritems; | 677 | u32 nritems; |
666 | u32 ins_size; | 678 | u32 ins_size; |
667 | char *eb_map; | ||
668 | char *eb_token; | ||
669 | unsigned long map_len; | ||
670 | unsigned long map_start; | ||
671 | u16 csum_size = | 679 | u16 csum_size = |
672 | btrfs_super_csum_size(&root->fs_info->super_copy); | 680 | btrfs_super_csum_size(&root->fs_info->super_copy); |
673 | 681 | ||
674 | path = btrfs_alloc_path(); | 682 | path = btrfs_alloc_path(); |
675 | BUG_ON(!path); | 683 | if (!path) |
684 | return -ENOMEM; | ||
685 | |||
676 | sector_sum = sums->sums; | 686 | sector_sum = sums->sums; |
677 | again: | 687 | again: |
678 | next_offset = (u64)-1; | 688 | next_offset = (u64)-1; |
@@ -814,30 +824,9 @@ found: | |||
814 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 824 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
815 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 825 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
816 | btrfs_item_size_nr(leaf, path->slots[0])); | 826 | btrfs_item_size_nr(leaf, path->slots[0])); |
817 | eb_token = NULL; | ||
818 | next_sector: | 827 | next_sector: |
819 | 828 | ||
820 | if (!eb_token || | 829 | write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
821 | (unsigned long)item + csum_size >= map_start + map_len) { | ||
822 | int err; | ||
823 | |||
824 | if (eb_token) | ||
825 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
826 | eb_token = NULL; | ||
827 | err = map_private_extent_buffer(leaf, (unsigned long)item, | ||
828 | csum_size, | ||
829 | &eb_token, &eb_map, | ||
830 | &map_start, &map_len, KM_USER1); | ||
831 | if (err) | ||
832 | eb_token = NULL; | ||
833 | } | ||
834 | if (eb_token) { | ||
835 | memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), | ||
836 | &sector_sum->sum, csum_size); | ||
837 | } else { | ||
838 | write_extent_buffer(leaf, §or_sum->sum, | ||
839 | (unsigned long)item, csum_size); | ||
840 | } | ||
841 | 830 | ||
842 | total_bytes += root->sectorsize; | 831 | total_bytes += root->sectorsize; |
843 | sector_sum++; | 832 | sector_sum++; |
@@ -850,10 +839,7 @@ next_sector: | |||
850 | goto next_sector; | 839 | goto next_sector; |
851 | } | 840 | } |
852 | } | 841 | } |
853 | if (eb_token) { | 842 | |
854 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
855 | eb_token = NULL; | ||
856 | } | ||
857 | btrfs_mark_buffer_dirty(path->nodes[0]); | 843 | btrfs_mark_buffer_dirty(path->nodes[0]); |
858 | if (total_bytes < sums->len) { | 844 | if (total_bytes < sums->len) { |
859 | btrfs_release_path(path); | 845 | btrfs_release_path(path); |
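[Editor's note] With the token-based map_extent_buffer()/unmap_extent_buffer() interface gone, the csum copy loop above no longer caches a kmap()ed window (eb_token) across iterations; it simply calls write_extent_buffer() each time and lets it handle page boundaries. A userspace model of such a page-spanning write helper (sizes and names are illustrative):

#include <string.h>

#define MODEL_PAGE_SIZE 4096UL

struct paged_buf { unsigned char page[16][MODEL_PAGE_SIZE]; };

/* copy 'len' bytes to byte offset 'off', splitting the copy at page
 * boundaries -- what write_extent_buffer() does for the caller */
static void buf_write(struct paged_buf *b, const void *src,
                      unsigned long off, unsigned long len)
{
    const unsigned char *s = src;

    while (len) {
        unsigned long po = off % MODEL_PAGE_SIZE;
        unsigned long n  = MODEL_PAGE_SIZE - po;

        if (n > len)
            n = len;
        memcpy(&b->page[off / MODEL_PAGE_SIZE][po], s, n);
        s += n;
        off += n;
        len -= n;
    }
}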
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 59cbdb120ad0..3c3abff731a7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -74,7 +74,7 @@ struct inode_defrag { | |||
74 | * If an existing record is found the defrag item you | 74 | * If an existing record is found the defrag item you |
75 | * pass in is freed | 75 | * pass in is freed |
76 | */ | 76 | */ |
77 | static int __btrfs_add_inode_defrag(struct inode *inode, | 77 | static void __btrfs_add_inode_defrag(struct inode *inode, |
78 | struct inode_defrag *defrag) | 78 | struct inode_defrag *defrag) |
79 | { | 79 | { |
80 | struct btrfs_root *root = BTRFS_I(inode)->root; | 80 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode, | |||
106 | BTRFS_I(inode)->in_defrag = 1; | 106 | BTRFS_I(inode)->in_defrag = 1; |
107 | rb_link_node(&defrag->rb_node, parent, p); | 107 | rb_link_node(&defrag->rb_node, parent, p); |
108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
109 | return 0; | 109 | return; |
110 | 110 | ||
111 | exists: | 111 | exists: |
112 | kfree(defrag); | 112 | kfree(defrag); |
113 | return 0; | 113 | return; |
114 | 114 | ||
115 | } | 115 | } |
116 | 116 | ||
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
123 | { | 123 | { |
124 | struct btrfs_root *root = BTRFS_I(inode)->root; | 124 | struct btrfs_root *root = BTRFS_I(inode)->root; |
125 | struct inode_defrag *defrag; | 125 | struct inode_defrag *defrag; |
126 | int ret = 0; | ||
127 | u64 transid; | 126 | u64 transid; |
128 | 127 | ||
129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 128 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
@@ -150,9 +149,11 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
150 | 149 | ||
151 | spin_lock(&root->fs_info->defrag_inodes_lock); | 150 | spin_lock(&root->fs_info->defrag_inodes_lock); |
152 | if (!BTRFS_I(inode)->in_defrag) | 151 | if (!BTRFS_I(inode)->in_defrag) |
153 | ret = __btrfs_add_inode_defrag(inode, defrag); | 152 | __btrfs_add_inode_defrag(inode, defrag); |
153 | else | ||
154 | kfree(defrag); | ||
154 | spin_unlock(&root->fs_info->defrag_inodes_lock); | 155 | spin_unlock(&root->fs_info->defrag_inodes_lock); |
155 | return ret; | 156 | return 0; |
156 | } | 157 | } |
157 | 158 | ||
158 | /* | 159 | /* |
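[Editor's note] Moving the kfree() into the caller's else branch fixes a small leak: when the inode was already marked in_defrag, the freshly allocated record used to be dropped on the floor. The shape of the fix, sketched:

#include <stdlib.h>

struct defrag_rec { unsigned long long ino; unsigned long long transid; };

/* the caller allocated 'rec'; if the inode is already queued the
 * record must be freed, not forgotten */
static void add_inode_defrag(int inode_in_defrag, struct defrag_rec *rec)
{
    if (!inode_in_defrag) {
        /* link rec into the defrag tree and mark the inode */
    } else {
        free(rec);      /* the fix: was silently leaked before */
    }
}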
@@ -855,7 +856,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
855 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 856 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
856 | 857 | ||
857 | path = btrfs_alloc_path(); | 858 | path = btrfs_alloc_path(); |
858 | BUG_ON(!path); | 859 | if (!path) |
860 | return -ENOMEM; | ||
859 | again: | 861 | again: |
860 | recow = 0; | 862 | recow = 0; |
861 | split = start; | 863 | split = start; |
@@ -1059,7 +1061,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) | |||
1059 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | 1061 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, |
1060 | struct page **pages, size_t num_pages, | 1062 | struct page **pages, size_t num_pages, |
1061 | loff_t pos, unsigned long first_index, | 1063 | loff_t pos, unsigned long first_index, |
1062 | unsigned long last_index, size_t write_bytes) | 1064 | size_t write_bytes) |
1063 | { | 1065 | { |
1064 | struct extent_state *cached_state = NULL; | 1066 | struct extent_state *cached_state = NULL; |
1065 | int i; | 1067 | int i; |
@@ -1073,15 +1075,10 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1073 | start_pos = pos & ~((u64)root->sectorsize - 1); | 1075 | start_pos = pos & ~((u64)root->sectorsize - 1); |
1074 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; | 1076 | last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; |
1075 | 1077 | ||
1076 | if (start_pos > inode->i_size) { | ||
1077 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); | ||
1078 | if (err) | ||
1079 | return err; | ||
1080 | } | ||
1081 | |||
1082 | again: | 1078 | again: |
1083 | for (i = 0; i < num_pages; i++) { | 1079 | for (i = 0; i < num_pages; i++) { |
1084 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 1080 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1081 | GFP_NOFS); | ||
1085 | if (!pages[i]) { | 1082 | if (!pages[i]) { |
1086 | faili = i - 1; | 1083 | faili = i - 1; |
1087 | err = -ENOMEM; | 1084 | err = -ENOMEM; |
@@ -1158,7 +1155,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1158 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1155 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1159 | struct page **pages = NULL; | 1156 | struct page **pages = NULL; |
1160 | unsigned long first_index; | 1157 | unsigned long first_index; |
1161 | unsigned long last_index; | ||
1162 | size_t num_written = 0; | 1158 | size_t num_written = 0; |
1163 | int nrptrs; | 1159 | int nrptrs; |
1164 | int ret = 0; | 1160 | int ret = 0; |
@@ -1171,7 +1167,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1171 | return -ENOMEM; | 1167 | return -ENOMEM; |
1172 | 1168 | ||
1173 | first_index = pos >> PAGE_CACHE_SHIFT; | 1169 | first_index = pos >> PAGE_CACHE_SHIFT; |
1174 | last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; | ||
1175 | 1170 | ||
1176 | while (iov_iter_count(i) > 0) { | 1171 | while (iov_iter_count(i) > 0) { |
1177 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 1172 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1205,8 +1200,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1205 | * contents of pages from loop to loop | 1200 | * contents of pages from loop to loop |
1206 | */ | 1201 | */ |
1207 | ret = prepare_pages(root, file, pages, num_pages, | 1202 | ret = prepare_pages(root, file, pages, num_pages, |
1208 | pos, first_index, last_index, | 1203 | pos, first_index, write_bytes); |
1209 | write_bytes); | ||
1210 | if (ret) { | 1204 | if (ret) { |
1211 | btrfs_delalloc_release_space(inode, | 1205 | btrfs_delalloc_release_space(inode, |
1212 | num_pages << PAGE_CACHE_SHIFT); | 1206 | num_pages << PAGE_CACHE_SHIFT); |
@@ -1238,9 +1232,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1238 | * managed to copy. | 1232 | * managed to copy. |
1239 | */ | 1233 | */ |
1240 | if (num_pages > dirty_pages) { | 1234 | if (num_pages > dirty_pages) { |
1241 | if (copied > 0) | 1235 | if (copied > 0) { |
1242 | atomic_inc( | 1236 | spin_lock(&BTRFS_I(inode)->lock); |
1243 | &BTRFS_I(inode)->outstanding_extents); | 1237 | BTRFS_I(inode)->outstanding_extents++; |
1238 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1239 | } | ||
1244 | btrfs_delalloc_release_space(inode, | 1240 | btrfs_delalloc_release_space(inode, |
1245 | (num_pages - dirty_pages) << | 1241 | (num_pages - dirty_pages) << |
1246 | PAGE_CACHE_SHIFT); | 1242 | PAGE_CACHE_SHIFT); |
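[Editor's note] The hunk above is part of switching outstanding_extents from an atomic_t to a plain counter guarded by the new per-inode spinlock, so it can later be read and updated consistently with other per-inode state under one lock. A userspace model, with a pthread mutex standing in for the spinlock:

#include <pthread.h>

struct inode_acct {
    pthread_mutex_t lock;               /* stands in for the spinlock */
    unsigned int outstanding_extents;   /* was atomic_t */
};

static void extents_inc(struct inode_acct *a)
{
    pthread_mutex_lock(&a->lock);
    a->outstanding_extents++;
    pthread_mutex_unlock(&a->lock);
}

static void extents_dec(struct inode_acct *a)
{
    pthread_mutex_lock(&a->lock);
    a->outstanding_extents--;
    pthread_mutex_unlock(&a->lock);
}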
@@ -1336,6 +1332,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1336 | struct inode *inode = fdentry(file)->d_inode; | 1332 | struct inode *inode = fdentry(file)->d_inode; |
1337 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1333 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1338 | loff_t *ppos = &iocb->ki_pos; | 1334 | loff_t *ppos = &iocb->ki_pos; |
1335 | u64 start_pos; | ||
1339 | ssize_t num_written = 0; | 1336 | ssize_t num_written = 0; |
1340 | ssize_t err = 0; | 1337 | ssize_t err = 0; |
1341 | size_t count, ocount; | 1338 | size_t count, ocount; |
@@ -1384,6 +1381,15 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1384 | file_update_time(file); | 1381 | file_update_time(file); |
1385 | BTRFS_I(inode)->sequence++; | 1382 | BTRFS_I(inode)->sequence++; |
1386 | 1383 | ||
1384 | start_pos = round_down(pos, root->sectorsize); | ||
1385 | if (start_pos > i_size_read(inode)) { | ||
1386 | err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); | ||
1387 | if (err) { | ||
1388 | mutex_unlock(&inode->i_mutex); | ||
1389 | goto out; | ||
1390 | } | ||
1391 | } | ||
1392 | |||
1387 | if (unlikely(file->f_flags & O_DIRECT)) { | 1393 | if (unlikely(file->f_flags & O_DIRECT)) { |
1388 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1394 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, |
1389 | pos, ppos, count, ocount); | 1395 | pos, ppos, count, ocount); |
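[Editor's note] btrfs_cont_expand() moves out of prepare_pages() and up into btrfs_file_aio_write(), where the write position is first rounded down to a sector boundary. A sketch of that check; round_down here assumes a power-of-two alignment, as the kernel macro does:

#include <stdint.h>

static inline uint64_t round_down_pow2(uint64_t x, uint64_t align)
{
    return x & ~(align - 1);    /* align must be a power of two */
}

/* expand [i_size, start_pos) with zeroes before writing only when the
 * aligned write position starts beyond the current size */
static int write_needs_expand(uint64_t pos, uint64_t i_size,
                              uint64_t sectorsize)
{
    return round_down_pow2(pos, sectorsize) > i_size;
}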
@@ -1638,11 +1644,15 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1638 | 1644 | ||
1639 | cur_offset = alloc_start; | 1645 | cur_offset = alloc_start; |
1640 | while (1) { | 1646 | while (1) { |
1647 | u64 actual_end; | ||
1648 | |||
1641 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | 1649 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, |
1642 | alloc_end - cur_offset, 0); | 1650 | alloc_end - cur_offset, 0); |
1643 | BUG_ON(IS_ERR_OR_NULL(em)); | 1651 | BUG_ON(IS_ERR_OR_NULL(em)); |
1644 | last_byte = min(extent_map_end(em), alloc_end); | 1652 | last_byte = min(extent_map_end(em), alloc_end); |
1653 | actual_end = min_t(u64, extent_map_end(em), offset + len); | ||
1645 | last_byte = (last_byte + mask) & ~mask; | 1654 | last_byte = (last_byte + mask) & ~mask; |
1655 | |||
1646 | if (em->block_start == EXTENT_MAP_HOLE || | 1656 | if (em->block_start == EXTENT_MAP_HOLE || |
1647 | (cur_offset >= inode->i_size && | 1657 | (cur_offset >= inode->i_size && |
1648 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 1658 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
@@ -1655,6 +1665,16 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1655 | free_extent_map(em); | 1665 | free_extent_map(em); |
1656 | break; | 1666 | break; |
1657 | } | 1667 | } |
1668 | } else if (actual_end > inode->i_size && | ||
1669 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1670 | /* | ||
1671 | * We didn't need to allocate any more space, but we | ||
1672 | * still extended the size of the file so we need to | ||
1673 | * update i_size. | ||
1674 | */ | ||
1675 | inode->i_ctime = CURRENT_TIME; | ||
1676 | i_size_write(inode, actual_end); | ||
1677 | btrfs_ordered_update_i_size(inode, actual_end, NULL); | ||
1658 | } | 1678 | } |
1659 | free_extent_map(em); | 1679 | free_extent_map(em); |
1660 | 1680 | ||
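[Editor's note] The new branch above handles an extending fallocate over an already-allocated range: no new extent is needed, but without FALLOC_FL_KEEP_SIZE the file size still has to grow to the end of the requested range. The size computation, reduced to a sketch:

#include <stdint.h>

#define FALLOC_FL_KEEP_SIZE 0x01

static uint64_t fallocate_new_isize(uint64_t extent_end, uint64_t offset,
                                    uint64_t len, uint64_t i_size, int mode)
{
    /* end of the requested range, clamped to the extent we found */
    uint64_t actual_end = extent_end < offset + len ? extent_end
                                                    : offset + len;

    if (actual_end > i_size && !(mode & FALLOC_FL_KEEP_SIZE))
        return actual_end;              /* caller updates i_size */
    return i_size;                      /* unchanged */
}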
@@ -1804,10 +1824,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | |||
1804 | } | 1824 | } |
1805 | } | 1825 | } |
1806 | 1826 | ||
1807 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 1827 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { |
1808 | return -EINVAL; | 1828 | ret = -EINVAL; |
1809 | if (offset > inode->i_sb->s_maxbytes) | 1829 | goto out; |
1810 | return -EINVAL; | 1830 | } |
1831 | if (offset > inode->i_sb->s_maxbytes) { | ||
1832 | ret = -EINVAL; | ||
1833 | goto out; | ||
1834 | } | ||
1811 | 1835 | ||
1812 | /* Special lock needed here? */ | 1836 | /* Special lock needed here? */ |
1813 | if (offset != file->f_pos) { | 1837 | if (offset != file->f_pos) { |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index bf0d61567f3d..41ac927401d0 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 98 | return inode; |
99 | 99 | ||
100 | spin_lock(&block_group->lock); | 100 | spin_lock(&block_group->lock); |
101 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { | ||
102 | printk(KERN_INFO "Old style space inode found, converting.\n"); | ||
103 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; | ||
104 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
105 | } | ||
106 | |||
101 | if (!btrfs_fs_closing(root->fs_info)) { | 107 | if (!btrfs_fs_closing(root->fs_info)) { |
102 | block_group->inode = igrab(inode); | 108 | block_group->inode = igrab(inode); |
103 | block_group->iref = 1; | 109 | block_group->iref = 1; |
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
135 | btrfs_set_inode_gid(leaf, inode_item, 0); | 141 | btrfs_set_inode_gid(leaf, inode_item, 0); |
136 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | 142 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); |
137 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | 143 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | |
138 | BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); | 144 | BTRFS_INODE_PREALLOC); |
139 | btrfs_set_inode_nlink(leaf, inode_item, 1); | 145 | btrfs_set_inode_nlink(leaf, inode_item, 1); |
140 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | 146 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); |
141 | btrfs_set_inode_block_group(leaf, inode_item, offset); | 147 | btrfs_set_inode_block_group(leaf, inode_item, offset); |
@@ -184,9 +190,11 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
184 | struct btrfs_path *path, | 190 | struct btrfs_path *path, |
185 | struct inode *inode) | 191 | struct inode *inode) |
186 | { | 192 | { |
193 | struct btrfs_block_rsv *rsv; | ||
187 | loff_t oldsize; | 194 | loff_t oldsize; |
188 | int ret = 0; | 195 | int ret = 0; |
189 | 196 | ||
197 | rsv = trans->block_rsv; | ||
190 | trans->block_rsv = root->orphan_block_rsv; | 198 | trans->block_rsv = root->orphan_block_rsv; |
191 | ret = btrfs_block_rsv_check(trans, root, | 199 | ret = btrfs_block_rsv_check(trans, root, |
192 | root->orphan_block_rsv, | 200 | root->orphan_block_rsv, |
@@ -204,6 +212,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
204 | */ | 212 | */ |
205 | ret = btrfs_truncate_inode_items(trans, root, inode, | 213 | ret = btrfs_truncate_inode_items(trans, root, inode, |
206 | 0, BTRFS_EXTENT_DATA_KEY); | 214 | 0, BTRFS_EXTENT_DATA_KEY); |
215 | |||
216 | trans->block_rsv = rsv; | ||
207 | if (ret) { | 217 | if (ret) { |
208 | WARN_ON(1); | 218 | WARN_ON(1); |
209 | return ret; | 219 | return ret; |
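[Editor's note] btrfs_truncate_free_space_cache() used to overwrite trans->block_rsv with the orphan reservation and never put the original back; the hunk above saves and restores it. The pattern, reduced to essentials:

struct rsv   { long reserved; };
struct trans { struct rsv *block_rsv; };

static int truncate_cache(struct trans *t, struct rsv *orphan_rsv)
{
    struct rsv *saved = t->block_rsv;   /* previously left clobbered */
    int ret;

    t->block_rsv = orphan_rsv;
    ret = 0;    /* ... truncate under the orphan reservation ... */
    t->block_rsv = saved;               /* the fix: always restore */
    return ret;
}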
@@ -239,17 +249,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
239 | struct btrfs_free_space_header *header; | 249 | struct btrfs_free_space_header *header; |
240 | struct extent_buffer *leaf; | 250 | struct extent_buffer *leaf; |
241 | struct page *page; | 251 | struct page *page; |
242 | u32 *checksums = NULL, *crc; | ||
243 | char *disk_crcs = NULL; | ||
244 | struct btrfs_key key; | 252 | struct btrfs_key key; |
245 | struct list_head bitmaps; | 253 | struct list_head bitmaps; |
246 | u64 num_entries; | 254 | u64 num_entries; |
247 | u64 num_bitmaps; | 255 | u64 num_bitmaps; |
248 | u64 generation; | 256 | u64 generation; |
249 | u32 cur_crc = ~(u32)0; | ||
250 | pgoff_t index = 0; | 257 | pgoff_t index = 0; |
251 | unsigned long first_page_offset; | ||
252 | int num_checksums; | ||
253 | int ret = 0; | 258 | int ret = 0; |
254 | 259 | ||
255 | INIT_LIST_HEAD(&bitmaps); | 260 | INIT_LIST_HEAD(&bitmaps); |
@@ -292,16 +297,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
292 | if (!num_entries) | 297 | if (!num_entries) |
293 | goto out; | 298 | goto out; |
294 | 299 | ||
295 | /* Setup everything for doing checksumming */ | ||
296 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
297 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
298 | if (!checksums) | ||
299 | goto out; | ||
300 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
301 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
302 | if (!disk_crcs) | ||
303 | goto out; | ||
304 | |||
305 | ret = readahead_cache(inode); | 300 | ret = readahead_cache(inode); |
306 | if (ret) | 301 | if (ret) |
307 | goto out; | 302 | goto out; |
@@ -311,18 +306,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
311 | struct btrfs_free_space *e; | 306 | struct btrfs_free_space *e; |
312 | void *addr; | 307 | void *addr; |
313 | unsigned long offset = 0; | 308 | unsigned long offset = 0; |
314 | unsigned long start_offset = 0; | ||
315 | int need_loop = 0; | 309 | int need_loop = 0; |
316 | 310 | ||
317 | if (!num_entries && !num_bitmaps) | 311 | if (!num_entries && !num_bitmaps) |
318 | break; | 312 | break; |
319 | 313 | ||
320 | if (index == 0) { | 314 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
321 | start_offset = first_page_offset; | ||
322 | offset = start_offset; | ||
323 | } | ||
324 | |||
325 | page = grab_cache_page(inode->i_mapping, index); | ||
326 | if (!page) | 315 | if (!page) |
327 | goto free_cache; | 316 | goto free_cache; |
328 | 317 | ||
@@ -342,8 +331,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
342 | if (index == 0) { | 331 | if (index == 0) { |
343 | u64 *gen; | 332 | u64 *gen; |
344 | 333 | ||
345 | memcpy(disk_crcs, addr, first_page_offset); | 334 | /* |
346 | gen = addr + (sizeof(u32) * num_checksums); | 335 | * We put a bogus crc in the front of the first page in |
336 | * case old kernels try to mount a fs with the new | ||
337 | * format to make sure they discard the cache. | ||
338 | */ | ||
339 | addr += sizeof(u64); | ||
340 | offset += sizeof(u64); | ||
341 | |||
342 | gen = addr; | ||
347 | if (*gen != BTRFS_I(inode)->generation) { | 343 | if (*gen != BTRFS_I(inode)->generation) { |
348 | printk(KERN_ERR "btrfs: space cache generation" | 344 | printk(KERN_ERR "btrfs: space cache generation" |
349 | " (%llu) does not match inode (%llu)\n", | 345 | " (%llu) does not match inode (%llu)\n", |
@@ -355,24 +351,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
355 | page_cache_release(page); | 351 | page_cache_release(page); |
356 | goto free_cache; | 352 | goto free_cache; |
357 | } | 353 | } |
358 | crc = (u32 *)disk_crcs; | 354 | addr += sizeof(u64); |
355 | offset += sizeof(u64); | ||
359 | } | 356 | } |
360 | entry = addr + start_offset; | 357 | entry = addr; |
361 | |||
362 | /* First lets check our crc before we do anything fun */ | ||
363 | cur_crc = ~(u32)0; | ||
364 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
365 | PAGE_CACHE_SIZE - start_offset); | ||
366 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
367 | if (cur_crc != *crc) { | ||
368 | printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", | ||
369 | index); | ||
370 | kunmap(page); | ||
371 | unlock_page(page); | ||
372 | page_cache_release(page); | ||
373 | goto free_cache; | ||
374 | } | ||
375 | crc++; | ||
376 | 358 | ||
377 | while (1) { | 359 | while (1) { |
378 | if (!num_entries) | 360 | if (!num_entries) |
@@ -470,8 +452,6 @@ next: | |||
470 | 452 | ||
471 | ret = 1; | 453 | ret = 1; |
472 | out: | 454 | out: |
473 | kfree(checksums); | ||
474 | kfree(disk_crcs); | ||
475 | return ret; | 455 | return ret; |
476 | free_cache: | 456 | free_cache: |
477 | __btrfs_remove_free_space_cache(ctl); | 457 | __btrfs_remove_free_space_cache(ctl); |
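[Editor's note] These hunks drop the old per-page checksum array from the space cache format: the first page now begins with a u64 slot deliberately holding a bogus crc, so pre-change kernels fail verification and discard the cache, followed by the u64 generation; entries start immediately after. A sketch of parsing that first-page header (native-endian u64s, as the cache writes them; names are illustrative):

#include <stdint.h>
#include <string.h>

struct cache_hdr {
    uint64_t bogus_crc;     /* intentionally wrong for old kernels */
    uint64_t generation;
};

static const void *cache_first_entry(const void *page, uint64_t want_gen)
{
    struct cache_hdr hdr;

    memcpy(&hdr, page, sizeof(hdr));
    if (hdr.generation != want_gen)
        return NULL;                          /* stale cache: discard */
    return (const uint8_t *)page + sizeof(hdr);
}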
@@ -569,8 +549,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
569 | struct btrfs_key key; | 549 | struct btrfs_key key; |
570 | u64 start, end, len; | 550 | u64 start, end, len; |
571 | u64 bytes = 0; | 551 | u64 bytes = 0; |
572 | u32 *crc, *checksums; | 552 | u32 crc = ~(u32)0; |
573 | unsigned long first_page_offset; | ||
574 | int index = 0, num_pages = 0; | 553 | int index = 0, num_pages = 0; |
575 | int entries = 0; | 554 | int entries = 0; |
576 | int bitmaps = 0; | 555 | int bitmaps = 0; |
@@ -590,34 +569,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 569 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
591 | PAGE_CACHE_SHIFT; | 570 | PAGE_CACHE_SHIFT; |
592 | 571 | ||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | filemap_write_and_wait(inode->i_mapping); | 572 | filemap_write_and_wait(inode->i_mapping); |
600 | btrfs_wait_ordered_range(inode, inode->i_size & | 573 | btrfs_wait_ordered_range(inode, inode->i_size & |
601 | ~(root->sectorsize - 1), (u64)-1); | 574 | ~(root->sectorsize - 1), (u64)-1); |
602 | 575 | ||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
611 | /* We need a checksum per page. */ | ||
612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | ||
613 | if (!crc) | ||
614 | return -1; | ||
615 | |||
616 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | 576 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); |
617 | if (!pages) { | 577 | if (!pages) |
618 | kfree(crc); | ||
619 | return -1; | 578 | return -1; |
620 | } | ||
621 | 579 | ||
622 | /* Get the cluster for this block_group if it exists */ | 580 | /* Get the cluster for this block_group if it exists */ |
623 | if (block_group && !list_empty(&block_group->cluster_list)) | 581 | if (block_group && !list_empty(&block_group->cluster_list)) |
@@ -640,7 +598,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
640 | * know and don't freak out. | 598 | * know and don't freak out. |
641 | */ | 599 | */ |
642 | while (index < num_pages) { | 600 | while (index < num_pages) { |
643 | page = grab_cache_page(inode->i_mapping, index); | 601 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
644 | if (!page) { | 602 | if (!page) { |
645 | int i; | 603 | int i; |
646 | 604 | ||
@@ -648,7 +606,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
648 | unlock_page(pages[i]); | 606 | unlock_page(pages[i]); |
649 | page_cache_release(pages[i]); | 607 | page_cache_release(pages[i]); |
650 | } | 608 | } |
651 | goto out_free; | 609 | goto out; |
652 | } | 610 | } |
653 | pages[index] = page; | 611 | pages[index] = page; |
654 | index++; | 612 | index++; |
@@ -668,17 +626,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
668 | /* Write out the extent entries */ | 626 | /* Write out the extent entries */ |
669 | do { | 627 | do { |
670 | struct btrfs_free_space_entry *entry; | 628 | struct btrfs_free_space_entry *entry; |
671 | void *addr; | 629 | void *addr, *orig; |
672 | unsigned long offset = 0; | 630 | unsigned long offset = 0; |
673 | unsigned long start_offset = 0; | ||
674 | 631 | ||
675 | next_page = false; | 632 | next_page = false; |
676 | 633 | ||
677 | if (index == 0) { | ||
678 | start_offset = first_page_offset; | ||
679 | offset = start_offset; | ||
680 | } | ||
681 | |||
682 | if (index >= num_pages) { | 634 | if (index >= num_pages) { |
683 | out_of_space = true; | 635 | out_of_space = true; |
684 | break; | 636 | break; |
@@ -686,10 +638,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
686 | 638 | ||
687 | page = pages[index]; | 639 | page = pages[index]; |
688 | 640 | ||
689 | addr = kmap(page); | 641 | orig = addr = kmap(page); |
690 | entry = addr + start_offset; | 642 | if (index == 0) { |
643 | u64 *gen; | ||
691 | 644 | ||
692 | memset(addr, 0, PAGE_CACHE_SIZE); | 645 | /* |
646 | * We're going to put in a bogus crc for this page to | ||
647 | * make sure that old kernels who aren't aware of this | ||
648 | * format will be sure to discard the cache. | ||
649 | */ | ||
650 | addr += sizeof(u64); | ||
651 | offset += sizeof(u64); | ||
652 | |||
653 | gen = addr; | ||
654 | *gen = trans->transid; | ||
655 | addr += sizeof(u64); | ||
656 | offset += sizeof(u64); | ||
657 | } | ||
658 | entry = addr; | ||
659 | |||
660 | memset(addr, 0, PAGE_CACHE_SIZE - offset); | ||
693 | while (node && !next_page) { | 661 | while (node && !next_page) { |
694 | struct btrfs_free_space *e; | 662 | struct btrfs_free_space *e; |
695 | 663 | ||
@@ -752,13 +720,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
752 | next_page = true; | 720 | next_page = true; |
753 | entry++; | 721 | entry++; |
754 | } | 722 | } |
755 | *crc = ~(u32)0; | ||
756 | *crc = btrfs_csum_data(root, addr + start_offset, *crc, | ||
757 | PAGE_CACHE_SIZE - start_offset); | ||
758 | kunmap(page); | ||
759 | 723 | ||
760 | btrfs_csum_final(*crc, (char *)crc); | 724 | /* Generate bogus crc value */ |
761 | crc++; | 725 | if (index == 0) { |
726 | u32 *tmp; | ||
727 | crc = btrfs_csum_data(root, orig + sizeof(u64), crc, | ||
728 | PAGE_CACHE_SIZE - sizeof(u64)); | ||
729 | btrfs_csum_final(crc, (char *)&crc); | ||
730 | crc++; | ||
731 | tmp = orig; | ||
732 | *tmp = crc; | ||
733 | } | ||
734 | |||
735 | kunmap(page); | ||
762 | 736 | ||
763 | bytes += PAGE_CACHE_SIZE; | 737 | bytes += PAGE_CACHE_SIZE; |
764 | 738 | ||
@@ -779,11 +753,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
779 | 753 | ||
780 | addr = kmap(page); | 754 | addr = kmap(page); |
781 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 755 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
782 | *crc = ~(u32)0; | ||
783 | *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); | ||
784 | kunmap(page); | 756 | kunmap(page); |
785 | btrfs_csum_final(*crc, (char *)crc); | ||
786 | crc++; | ||
787 | bytes += PAGE_CACHE_SIZE; | 757 | bytes += PAGE_CACHE_SIZE; |
788 | 758 | ||
789 | list_del_init(&entry->list); | 759 | list_del_init(&entry->list); |
@@ -796,7 +766,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
796 | i_size_read(inode) - 1, &cached_state, | 766 | i_size_read(inode) - 1, &cached_state, |
797 | GFP_NOFS); | 767 | GFP_NOFS); |
798 | ret = 0; | 768 | ret = 0; |
799 | goto out_free; | 769 | goto out; |
800 | } | 770 | } |
801 | 771 | ||
802 | /* Zero out the rest of the pages just to make sure */ | 772 | /* Zero out the rest of the pages just to make sure */ |
@@ -811,20 +781,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
811 | index++; | 781 | index++; |
812 | } | 782 | } |
813 | 783 | ||
814 | /* Write the checksums and trans id to the first page */ | ||
815 | { | ||
816 | void *addr; | ||
817 | u64 *gen; | ||
818 | |||
819 | page = pages[0]; | ||
820 | |||
821 | addr = kmap(page); | ||
822 | memcpy(addr, checksums, sizeof(u32) * num_pages); | ||
823 | gen = addr + (sizeof(u32) * num_pages); | ||
824 | *gen = trans->transid; | ||
825 | kunmap(page); | ||
826 | } | ||
827 | |||
828 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | 784 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, |
829 | bytes, &cached_state); | 785 | bytes, &cached_state); |
830 | btrfs_drop_pages(pages, num_pages); | 786 | btrfs_drop_pages(pages, num_pages); |
@@ -833,7 +789,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
833 | 789 | ||
834 | if (ret) { | 790 | if (ret) { |
835 | ret = 0; | 791 | ret = 0; |
836 | goto out_free; | 792 | goto out; |
837 | } | 793 | } |
838 | 794 | ||
839 | BTRFS_I(inode)->generation = trans->transid; | 795 | BTRFS_I(inode)->generation = trans->transid; |
@@ -850,7 +806,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
850 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 806 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, |
851 | EXTENT_DIRTY | EXTENT_DELALLOC | | 807 | EXTENT_DIRTY | EXTENT_DELALLOC | |
852 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | 808 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); |
853 | goto out_free; | 809 | goto out; |
854 | } | 810 | } |
855 | leaf = path->nodes[0]; | 811 | leaf = path->nodes[0]; |
856 | if (ret > 0) { | 812 | if (ret > 0) { |
@@ -866,7 +822,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
866 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | 822 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, |
867 | GFP_NOFS); | 823 | GFP_NOFS); |
868 | btrfs_release_path(path); | 824 | btrfs_release_path(path); |
869 | goto out_free; | 825 | goto out; |
870 | } | 826 | } |
871 | } | 827 | } |
872 | header = btrfs_item_ptr(leaf, path->slots[0], | 828 | header = btrfs_item_ptr(leaf, path->slots[0], |
@@ -879,11 +835,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
879 | 835 | ||
880 | ret = 1; | 836 | ret = 1; |
881 | 837 | ||
882 | out_free: | 838 | out: |
883 | kfree(checksums); | ||
884 | kfree(pages); | 839 | kfree(pages); |
885 | |||
886 | out_update: | ||
887 | if (ret != 1) { | 840 | if (ret != 1) { |
888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 841 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); |
889 | BTRFS_I(inode)->generation = 0; | 842 | BTRFS_I(inode)->generation = 0; |
@@ -1219,9 +1172,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1219 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); | 1172 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
1220 | } | 1173 | } |
1221 | 1174 | ||
1222 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | 1175 | static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, |
1223 | struct btrfs_free_space *info, u64 offset, | 1176 | struct btrfs_free_space *info, |
1224 | u64 bytes) | 1177 | u64 offset, u64 bytes) |
1225 | { | 1178 | { |
1226 | unsigned long start, count; | 1179 | unsigned long start, count; |
1227 | 1180 | ||
@@ -1232,6 +1185,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | |||
1232 | bitmap_clear(info->bitmap, start, count); | 1185 | bitmap_clear(info->bitmap, start, count); |
1233 | 1186 | ||
1234 | info->bytes -= bytes; | 1187 | info->bytes -= bytes; |
1188 | } | ||
1189 | |||
1190 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | ||
1191 | struct btrfs_free_space *info, u64 offset, | ||
1192 | u64 bytes) | ||
1193 | { | ||
1194 | __bitmap_clear_bits(ctl, info, offset, bytes); | ||
1235 | ctl->free_space -= bytes; | 1195 | ctl->free_space -= bytes; |
1236 | } | 1196 | } |
1237 | 1197 | ||
@@ -2035,7 +1995,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | |||
2035 | return 0; | 1995 | return 0; |
2036 | 1996 | ||
2037 | ret = search_start; | 1997 | ret = search_start; |
2038 | bitmap_clear_bits(ctl, entry, ret, bytes); | 1998 | __bitmap_clear_bits(ctl, entry, ret, bytes); |
2039 | 1999 | ||
2040 | return ret; | 2000 | return ret; |
2041 | } | 2001 | } |
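[Editor's note] bitmap_clear_bits() is split so that cluster allocations can call the bare __bitmap_clear_bits(): a cluster's bytes were already subtracted from ctl->free_space when the cluster was set up, so charging them again would double-count. Reduced model:

struct ctl   { unsigned long long free_space; };
struct entry { unsigned long long bytes; /* bitmap omitted */ };

/* bare helper: fix up the entry only */
static void __clear_bits(struct entry *e, unsigned long long bytes)
{
    e->bytes -= bytes;          /* real code also clears bitmap bits */
}

/* public version: additionally charge the global counter */
static void clear_bits(struct ctl *c, struct entry *e,
                       unsigned long long bytes)
{
    __clear_bits(e, bytes);
    c->free_space -= bytes;
}

/* the cluster allocation path above calls __clear_bits() only, because
 * the cluster's bytes already left free_space when it was created */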
@@ -2090,7 +2050,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
2090 | continue; | 2050 | continue; |
2091 | } | 2051 | } |
2092 | } else { | 2052 | } else { |
2093 | |||
2094 | ret = entry->offset; | 2053 | ret = entry->offset; |
2095 | 2054 | ||
2096 | entry->offset += bytes; | 2055 | entry->offset += bytes; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e91b097e7252..4d14de6d121b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | |||
750 | return alloc_hint; | 750 | return alloc_hint; |
751 | } | 751 | } |
752 | 752 | ||
753 | static inline bool is_free_space_inode(struct btrfs_root *root, | ||
754 | struct inode *inode) | ||
755 | { | ||
756 | if (root == root->fs_info->tree_root || | ||
757 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | ||
758 | return true; | ||
759 | return false; | ||
760 | } | ||
761 | |||
762 | /* | 753 | /* |
763 | * when extent_io.c finds a delayed allocation range in the file, | 754 | * when extent_io.c finds a delayed allocation range in the file, |
764 | * the call backs end up in this code. The basic idea is to | 755 | * the call backs end up in this code. The basic idea is to |
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
791 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 782 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
792 | int ret = 0; | 783 | int ret = 0; |
793 | 784 | ||
794 | BUG_ON(is_free_space_inode(root, inode)); | 785 | BUG_ON(btrfs_is_free_space_inode(root, inode)); |
795 | trans = btrfs_join_transaction(root); | 786 | trans = btrfs_join_transaction(root); |
796 | BUG_ON(IS_ERR(trans)); | 787 | BUG_ON(IS_ERR(trans)); |
797 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 788 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
@@ -1070,9 +1061,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1070 | u64 ino = btrfs_ino(inode); | 1061 | u64 ino = btrfs_ino(inode); |
1071 | 1062 | ||
1072 | path = btrfs_alloc_path(); | 1063 | path = btrfs_alloc_path(); |
1073 | BUG_ON(!path); | 1064 | if (!path) |
1065 | return -ENOMEM; | ||
1074 | 1066 | ||
1075 | nolock = is_free_space_inode(root, inode); | 1067 | nolock = btrfs_is_free_space_inode(root, inode); |
1076 | 1068 | ||
1077 | if (nolock) | 1069 | if (nolock) |
1078 | trans = btrfs_join_transaction_nolock(root); | 1070 | trans = btrfs_join_transaction_nolock(root); |
@@ -1291,15 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1291 | return ret; | 1283 | return ret; |
1292 | } | 1284 | } |
1293 | 1285 | ||
1294 | static int btrfs_split_extent_hook(struct inode *inode, | 1286 | static void btrfs_split_extent_hook(struct inode *inode, |
1295 | struct extent_state *orig, u64 split) | 1287 | struct extent_state *orig, u64 split) |
1296 | { | 1288 | { |
1297 | /* not delalloc, ignore it */ | 1289 | /* not delalloc, ignore it */ |
1298 | if (!(orig->state & EXTENT_DELALLOC)) | 1290 | if (!(orig->state & EXTENT_DELALLOC)) |
1299 | return 0; | 1291 | return; |
1300 | 1292 | ||
1301 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1293 | spin_lock(&BTRFS_I(inode)->lock); |
1302 | return 0; | 1294 | BTRFS_I(inode)->outstanding_extents++; |
1295 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1303 | } | 1296 | } |
1304 | 1297 | ||
1305 | /* | 1298 | /* |
@@ -1308,16 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode, | |||
1308 | * extents, such as when we are doing sequential writes, so we can properly | 1301 | * extents, such as when we are doing sequential writes, so we can properly |
1309 | * account for the metadata space we'll need. | 1302 | * account for the metadata space we'll need. |
1310 | */ | 1303 | */ |
1311 | static int btrfs_merge_extent_hook(struct inode *inode, | 1304 | static void btrfs_merge_extent_hook(struct inode *inode, |
1312 | struct extent_state *new, | 1305 | struct extent_state *new, |
1313 | struct extent_state *other) | 1306 | struct extent_state *other) |
1314 | { | 1307 | { |
1315 | /* not delalloc, ignore it */ | 1308 | /* not delalloc, ignore it */ |
1316 | if (!(other->state & EXTENT_DELALLOC)) | 1309 | if (!(other->state & EXTENT_DELALLOC)) |
1317 | return 0; | 1310 | return; |
1318 | 1311 | ||
1319 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1312 | spin_lock(&BTRFS_I(inode)->lock); |
1320 | return 0; | 1313 | BTRFS_I(inode)->outstanding_extents--; |
1314 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1321 | } | 1315 | } |
1322 | 1316 | ||
1323 | /* | 1317 | /* |
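[Editor's note] The extent-state hooks (split, merge, set_bit, clear_bit) can never fail, so this series changes them from int to void instead of returning a 0 that no caller checked. The shape of the conversion, sketched:

struct extent_state { unsigned int state; };

#define EXTENT_DELALLOC 0x20    /* illustrative bit value */

/* before: static int split_hook(...) { ...; return 0; } */
static void split_hook(struct extent_state *orig, unsigned int *outstanding)
{
    if (!(orig->state & EXTENT_DELALLOC))
        return;                 /* not delalloc, nothing to account */
    (*outstanding)++;           /* caller serializes via the inode lock */
}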
@@ -1325,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1325 | * bytes in this file, and to maintain the list of inodes that | 1319 | * bytes in this file, and to maintain the list of inodes that |
1326 | * have pending delalloc work to be done. | 1320 | * have pending delalloc work to be done. |
1327 | */ | 1321 | */ |
1328 | static int btrfs_set_bit_hook(struct inode *inode, | 1322 | static void btrfs_set_bit_hook(struct inode *inode, |
1329 | struct extent_state *state, int *bits) | 1323 | struct extent_state *state, int *bits) |
1330 | { | 1324 | { |
1331 | 1325 | ||
1332 | /* | 1326 | /* |
@@ -1337,12 +1331,15 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1337 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1331 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1338 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1332 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1339 | u64 len = state->end + 1 - state->start; | 1333 | u64 len = state->end + 1 - state->start; |
1340 | bool do_list = !is_free_space_inode(root, inode); | 1334 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1341 | 1335 | ||
1342 | if (*bits & EXTENT_FIRST_DELALLOC) | 1336 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1343 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1337 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1344 | else | 1338 | } else { |
1345 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1339 | spin_lock(&BTRFS_I(inode)->lock); |
1340 | BTRFS_I(inode)->outstanding_extents++; | ||
1341 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1342 | } | ||
1346 | 1343 | ||
1347 | spin_lock(&root->fs_info->delalloc_lock); | 1344 | spin_lock(&root->fs_info->delalloc_lock); |
1348 | BTRFS_I(inode)->delalloc_bytes += len; | 1345 | BTRFS_I(inode)->delalloc_bytes += len; |
@@ -1353,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1353 | } | 1350 | } |
1354 | spin_unlock(&root->fs_info->delalloc_lock); | 1351 | spin_unlock(&root->fs_info->delalloc_lock); |
1355 | } | 1352 | } |
1356 | return 0; | ||
1357 | } | 1353 | } |
1358 | 1354 | ||
1359 | /* | 1355 | /* |
1360 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1356 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1361 | */ | 1357 | */ |
1362 | static int btrfs_clear_bit_hook(struct inode *inode, | 1358 | static void btrfs_clear_bit_hook(struct inode *inode, |
1363 | struct extent_state *state, int *bits) | 1359 | struct extent_state *state, int *bits) |
1364 | { | 1360 | { |
1365 | /* | 1361 | /* |
1366 | * set_bit and clear bit hooks normally require _irqsave/restore | 1362 | * set_bit and clear bit hooks normally require _irqsave/restore |
@@ -1370,12 +1366,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1370 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1366 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1371 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1367 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1372 | u64 len = state->end + 1 - state->start; | 1368 | u64 len = state->end + 1 - state->start; |
1373 | bool do_list = !is_free_space_inode(root, inode); | 1369 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1374 | 1370 | ||
1375 | if (*bits & EXTENT_FIRST_DELALLOC) | 1371 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1376 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1372 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1377 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) | 1373 | } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { |
1378 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1374 | spin_lock(&BTRFS_I(inode)->lock); |
1375 | BTRFS_I(inode)->outstanding_extents--; | ||
1376 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1377 | } | ||
1379 | 1378 | ||
1380 | if (*bits & EXTENT_DO_ACCOUNTING) | 1379 | if (*bits & EXTENT_DO_ACCOUNTING) |
1381 | btrfs_delalloc_release_metadata(inode, len); | 1380 | btrfs_delalloc_release_metadata(inode, len); |
@@ -1394,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1394 | } | 1393 | } |
1395 | spin_unlock(&root->fs_info->delalloc_lock); | 1394 | spin_unlock(&root->fs_info->delalloc_lock); |
1396 | } | 1395 | } |
1397 | return 0; | ||
1398 | } | 1396 | } |
1399 | 1397 | ||
1400 | /* | 1398 | /* |
@@ -1477,7 +1475,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1477 | 1475 | ||
1478 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1476 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1479 | 1477 | ||
1480 | if (is_free_space_inode(root, inode)) | 1478 | if (btrfs_is_free_space_inode(root, inode)) |
1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); | 1479 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); |
1482 | else | 1480 | else |
1483 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
@@ -1644,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1644 | int ret; | 1642 | int ret; |
1645 | 1643 | ||
1646 | path = btrfs_alloc_path(); | 1644 | path = btrfs_alloc_path(); |
1647 | BUG_ON(!path); | 1645 | if (!path) |
1646 | return -ENOMEM; | ||
1648 | 1647 | ||
1649 | path->leave_spinning = 1; | 1648 | path->leave_spinning = 1; |
1650 | 1649 | ||
@@ -1726,7 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1726 | return 0; | 1725 | return 0; |
1727 | BUG_ON(!ordered_extent); | 1726 | BUG_ON(!ordered_extent); |
1728 | 1727 | ||
1729 | nolock = is_free_space_inode(root, inode); | 1728 | nolock = btrfs_is_free_space_inode(root, inode); |
1730 | 1729 | ||
1731 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1730 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1732 | BUG_ON(!list_empty(&ordered_extent->list)); | 1731 | BUG_ON(!list_empty(&ordered_extent->list)); |
@@ -1787,7 +1786,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1787 | &ordered_extent->list); | 1786 | &ordered_extent->list); |
1788 | 1787 | ||
1789 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1788 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1790 | if (!ret) { | 1789 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1791 | ret = btrfs_update_inode(trans, root, inode); | 1790 | ret = btrfs_update_inode(trans, root, inode); |
1792 | BUG_ON(ret); | 1791 | BUG_ON(ret); |
1793 | } | 1792 | } |
@@ -2214,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2214 | 2213 | ||
2215 | if (!root->orphan_block_rsv) { | 2214 | if (!root->orphan_block_rsv) { |
2216 | block_rsv = btrfs_alloc_block_rsv(root); | 2215 | block_rsv = btrfs_alloc_block_rsv(root); |
2217 | BUG_ON(!block_rsv); | 2216 | if (!block_rsv) |
2217 | return -ENOMEM; | ||
2218 | } | 2218 | } |
2219 | 2219 | ||
2220 | spin_lock(&root->orphan_lock); | 2220 | spin_lock(&root->orphan_lock); |
@@ -2516,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2516 | filled = true; | 2516 | filled = true; |
2517 | 2517 | ||
2518 | path = btrfs_alloc_path(); | 2518 | path = btrfs_alloc_path(); |
2519 | BUG_ON(!path); | 2519 | if (!path) |
2520 | goto make_bad; | ||
2521 | |||
2520 | path->leave_spinning = 1; | 2522 | path->leave_spinning = 1; |
2521 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 2523 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
2522 | 2524 | ||
@@ -2531,13 +2533,6 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2531 | 2533 | ||
2532 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2534 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2533 | struct btrfs_inode_item); | 2535 | struct btrfs_inode_item); |
2534 | if (!leaf->map_token) | ||
2535 | map_private_extent_buffer(leaf, (unsigned long)inode_item, | ||
2536 | sizeof(struct btrfs_inode_item), | ||
2537 | &leaf->map_token, &leaf->kaddr, | ||
2538 | &leaf->map_start, &leaf->map_len, | ||
2539 | KM_USER1); | ||
2540 | |||
2541 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2536 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2542 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 2537 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
2543 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | 2538 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); |
@@ -2575,11 +2570,6 @@ cache_acl: | |||
2575 | if (!maybe_acls) | 2570 | if (!maybe_acls) |
2576 | cache_no_acl(inode); | 2571 | cache_no_acl(inode); |
2577 | 2572 | ||
2578 | if (leaf->map_token) { | ||
2579 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2580 | leaf->map_token = NULL; | ||
2581 | } | ||
2582 | |||
2583 | btrfs_free_path(path); | 2573 | btrfs_free_path(path); |
2584 | 2574 | ||
2585 | switch (inode->i_mode & S_IFMT) { | 2575 | switch (inode->i_mode & S_IFMT) { |
@@ -2624,13 +2614,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2624 | struct btrfs_inode_item *item, | 2614 | struct btrfs_inode_item *item, |
2625 | struct inode *inode) | 2615 | struct inode *inode) |
2626 | { | 2616 | { |
2627 | if (!leaf->map_token) | ||
2628 | map_private_extent_buffer(leaf, (unsigned long)item, | ||
2629 | sizeof(struct btrfs_inode_item), | ||
2630 | &leaf->map_token, &leaf->kaddr, | ||
2631 | &leaf->map_start, &leaf->map_len, | ||
2632 | KM_USER1); | ||
2633 | |||
2634 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2617 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
2635 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2618 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
2636 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2619 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
@@ -2659,11 +2642,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2659 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2642 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2660 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2643 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2661 | btrfs_set_inode_block_group(leaf, item, 0); | 2644 | btrfs_set_inode_block_group(leaf, item, 0); |
2662 | |||
2663 | if (leaf->map_token) { | ||
2664 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2665 | leaf->map_token = NULL; | ||
2666 | } | ||
2667 | } | 2645 | } |
2668 | 2646 | ||
2669 | /* | 2647 | /* |
@@ -2684,7 +2662,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2684 | * The data relocation inode should also be directly updated | 2662 | * The data relocation inode should also be directly updated |
2685 | * without delay | 2663 | * without delay |
2686 | */ | 2664 | */ |
2687 | if (!is_free_space_inode(root, inode) | 2665 | if (!btrfs_is_free_space_inode(root, inode) |
2688 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | 2666 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { |
2689 | ret = btrfs_delayed_update_inode(trans, root, inode); | 2667 | ret = btrfs_delayed_update_inode(trans, root, inode); |
2690 | if (!ret) | 2668 | if (!ret) |
@@ -3021,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3021 | 2999 | ||
3022 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 3000 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
3023 | dentry->d_name.name, dentry->d_name.len); | 3001 | dentry->d_name.name, dentry->d_name.len); |
3024 | BUG_ON(ret); | 3002 | if (ret) |
3003 | goto out; | ||
3025 | 3004 | ||
3026 | if (inode->i_nlink == 0) { | 3005 | if (inode->i_nlink == 0) { |
3027 | ret = btrfs_orphan_add(trans, inode); | 3006 | ret = btrfs_orphan_add(trans, inode); |
3028 | BUG_ON(ret); | 3007 | if (ret) |
3008 | goto out; | ||
3029 | } | 3009 | } |
3030 | 3010 | ||
3011 | out: | ||
3031 | nr = trans->blocks_used; | 3012 | nr = trans->blocks_used; |
3032 | __unlink_end_trans(trans, root); | 3013 | __unlink_end_trans(trans, root); |
3033 | btrfs_btree_balance_dirty(root, nr); | 3014 | btrfs_btree_balance_dirty(root, nr); |
@@ -3170,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3170 | 3151 | ||
3171 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); | 3152 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
3172 | 3153 | ||
3154 | path = btrfs_alloc_path(); | ||
3155 | if (!path) | ||
3156 | return -ENOMEM; | ||
3157 | path->reada = -1; | ||
3158 | |||
3173 | if (root->ref_cows || root == root->fs_info->tree_root) | 3159 | if (root->ref_cows || root == root->fs_info->tree_root) |
3174 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3160 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); |
3175 | 3161 | ||
@@ -3182,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3182 | if (min_type == 0 && root == BTRFS_I(inode)->root) | 3168 | if (min_type == 0 && root == BTRFS_I(inode)->root) |
3183 | btrfs_kill_delayed_inode_items(inode); | 3169 | btrfs_kill_delayed_inode_items(inode); |
3184 | 3170 | ||
3185 | path = btrfs_alloc_path(); | ||
3186 | BUG_ON(!path); | ||
3187 | path->reada = -1; | ||
3188 | |||
3189 | key.objectid = ino; | 3171 | key.objectid = ino; |
3190 | key.offset = (u64)-1; | 3172 | key.offset = (u64)-1; |
3191 | key.type = (u8)-1; | 3173 | key.type = (u8)-1; |
@@ -3398,7 +3380,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3398 | 3380 | ||
3399 | ret = -ENOMEM; | 3381 | ret = -ENOMEM; |
3400 | again: | 3382 | again: |
3401 | page = grab_cache_page(mapping, index); | 3383 | page = find_or_create_page(mapping, index, GFP_NOFS); |
3402 | if (!page) { | 3384 | if (!page) { |
3403 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3385 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3404 | goto out; | 3386 | goto out; |
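Note: grab_cache_page() allocates with the mapping's gfp mask, which normally includes __GFP_FS and so can recurse into filesystem reclaim while a transaction is open; find_or_create_page() with an explicit GFP_NOFS avoids that. A minimal sketch of the resulting pattern (variable names assumed from the surrounding code):

        struct page *page;

        /* look up or allocate the page without entering fs reclaim */
        page = find_or_create_page(mapping, index, GFP_NOFS);
        if (!page) {
                /* release the delalloc space reserved for this page */
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                return -ENOMEM;
        }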
@@ -3528,15 +3510,19 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3528 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3510 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3529 | cur_offset + hole_size, | 3511 | cur_offset + hole_size, |
3530 | &hint_byte, 1); | 3512 | &hint_byte, 1); |
3531 | if (err) | 3513 | if (err) { |
3514 | btrfs_end_transaction(trans, root); | ||
3532 | break; | 3515 | break; |
3516 | } | ||
3533 | 3517 | ||
3534 | err = btrfs_insert_file_extent(trans, root, | 3518 | err = btrfs_insert_file_extent(trans, root, |
3535 | btrfs_ino(inode), cur_offset, 0, | 3519 | btrfs_ino(inode), cur_offset, 0, |
3536 | 0, hole_size, 0, hole_size, | 3520 | 0, hole_size, 0, hole_size, |
3537 | 0, 0, 0); | 3521 | 0, 0, 0); |
3538 | if (err) | 3522 | if (err) { |
3523 | btrfs_end_transaction(trans, root); | ||
3539 | break; | 3524 | break; |
3525 | } | ||
3540 | 3526 | ||
3541 | btrfs_drop_extent_cache(inode, hole_start, | 3527 | btrfs_drop_extent_cache(inode, hole_start, |
3542 | last_byte - 1, 0); | 3528 | last_byte - 1, 0); |
@@ -3634,7 +3620,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3634 | 3620 | ||
3635 | truncate_inode_pages(&inode->i_data, 0); | 3621 | truncate_inode_pages(&inode->i_data, 0); |
3636 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3622 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3637 | is_free_space_inode(root, inode))) | 3623 | btrfs_is_free_space_inode(root, inode))) |
3638 | goto no_delete; | 3624 | goto no_delete; |
3639 | 3625 | ||
3640 | if (is_bad_inode(inode)) { | 3626 | if (is_bad_inode(inode)) { |
@@ -3713,7 +3699,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, | |||
3713 | int ret = 0; | 3699 | int ret = 0; |
3714 | 3700 | ||
3715 | path = btrfs_alloc_path(); | 3701 | path = btrfs_alloc_path(); |
3716 | BUG_ON(!path); | 3702 | if (!path) |
3703 | return -ENOMEM; | ||
3717 | 3704 | ||
3718 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, | 3705 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, |
3719 | namelen, 0); | 3706 | namelen, 0); |
@@ -3978,10 +3965,16 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3978 | BTRFS_I(inode)->root = root; | 3965 | BTRFS_I(inode)->root = root; |
3979 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); | 3966 | memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); |
3980 | btrfs_read_locked_inode(inode); | 3967 | btrfs_read_locked_inode(inode); |
3981 | inode_tree_add(inode); | 3968 | if (!is_bad_inode(inode)) { |
3982 | unlock_new_inode(inode); | 3969 | inode_tree_add(inode); |
3983 | if (new) | 3970 | unlock_new_inode(inode); |
3984 | *new = 1; | 3971 | if (new) |
3972 | *new = 1; | ||
3973 | } else { | ||
3974 | unlock_new_inode(inode); | ||
3975 | iput(inode); | ||
3976 | inode = ERR_PTR(-ESTALE); | ||
3977 | } | ||
3985 | } | 3978 | } |
3986 | 3979 | ||
3987 | return inode; | 3980 | return inode; |
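With this hunk btrfs_iget() no longer returns an inode for which is_bad_inode() is true; the bad case becomes an error pointer. A sketch of the assumed caller-side handling:

        inode = btrfs_iget(sb, &location, root, NULL);
        if (IS_ERR(inode))
                return ERR_CAST(inode); /* -ESTALE when the inode item is missing */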
@@ -4016,12 +4009,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4016 | struct btrfs_root *sub_root = root; | 4009 | struct btrfs_root *sub_root = root; |
4017 | struct btrfs_key location; | 4010 | struct btrfs_key location; |
4018 | int index; | 4011 | int index; |
4019 | int ret; | 4012 | int ret = 0; |
4020 | 4013 | ||
4021 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 4014 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
4022 | return ERR_PTR(-ENAMETOOLONG); | 4015 | return ERR_PTR(-ENAMETOOLONG); |
4023 | 4016 | ||
4024 | ret = btrfs_inode_by_name(dir, dentry, &location); | 4017 | if (unlikely(d_need_lookup(dentry))) { |
4018 | memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); | ||
4019 | kfree(dentry->d_fsdata); | ||
4020 | dentry->d_fsdata = NULL; | ||
4021 | d_clear_need_lookup(dentry); | ||
4022 | } else { | ||
4023 | ret = btrfs_inode_by_name(dir, dentry, &location); | ||
4024 | } | ||
4025 | 4025 | ||
4026 | if (ret < 0) | 4026 | if (ret < 0) |
4027 | return ERR_PTR(ret); | 4027 | return ERR_PTR(ret); |
@@ -4076,6 +4076,12 @@ static int btrfs_dentry_delete(const struct dentry *dentry) | |||
4076 | return 0; | 4076 | return 0; |
4077 | } | 4077 | } |
4078 | 4078 | ||
4079 | static void btrfs_dentry_release(struct dentry *dentry) | ||
4080 | { | ||
4081 | if (dentry->d_fsdata) | ||
4082 | kfree(dentry->d_fsdata); | ||
4083 | } | ||
4084 | |||
4079 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 4085 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
4080 | struct nameidata *nd) | 4086 | struct nameidata *nd) |
4081 | { | 4087 | { |
@@ -4098,6 +4104,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4098 | struct btrfs_path *path; | 4104 | struct btrfs_path *path; |
4099 | struct list_head ins_list; | 4105 | struct list_head ins_list; |
4100 | struct list_head del_list; | 4106 | struct list_head del_list; |
4107 | struct qstr q; | ||
4101 | int ret; | 4108 | int ret; |
4102 | struct extent_buffer *leaf; | 4109 | struct extent_buffer *leaf; |
4103 | int slot; | 4110 | int slot; |
@@ -4187,6 +4194,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4187 | 4194 | ||
4188 | while (di_cur < di_total) { | 4195 | while (di_cur < di_total) { |
4189 | struct btrfs_key location; | 4196 | struct btrfs_key location; |
4197 | struct dentry *tmp; | ||
4190 | 4198 | ||
4191 | if (verify_dir_item(root, leaf, di)) | 4199 | if (verify_dir_item(root, leaf, di)) |
4192 | break; | 4200 | break; |
@@ -4207,6 +4215,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4207 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; | 4215 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; |
4208 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 4216 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
4209 | 4217 | ||
4218 | q.name = name_ptr; | ||
4219 | q.len = name_len; | ||
4220 | q.hash = full_name_hash(q.name, q.len); | ||
4221 | tmp = d_lookup(filp->f_dentry, &q); | ||
4222 | if (!tmp) { | ||
4223 | struct btrfs_key *newkey; | ||
4224 | |||
4225 | newkey = kzalloc(sizeof(struct btrfs_key), | ||
4226 | GFP_NOFS); | ||
4227 | if (!newkey) | ||
4228 | goto no_dentry; | ||
4229 | tmp = d_alloc(filp->f_dentry, &q); | ||
4230 | if (!tmp) { | ||
4231 | kfree(newkey); | ||
4232 | dput(tmp); | ||
4233 | goto no_dentry; | ||
4234 | } | ||
4235 | memcpy(newkey, &location, | ||
4236 | sizeof(struct btrfs_key)); | ||
4237 | tmp->d_fsdata = newkey; | ||
4238 | tmp->d_flags |= DCACHE_NEED_LOOKUP; | ||
4239 | d_rehash(tmp); | ||
4240 | dput(tmp); | ||
4241 | } else { | ||
4242 | dput(tmp); | ||
4243 | } | ||
4244 | no_dentry: | ||
4210 | /* is this a reference to our own snapshot? If so | 4245 | /* is this a reference to our own snapshot? If so |
4211 | * skip it | 4246 | * skip it |
4212 | */ | 4247 | */ |
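The readdir hunk above is one half of a pair: btrfs_lookup_dentry(), changed earlier in this diff, consumes the key stashed in d_fsdata. A condensed sketch of the round trip, reusing the names from the patch:

        /* readdir: hash a dentry that remembers where the inode item
         * lives, so a later lookup can skip the dir-item search */
        tmp->d_fsdata = newkey;                 /* copy of the btrfs_key */
        tmp->d_flags |= DCACHE_NEED_LOOKUP;
        d_rehash(tmp);

        /* lookup: use the stashed key instead of btrfs_inode_by_name() */
        if (unlikely(d_need_lookup(dentry))) {
                memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
                kfree(dentry->d_fsdata);
                dentry->d_fsdata = NULL;
                d_clear_need_lookup(dentry);
        }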
@@ -4271,7 +4306,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4271 | if (BTRFS_I(inode)->dummy_inode) | 4306 | if (BTRFS_I(inode)->dummy_inode) |
4272 | return 0; | 4307 | return 0; |
4273 | 4308 | ||
4274 | if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) | 4309 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) |
4275 | nolock = true; | 4310 | nolock = true; |
4276 | 4311 | ||
4277 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4312 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -4432,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4432 | int owner; | 4467 | int owner; |
4433 | 4468 | ||
4434 | path = btrfs_alloc_path(); | 4469 | path = btrfs_alloc_path(); |
4435 | BUG_ON(!path); | 4470 | if (!path) |
4471 | return ERR_PTR(-ENOMEM); | ||
4436 | 4472 | ||
4437 | inode = new_inode(root->fs_info->sb); | 4473 | inode = new_inode(root->fs_info->sb); |
4438 | if (!inode) { | 4474 | if (!inode) { |
@@ -4467,7 +4503,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4467 | inode->i_generation = BTRFS_I(inode)->generation; | 4503 | inode->i_generation = BTRFS_I(inode)->generation; |
4468 | btrfs_set_inode_space_info(root, inode); | 4504 | btrfs_set_inode_space_info(root, inode); |
4469 | 4505 | ||
4470 | if (mode & S_IFDIR) | 4506 | if (S_ISDIR(mode)) |
4471 | owner = 0; | 4507 | owner = 0; |
4472 | else | 4508 | else |
4473 | owner = 1; | 4509 | owner = 1; |
@@ -4512,7 +4548,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4512 | 4548 | ||
4513 | btrfs_inherit_iflags(inode, dir); | 4549 | btrfs_inherit_iflags(inode, dir); |
4514 | 4550 | ||
4515 | if ((mode & S_IFREG)) { | 4551 | if (S_ISREG(mode)) { |
4516 | if (btrfs_test_opt(root, NODATASUM)) | 4552 | if (btrfs_test_opt(root, NODATASUM)) |
4517 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4553 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
4518 | if (btrfs_test_opt(root, NODATACOW) || | 4554 | if (btrfs_test_opt(root, NODATACOW) || |
@@ -5787,7 +5823,7 @@ again: | |||
5787 | 5823 | ||
5788 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | 5824 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); |
5789 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5825 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5790 | if (!ret) | 5826 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) |
5791 | btrfs_update_inode(trans, root, inode); | 5827 | btrfs_update_inode(trans, root, inode); |
5792 | ret = 0; | 5828 | ret = 0; |
5793 | out_unlock: | 5829 | out_unlock: |
@@ -6692,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
6692 | return 0; | 6728 | return 0; |
6693 | } | 6729 | } |
6694 | 6730 | ||
6695 | /* helper function for file defrag and space balancing. This | ||
6696 | * forces readahead on a given range of bytes in an inode | ||
6697 | */ | ||
6698 | unsigned long btrfs_force_ra(struct address_space *mapping, | ||
6699 | struct file_ra_state *ra, struct file *file, | ||
6700 | pgoff_t offset, pgoff_t last_index) | ||
6701 | { | ||
6702 | pgoff_t req_size = last_index - offset + 1; | ||
6703 | |||
6704 | page_cache_sync_readahead(mapping, ra, file, offset, req_size); | ||
6705 | return offset + req_size; | ||
6706 | } | ||
6707 | |||
6708 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6731 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
6709 | { | 6732 | { |
6710 | struct btrfs_inode *ei; | 6733 | struct btrfs_inode *ei; |
@@ -6728,8 +6751,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6728 | ei->index_cnt = (u64)-1; | 6751 | ei->index_cnt = (u64)-1; |
6729 | ei->last_unlink_trans = 0; | 6752 | ei->last_unlink_trans = 0; |
6730 | 6753 | ||
6731 | atomic_set(&ei->outstanding_extents, 0); | 6754 | spin_lock_init(&ei->lock); |
6732 | atomic_set(&ei->reserved_extents, 0); | 6755 | ei->outstanding_extents = 0; |
6756 | ei->reserved_extents = 0; | ||
6733 | 6757 | ||
6734 | ei->ordered_data_close = 0; | 6758 | ei->ordered_data_close = 0; |
6735 | ei->orphan_meta_reserved = 0; | 6759 | ei->orphan_meta_reserved = 0; |
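outstanding_extents and reserved_extents change from atomic_t to plain integers serialized by the new BTRFS_I(inode)->lock, so related fields can be updated as one unit. The update pattern, matching the ioctl.c hunk later in this diff:

        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->outstanding_extents++;
        spin_unlock(&BTRFS_I(inode)->lock);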
@@ -6767,8 +6791,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6767 | 6791 | ||
6768 | WARN_ON(!list_empty(&inode->i_dentry)); | 6792 | WARN_ON(!list_empty(&inode->i_dentry)); |
6769 | WARN_ON(inode->i_data.nrpages); | 6793 | WARN_ON(inode->i_data.nrpages); |
6770 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6794 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
6771 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); | 6795 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
6772 | 6796 | ||
6773 | /* | 6797 | /* |
6774 | * This can happen where we create an inode, but somebody else also | 6798 | * This can happen where we create an inode, but somebody else also |
@@ -6823,7 +6847,7 @@ int btrfs_drop_inode(struct inode *inode) | |||
6823 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6847 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6824 | 6848 | ||
6825 | if (btrfs_root_refs(&root->root_item) == 0 && | 6849 | if (btrfs_root_refs(&root->root_item) == 0 && |
6826 | !is_free_space_inode(root, inode)) | 6850 | !btrfs_is_free_space_inode(root, inode)) |
6827 | return 1; | 6851 | return 1; |
6828 | else | 6852 | else |
6829 | return generic_drop_inode(inode); | 6853 | return generic_drop_inode(inode); |
@@ -7186,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7186 | goto out_unlock; | 7210 | goto out_unlock; |
7187 | 7211 | ||
7188 | path = btrfs_alloc_path(); | 7212 | path = btrfs_alloc_path(); |
7189 | BUG_ON(!path); | 7213 | if (!path) { |
7214 | err = -ENOMEM; | ||
7215 | drop_inode = 1; | ||
7216 | goto out_unlock; | ||
7217 | } | ||
7190 | key.objectid = btrfs_ino(inode); | 7218 | key.objectid = btrfs_ino(inode); |
7191 | key.offset = 0; | 7219 | key.offset = 0; |
7192 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 7220 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); |
@@ -7326,11 +7354,15 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7326 | static int btrfs_permission(struct inode *inode, int mask) | 7354 | static int btrfs_permission(struct inode *inode, int mask) |
7327 | { | 7355 | { |
7328 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7356 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7357 | umode_t mode = inode->i_mode; | ||
7329 | 7358 | ||
7330 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | 7359 | if (mask & MAY_WRITE && |
7331 | return -EROFS; | 7360 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { |
7332 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7361 | if (btrfs_root_readonly(root)) |
7333 | return -EACCES; | 7362 | return -EROFS; |
7363 | if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) | ||
7364 | return -EACCES; | ||
7365 | } | ||
7334 | return generic_permission(inode, mask); | 7366 | return generic_permission(inode, mask); |
7335 | } | 7367 | } |
7336 | 7368 | ||
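Restricting the checks to regular files, directories and symlinks keeps special files usable: a write to a device node stored on a read-only subvolume goes to the device driver, not to btrfs, so it must not fail with -EROFS. A hypothetical illustration (path made up):

        /* device node on a read-only snapshot: MAY_WRITE passes through to
         * generic_permission(), because S_ISCHR()/S_ISBLK() modes are now
         * excluded from the -EROFS and -EACCES checks */
        int fd = open("/mnt/snap/dev/ttyS0", O_WRONLY);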
@@ -7452,4 +7484,5 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7452 | 7484 | ||
7453 | const struct dentry_operations btrfs_dentry_operations = { | 7485 | const struct dentry_operations btrfs_dentry_operations = { |
7454 | .d_delete = btrfs_dentry_delete, | 7486 | .d_delete = btrfs_dentry_delete, |
7487 | .d_release = btrfs_dentry_release, | ||
7455 | }; | 7488 | }; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 622543309eb2..3351b1b24574 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -859,8 +859,8 @@ again: | |||
859 | /* step one, lock all the pages */ | 859 | /* step one, lock all the pages */ |
860 | for (i = 0; i < num_pages; i++) { | 860 | for (i = 0; i < num_pages; i++) { |
861 | struct page *page; | 861 | struct page *page; |
862 | page = grab_cache_page(inode->i_mapping, | 862 | page = find_or_create_page(inode->i_mapping, |
863 | start_index + i); | 863 | start_index + i, GFP_NOFS); |
864 | if (!page) | 864 | if (!page) |
865 | break; | 865 | break; |
866 | 866 | ||
@@ -930,7 +930,9 @@ again: | |||
930 | GFP_NOFS); | 930 | GFP_NOFS); |
931 | 931 | ||
932 | if (i_done != num_pages) { | 932 | if (i_done != num_pages) { |
933 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 933 | spin_lock(&BTRFS_I(inode)->lock); |
934 | BTRFS_I(inode)->outstanding_extents++; | ||
935 | spin_unlock(&BTRFS_I(inode)->lock); | ||
934 | btrfs_delalloc_release_space(inode, | 936 | btrfs_delalloc_release_space(inode, |
935 | (num_pages - i_done) << PAGE_CACHE_SHIFT); | 937 | (num_pages - i_done) << PAGE_CACHE_SHIFT); |
936 | } | 938 | } |
@@ -1747,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, | |||
1747 | key.objectid = key.offset; | 1749 | key.objectid = key.offset; |
1748 | key.offset = (u64)-1; | 1750 | key.offset = (u64)-1; |
1749 | dirid = key.objectid; | 1751 | dirid = key.objectid; |
1750 | |||
1751 | } | 1752 | } |
1752 | if (ptr < name) | 1753 | if (ptr < name) |
1753 | goto out; | 1754 | goto out; |
1754 | memcpy(name, ptr, total_len); | 1755 | memmove(name, ptr, total_len); |
1755 | name[total_len]='\0'; | 1756 | name[total_len]='\0'; |
1756 | ret = 0; | 1757 | ret = 0; |
1757 | out: | 1758 | out: |
@@ -2219,6 +2220,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2219 | !IS_ALIGNED(destoff, bs)) | 2220 | !IS_ALIGNED(destoff, bs)) |
2220 | goto out_unlock; | 2221 | goto out_unlock; |
2221 | 2222 | ||
2223 | if (destoff > inode->i_size) { | ||
2224 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); | ||
2225 | if (ret) | ||
2226 | goto out_unlock; | ||
2227 | } | ||
2228 | |||
2222 | /* do any pending delalloc/csum calc on src, one way or | 2229 | /* do any pending delalloc/csum calc on src, one way or |
2223 | another, and lock file content */ | 2230 | another, and lock file content */ |
2224 | while (1) { | 2231 | while (1) { |
@@ -2235,6 +2242,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2235 | btrfs_wait_ordered_range(src, off, len); | 2242 | btrfs_wait_ordered_range(src, off, len); |
2236 | } | 2243 | } |
2237 | 2244 | ||
2245 | /* truncate page cache pages from target inode range */ | ||
2246 | truncate_inode_pages_range(&inode->i_data, off, | ||
2247 | ALIGN(off + len, PAGE_CACHE_SIZE) - 1); | ||
2248 | |||
2238 | /* clone data */ | 2249 | /* clone data */ |
2239 | key.objectid = btrfs_ino(src); | 2250 | key.objectid = btrfs_ino(src); |
2240 | key.type = BTRFS_EXTENT_DATA_KEY; | 2251 | key.type = BTRFS_EXTENT_DATA_KEY; |
@@ -2320,14 +2331,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2320 | 2331 | ||
2321 | if (type == BTRFS_FILE_EXTENT_REG || | 2332 | if (type == BTRFS_FILE_EXTENT_REG || |
2322 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 2333 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
2334 | /* | ||
2335 | * a | --- range to clone ---| b | ||
2336 | * | ------------- extent ------------- | | ||
2337 | */ | ||
2338 | |||
2339 | /* subtract range b */ | ||
2340 | if (key.offset + datal > off + len) | ||
2341 | datal = off + len - key.offset; | ||
2342 | |||
2343 | /* subtract range a */ | ||
2323 | if (off > key.offset) { | 2344 | if (off > key.offset) { |
2324 | datao += off - key.offset; | 2345 | datao += off - key.offset; |
2325 | datal -= off - key.offset; | 2346 | datal -= off - key.offset; |
2326 | } | 2347 | } |
2327 | 2348 | ||
2328 | if (key.offset + datal > off + len) | ||
2329 | datal = off + len - key.offset; | ||
2330 | |||
2331 | ret = btrfs_drop_extents(trans, inode, | 2349 | ret = btrfs_drop_extents(trans, inode, |
2332 | new_key.offset, | 2350 | new_key.offset, |
2333 | new_key.offset + datal, | 2351 | new_key.offset + datal, |
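Swapping the order of the two clamps means the extent tail is trimmed against off + len before the head adjustment changes the key-relative offsets. A worked example with made-up numbers:

        /* clone window: off = 8192, len = 8192          (bytes 8K..16K)
         * extent item:  key.offset = 4096, datao = 0,
         *               datal = 16384                   (bytes 4K..20K)
         *
         * subtract range b: key.offset + datal (20K) > off + len (16K)
         *                   -> datal = off + len - key.offset = 12288
         * subtract range a: off (8K) > key.offset (4K)
         *                   -> datao += 4096; datal -= 4096 = 8192
         *
         * result: exactly the 8K of the extent inside the clone window */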
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 66fa43dc3f0f..d77b67c4b275 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,185 +24,197 @@ | |||
24 | #include "extent_io.h" | 24 | #include "extent_io.h" |
25 | #include "locking.h" | 25 | #include "locking.h" |
26 | 26 | ||
27 | static inline void spin_nested(struct extent_buffer *eb) | 27 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb); |
28 | { | ||
29 | spin_lock(&eb->lock); | ||
30 | } | ||
31 | 28 | ||
32 | /* | 29 | /* |
33 | * Setting a lock to blocking will drop the spinlock and set the | 30 | * if we currently have a spinning reader or writer lock |
34 | * flag that forces other procs who want the lock to wait. After | 31 | * (indicated by the rw flag) this will bump the count |
35 | * this you can safely schedule with the lock held. | 32 | * of blocking holders and drop the spinlock. |
36 | */ | 33 | */ |
37 | void btrfs_set_lock_blocking(struct extent_buffer *eb) | 34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) |
38 | { | 35 | { |
39 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 36 | if (rw == BTRFS_WRITE_LOCK) { |
40 | set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 37 | if (atomic_read(&eb->blocking_writers) == 0) { |
41 | spin_unlock(&eb->lock); | 38 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); |
39 | atomic_dec(&eb->spinning_writers); | ||
40 | btrfs_assert_tree_locked(eb); | ||
41 | atomic_inc(&eb->blocking_writers); | ||
42 | write_unlock(&eb->lock); | ||
43 | } | ||
44 | } else if (rw == BTRFS_READ_LOCK) { | ||
45 | btrfs_assert_tree_read_locked(eb); | ||
46 | atomic_inc(&eb->blocking_readers); | ||
47 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); | ||
48 | atomic_dec(&eb->spinning_readers); | ||
49 | read_unlock(&eb->lock); | ||
42 | } | 50 | } |
43 | /* exit with the spin lock released and the bit set */ | 51 | return; |
44 | } | 52 | } |
45 | 53 | ||
46 | /* | 54 | /* |
47 | * clearing the blocking flag will take the spinlock again. | 55 | * if we currently have a blocking lock, take the spinlock |
48 | * After this you can't safely schedule | 56 | * and drop our blocking count |
49 | */ | 57 | */ |
50 | void btrfs_clear_lock_blocking(struct extent_buffer *eb) | 58 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) |
51 | { | 59 | { |
52 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 60 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { |
53 | spin_nested(eb); | 61 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); |
54 | clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 62 | write_lock(&eb->lock); |
55 | smp_mb__after_clear_bit(); | 63 | WARN_ON(atomic_read(&eb->spinning_writers)); |
64 | atomic_inc(&eb->spinning_writers); | ||
65 | if (atomic_dec_and_test(&eb->blocking_writers)) | ||
66 | wake_up(&eb->write_lock_wq); | ||
67 | } else if (rw == BTRFS_READ_LOCK_BLOCKING) { | ||
68 | BUG_ON(atomic_read(&eb->blocking_readers) == 0); | ||
69 | read_lock(&eb->lock); | ||
70 | atomic_inc(&eb->spinning_readers); | ||
71 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
72 | wake_up(&eb->read_lock_wq); | ||
56 | } | 73 | } |
57 | /* exit with the spin lock held */ | 74 | return; |
58 | } | 75 | } |
59 | 76 | ||
60 | /* | 77 | /* |
61 | * unfortunately, many of the places that currently set a lock to blocking | 78 | * take a spinning read lock. This will wait for any blocking |
62 | * don't end up blocking for very long, and often they don't block | 79 | * writers |
63 | * at all. For a dbench 50 run, if we don't spin on the blocking bit | ||
64 | * at all, the context switch rate can jump up to 400,000/sec or more. | ||
65 | * | ||
66 | * So, we're still stuck with this crummy spin on the blocking bit, | ||
67 | * at least until the most common causes of the short blocks | ||
68 | * can be dealt with. | ||
69 | */ | 80 | */ |
70 | static int btrfs_spin_on_block(struct extent_buffer *eb) | 81 | void btrfs_tree_read_lock(struct extent_buffer *eb) |
71 | { | 82 | { |
72 | int i; | 83 | again: |
73 | 84 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
74 | for (i = 0; i < 512; i++) { | 85 | read_lock(&eb->lock); |
75 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 86 | if (atomic_read(&eb->blocking_writers)) { |
76 | return 1; | 87 | read_unlock(&eb->lock); |
77 | if (need_resched()) | 88 | wait_event(eb->write_lock_wq, |
78 | break; | 89 | atomic_read(&eb->blocking_writers) == 0); |
79 | cpu_relax(); | 90 | goto again; |
80 | } | 91 | } |
81 | return 0; | 92 | atomic_inc(&eb->read_locks); |
93 | atomic_inc(&eb->spinning_readers); | ||
82 | } | 94 | } |
83 | 95 | ||
84 | /* | 96 | /* |
85 | * This is somewhat different from trylock. It will take the | 97 | * returns 1 if we get the read lock and 0 if we don't |
86 | * spinlock but if it finds the lock is set to blocking, it will | 98 | * this won't wait for blocking writers |
87 | * return without the lock held. | ||
88 | * | ||
89 | * returns 1 if it was able to take the lock and zero otherwise | ||
90 | * | ||
91 | * After this call, scheduling is not safe without first calling | ||
92 | * btrfs_set_lock_blocking() | ||
93 | */ | 99 | */ |
94 | int btrfs_try_spin_lock(struct extent_buffer *eb) | 100 | int btrfs_try_tree_read_lock(struct extent_buffer *eb) |
95 | { | 101 | { |
96 | int i; | 102 | if (atomic_read(&eb->blocking_writers)) |
103 | return 0; | ||
97 | 104 | ||
98 | if (btrfs_spin_on_block(eb)) { | 105 | read_lock(&eb->lock); |
99 | spin_nested(eb); | 106 | if (atomic_read(&eb->blocking_writers)) { |
100 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 107 | read_unlock(&eb->lock); |
101 | return 1; | 108 | return 0; |
102 | spin_unlock(&eb->lock); | ||
103 | } | 109 | } |
104 | /* spin for a bit on the BLOCKING flag */ | 110 | atomic_inc(&eb->read_locks); |
105 | for (i = 0; i < 2; i++) { | 111 | atomic_inc(&eb->spinning_readers); |
106 | cpu_relax(); | 112 | return 1; |
107 | if (!btrfs_spin_on_block(eb)) | ||
108 | break; | ||
109 | |||
110 | spin_nested(eb); | ||
111 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
112 | return 1; | ||
113 | spin_unlock(&eb->lock); | ||
114 | } | ||
115 | return 0; | ||
116 | } | 113 | } |
117 | 114 | ||
118 | /* | 115 | /* |
119 | * the autoremove wake function will return 0 if it tried to wake up | 116 | * returns 1 if we get the read lock and 0 if we don't |
120 | * a process that was already awake, which means that process won't | 117 | * this won't wait for blocking writers or readers |
121 | * count as an exclusive wakeup. The waitq code will continue waking | ||
122 | * procs until it finds one that was actually sleeping. | ||
123 | * | ||
124 | * For btrfs, this isn't quite what we want. We want a single proc | ||
125 | * to be notified that the lock is ready for taking. If that proc | ||
126 | * already happen to be awake, great, it will loop around and try for | ||
127 | * the lock. | ||
128 | * | ||
129 | * So, btrfs_wake_function always returns 1, even when the proc that we | ||
130 | * tried to wake up was already awake. | ||
131 | */ | 118 | */ |
132 | static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, | 119 | int btrfs_try_tree_write_lock(struct extent_buffer *eb) |
133 | int sync, void *key) | ||
134 | { | 120 | { |
135 | autoremove_wake_function(wait, mode, sync, key); | 121 | if (atomic_read(&eb->blocking_writers) || |
122 | atomic_read(&eb->blocking_readers)) | ||
123 | return 0; | ||
124 | write_lock(&eb->lock); | ||
125 | if (atomic_read(&eb->blocking_writers) || | ||
126 | atomic_read(&eb->blocking_readers)) { | ||
127 | write_unlock(&eb->lock); | ||
128 | return 0; | ||
129 | } | ||
130 | atomic_inc(&eb->write_locks); | ||
131 | atomic_inc(&eb->spinning_writers); | ||
136 | return 1; | 132 | return 1; |
137 | } | 133 | } |
138 | 134 | ||
139 | /* | 135 | /* |
140 | * returns with the extent buffer spinlocked. | 136 | * drop a spinning read lock |
141 | * | 137 | */ |
142 | * This will spin and/or wait as required to take the lock, and then | 138 | void btrfs_tree_read_unlock(struct extent_buffer *eb) |
143 | * return with the spinlock held. | 139 | { |
144 | * | 140 | btrfs_assert_tree_read_locked(eb); |
145 | * After this call, scheduling is not safe without first calling | 141 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); |
146 | * btrfs_set_lock_blocking() | 142 | atomic_dec(&eb->spinning_readers); |
143 | atomic_dec(&eb->read_locks); | ||
144 | read_unlock(&eb->lock); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * drop a blocking read lock | ||
149 | */ | ||
150 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) | ||
151 | { | ||
152 | btrfs_assert_tree_read_locked(eb); | ||
153 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); | ||
154 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
155 | wake_up(&eb->read_lock_wq); | ||
156 | atomic_dec(&eb->read_locks); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * take a spinning write lock. This will wait for both | ||
161 | * blocking readers or writers | ||
147 | */ | 162 | */ |
148 | int btrfs_tree_lock(struct extent_buffer *eb) | 163 | int btrfs_tree_lock(struct extent_buffer *eb) |
149 | { | 164 | { |
150 | DEFINE_WAIT(wait); | 165 | again: |
151 | wait.func = btrfs_wake_function; | 166 | wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); |
152 | 167 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
153 | if (!btrfs_spin_on_block(eb)) | 168 | write_lock(&eb->lock); |
154 | goto sleep; | 169 | if (atomic_read(&eb->blocking_readers)) { |
155 | 170 | write_unlock(&eb->lock); | |
156 | while(1) { | 171 | wait_event(eb->read_lock_wq, |
157 | spin_nested(eb); | 172 | atomic_read(&eb->blocking_readers) == 0); |
158 | 173 | goto again; | |
159 | /* nobody is blocking, exit with the spinlock held */ | ||
160 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
161 | return 0; | ||
162 | |||
163 | /* | ||
164 | * we have the spinlock, but the real owner is blocking. | ||
165 | * wait for them | ||
166 | */ | ||
167 | spin_unlock(&eb->lock); | ||
168 | |||
169 | /* | ||
170 | * spin for a bit, and if the blocking flag goes away, | ||
171 | * loop around | ||
172 | */ | ||
173 | cpu_relax(); | ||
174 | if (btrfs_spin_on_block(eb)) | ||
175 | continue; | ||
176 | sleep: | ||
177 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | ||
178 | TASK_UNINTERRUPTIBLE); | ||
179 | |||
180 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
181 | schedule(); | ||
182 | |||
183 | finish_wait(&eb->lock_wq, &wait); | ||
184 | } | 174 | } |
175 | if (atomic_read(&eb->blocking_writers)) { | ||
176 | write_unlock(&eb->lock); | ||
177 | wait_event(eb->write_lock_wq, | ||
178 | atomic_read(&eb->blocking_writers) == 0); | ||
179 | goto again; | ||
180 | } | ||
181 | WARN_ON(atomic_read(&eb->spinning_writers)); | ||
182 | atomic_inc(&eb->spinning_writers); | ||
183 | atomic_inc(&eb->write_locks); | ||
185 | return 0; | 184 | return 0; |
186 | } | 185 | } |
187 | 186 | ||
187 | /* | ||
188 | * drop a spinning or a blocking write lock. | ||
189 | */ | ||
188 | int btrfs_tree_unlock(struct extent_buffer *eb) | 190 | int btrfs_tree_unlock(struct extent_buffer *eb) |
189 | { | 191 | { |
190 | /* | 192 | int blockers = atomic_read(&eb->blocking_writers); |
191 | * if we were a blocking owner, we don't have the spinlock held | 193 | |
192 | * just clear the bit and look for waiters | 194 | BUG_ON(blockers > 1); |
193 | */ | 195 | |
194 | if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 196 | btrfs_assert_tree_locked(eb); |
195 | smp_mb__after_clear_bit(); | 197 | atomic_dec(&eb->write_locks); |
196 | else | 198 | |
197 | spin_unlock(&eb->lock); | 199 | if (blockers) { |
198 | 200 | WARN_ON(atomic_read(&eb->spinning_writers)); | |
199 | if (waitqueue_active(&eb->lock_wq)) | 201 | atomic_dec(&eb->blocking_writers); |
200 | wake_up(&eb->lock_wq); | 202 | smp_wmb(); |
203 | wake_up(&eb->write_lock_wq); | ||
204 | } else { | ||
205 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); | ||
206 | atomic_dec(&eb->spinning_writers); | ||
207 | write_unlock(&eb->lock); | ||
208 | } | ||
201 | return 0; | 209 | return 0; |
202 | } | 210 | } |
203 | 211 | ||
204 | void btrfs_assert_tree_locked(struct extent_buffer *eb) | 212 | void btrfs_assert_tree_locked(struct extent_buffer *eb) |
205 | { | 213 | { |
206 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 214 | BUG_ON(!atomic_read(&eb->write_locks)); |
207 | assert_spin_locked(&eb->lock); | 215 | } |
216 | |||
217 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb) | ||
218 | { | ||
219 | BUG_ON(!atomic_read(&eb->read_locks)); | ||
208 | } | 220 | } |
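A minimal caller sketch of the new scheme (an assumed usage, not code from this patch): take the spinning lock for short non-sleeping sections, convert to blocking before anything that might schedule, and release according to the state the lock is in:

        btrfs_tree_read_lock(eb);               /* spinning: must not sleep */
        /* ... short reads of the extent buffer ... */
        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
        /* ... may now schedule, e.g. while reading a child block ... */
        btrfs_tree_read_unlock_blocking(eb);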
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 5c33a560a2f1..17247ddb81a0 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -19,11 +19,43 @@ | |||
19 | #ifndef __BTRFS_LOCKING_ | 19 | #ifndef __BTRFS_LOCKING_ |
20 | #define __BTRFS_LOCKING_ | 20 | #define __BTRFS_LOCKING_ |
21 | 21 | ||
22 | #define BTRFS_WRITE_LOCK 1 | ||
23 | #define BTRFS_READ_LOCK 2 | ||
24 | #define BTRFS_WRITE_LOCK_BLOCKING 3 | ||
25 | #define BTRFS_READ_LOCK_BLOCKING 4 | ||
26 | |||
22 | int btrfs_tree_lock(struct extent_buffer *eb); | 27 | int btrfs_tree_lock(struct extent_buffer *eb); |
23 | int btrfs_tree_unlock(struct extent_buffer *eb); | 28 | int btrfs_tree_unlock(struct extent_buffer *eb); |
24 | int btrfs_try_spin_lock(struct extent_buffer *eb); | 29 | int btrfs_try_spin_lock(struct extent_buffer *eb); |
25 | 30 | ||
26 | void btrfs_set_lock_blocking(struct extent_buffer *eb); | 31 | void btrfs_tree_read_lock(struct extent_buffer *eb); |
27 | void btrfs_clear_lock_blocking(struct extent_buffer *eb); | 32 | void btrfs_tree_read_unlock(struct extent_buffer *eb); |
33 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb); | ||
34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw); | ||
35 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); | ||
28 | void btrfs_assert_tree_locked(struct extent_buffer *eb); | 36 | void btrfs_assert_tree_locked(struct extent_buffer *eb); |
37 | int btrfs_try_tree_read_lock(struct extent_buffer *eb); | ||
38 | int btrfs_try_tree_write_lock(struct extent_buffer *eb); | ||
39 | |||
40 | static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) | ||
41 | { | ||
42 | if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING) | ||
43 | btrfs_tree_unlock(eb); | ||
44 | else if (rw == BTRFS_READ_LOCK_BLOCKING) | ||
45 | btrfs_tree_read_unlock_blocking(eb); | ||
46 | else if (rw == BTRFS_READ_LOCK) | ||
47 | btrfs_tree_read_unlock(eb); | ||
48 | else | ||
49 | BUG(); | ||
50 | } | ||
51 | |||
52 | static inline void btrfs_set_lock_blocking(struct extent_buffer *eb) | ||
53 | { | ||
54 | btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK); | ||
55 | } | ||
56 | |||
57 | static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb) | ||
58 | { | ||
59 | btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING); | ||
60 | } | ||
29 | #endif | 61 | #endif |
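btrfs_tree_unlock_rw() lets a caller track which flavour of lock it currently holds and unlock generically; a sketch of the assumed bookkeeping (the may_sleep condition is hypothetical):

        int lock_state = BTRFS_READ_LOCK;

        btrfs_tree_read_lock(eb);
        if (may_sleep) {
                btrfs_set_lock_blocking_rw(eb, lock_state);
                lock_state = BTRFS_READ_LOCK_BLOCKING;
        }
        /* ... */
        btrfs_tree_unlock_rw(eb, lock_state);   /* dispatches on the state */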
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
deleted file mode 100644
index 82d569cb6267..000000000000
--- a/fs/btrfs/ref-cache.c
+++ /dev/null
@@ -1,68 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/sort.h> | ||
22 | #include "ctree.h" | ||
23 | #include "ref-cache.h" | ||
24 | #include "transaction.h" | ||
25 | |||
26 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | ||
27 | struct rb_node *node) | ||
28 | { | ||
29 | struct rb_node **p = &root->rb_node; | ||
30 | struct rb_node *parent = NULL; | ||
31 | struct btrfs_leaf_ref *entry; | ||
32 | |||
33 | while (*p) { | ||
34 | parent = *p; | ||
35 | entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); | ||
36 | |||
37 | if (bytenr < entry->bytenr) | ||
38 | p = &(*p)->rb_left; | ||
39 | else if (bytenr > entry->bytenr) | ||
40 | p = &(*p)->rb_right; | ||
41 | else | ||
42 | return parent; | ||
43 | } | ||
44 | |||
45 | entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); | ||
46 | rb_link_node(node, parent, p); | ||
47 | rb_insert_color(node, root); | ||
48 | return NULL; | ||
49 | } | ||
50 | |||
51 | static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) | ||
52 | { | ||
53 | struct rb_node *n = root->rb_node; | ||
54 | struct btrfs_leaf_ref *entry; | ||
55 | |||
56 | while (n) { | ||
57 | entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); | ||
58 | WARN_ON(!entry->in_tree); | ||
59 | |||
60 | if (bytenr < entry->bytenr) | ||
61 | n = n->rb_left; | ||
62 | else if (bytenr > entry->bytenr) | ||
63 | n = n->rb_right; | ||
64 | else | ||
65 | return n; | ||
66 | } | ||
67 | return NULL; | ||
68 | } | ||
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
deleted file mode 100644
index 24f7001f6387..000000000000
--- a/fs/btrfs/ref-cache.h
+++ /dev/null
@@ -1,52 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | #ifndef __REFCACHE__ | ||
19 | #define __REFCACHE__ | ||
20 | |||
21 | struct btrfs_extent_info { | ||
22 | /* bytenr and num_bytes find the extent in the extent allocation tree */ | ||
23 | u64 bytenr; | ||
24 | u64 num_bytes; | ||
25 | |||
26 | /* objectid and offset find the back reference for the file */ | ||
27 | u64 objectid; | ||
28 | u64 offset; | ||
29 | }; | ||
30 | |||
31 | struct btrfs_leaf_ref { | ||
32 | struct rb_node rb_node; | ||
33 | struct btrfs_leaf_ref_tree *tree; | ||
34 | int in_tree; | ||
35 | atomic_t usage; | ||
36 | |||
37 | u64 root_gen; | ||
38 | u64 bytenr; | ||
39 | u64 owner; | ||
40 | u64 generation; | ||
41 | int nritems; | ||
42 | |||
43 | struct list_head list; | ||
44 | struct btrfs_extent_info extents[]; | ||
45 | }; | ||
46 | |||
47 | static inline size_t btrfs_leaf_ref_size(int nr_extents) | ||
48 | { | ||
49 | return sizeof(struct btrfs_leaf_ref) + | ||
50 | sizeof(struct btrfs_extent_info) * nr_extents; | ||
51 | } | ||
52 | #endif | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 5e0a3dc79a45..59bb1764273d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2955 | page_cache_sync_readahead(inode->i_mapping, | 2955 | page_cache_sync_readahead(inode->i_mapping, |
2956 | ra, NULL, index, | 2956 | ra, NULL, index, |
2957 | last_index + 1 - index); | 2957 | last_index + 1 - index); |
2958 | page = grab_cache_page(inode->i_mapping, index); | 2958 | page = find_or_create_page(inode->i_mapping, index, |
2959 | GFP_NOFS); | ||
2959 | if (!page) { | 2960 | if (!page) { |
2960 | btrfs_delalloc_release_metadata(inode, | 2961 | btrfs_delalloc_release_metadata(inode, |
2961 | PAGE_CACHE_SIZE); | 2962 | PAGE_CACHE_SIZE); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ebe45443de06..f4099904565a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -71,13 +71,12 @@ out: | |||
71 | return ret; | 71 | return ret; |
72 | } | 72 | } |
73 | 73 | ||
74 | int btrfs_set_root_node(struct btrfs_root_item *item, | 74 | void btrfs_set_root_node(struct btrfs_root_item *item, |
75 | struct extent_buffer *node) | 75 | struct extent_buffer *node) |
76 | { | 76 | { |
77 | btrfs_set_root_bytenr(item, node->start); | 77 | btrfs_set_root_bytenr(item, node->start); |
78 | btrfs_set_root_level(item, btrfs_header_level(node)); | 78 | btrfs_set_root_level(item, btrfs_header_level(node)); |
79 | btrfs_set_root_generation(item, btrfs_header_generation(node)); | 79 | btrfs_set_root_generation(item, btrfs_header_generation(node)); |
80 | return 0; | ||
81 | } | 80 | } |
82 | 81 | ||
83 | /* | 82 | /* |
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index c0f7ecaf1e79..bc1f6ad18442 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ | |||
50 | unsigned long part_offset = (unsigned long)s; \ | 50 | unsigned long part_offset = (unsigned long)s; \ |
51 | unsigned long offset = part_offset + offsetof(type, member); \ | 51 | unsigned long offset = part_offset + offsetof(type, member); \ |
52 | type *p; \ | 52 | type *p; \ |
53 | /* ugly, but we want the fast path here */ \ | 53 | int err; \ |
54 | if (eb->map_token && offset >= eb->map_start && \ | 54 | char *kaddr; \ |
55 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | 55 | unsigned long map_start; \ |
56 | eb->map_len) { \ | 56 | unsigned long map_len; \ |
57 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | 57 | u##bits res; \ |
58 | return le##bits##_to_cpu(p->member); \ | 58 | err = map_private_extent_buffer(eb, offset, \ |
59 | } \ | 59 | sizeof(((type *)0)->member), \ |
60 | { \ | 60 | &kaddr, &map_start, &map_len); \ |
61 | int err; \ | 61 | if (err) { \ |
62 | char *map_token; \ | 62 | __le##bits leres; \ |
63 | char *kaddr; \ | 63 | read_eb_member(eb, s, type, member, &leres); \ |
64 | int unmap_on_exit = (eb->map_token == NULL); \ | 64 | return le##bits##_to_cpu(leres); \ |
65 | unsigned long map_start; \ | 65 | } \ |
66 | unsigned long map_len; \ | 66 | p = (type *)(kaddr + part_offset - map_start); \ |
67 | u##bits res; \ | 67 | res = le##bits##_to_cpu(p->member); \ |
68 | err = map_extent_buffer(eb, offset, \ | 68 | return res; \ |
69 | sizeof(((type *)0)->member), \ | ||
70 | &map_token, &kaddr, \ | ||
71 | &map_start, &map_len, KM_USER1); \ | ||
72 | if (err) { \ | ||
73 | __le##bits leres; \ | ||
74 | read_eb_member(eb, s, type, member, &leres); \ | ||
75 | return le##bits##_to_cpu(leres); \ | ||
76 | } \ | ||
77 | p = (type *)(kaddr + part_offset - map_start); \ | ||
78 | res = le##bits##_to_cpu(p->member); \ | ||
79 | if (unmap_on_exit) \ | ||
80 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
81 | return res; \ | ||
82 | } \ | ||
83 | } \ | 69 | } \ |
84 | void btrfs_set_##name(struct extent_buffer *eb, \ | 70 | void btrfs_set_##name(struct extent_buffer *eb, \ |
85 | type *s, u##bits val) \ | 71 | type *s, u##bits val) \ |
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \ | |||
87 | unsigned long part_offset = (unsigned long)s; \ | 73 | unsigned long part_offset = (unsigned long)s; \ |
88 | unsigned long offset = part_offset + offsetof(type, member); \ | 74 | unsigned long offset = part_offset + offsetof(type, member); \ |
89 | type *p; \ | 75 | type *p; \ |
90 | /* ugly, but we want the fast path here */ \ | 76 | int err; \ |
91 | if (eb->map_token && offset >= eb->map_start && \ | 77 | char *kaddr; \ |
92 | offset + sizeof(((type *)0)->member) <= eb->map_start + \ | 78 | unsigned long map_start; \ |
93 | eb->map_len) { \ | 79 | unsigned long map_len; \ |
94 | p = (type *)(eb->kaddr + part_offset - eb->map_start); \ | 80 | err = map_private_extent_buffer(eb, offset, \ |
95 | p->member = cpu_to_le##bits(val); \ | 81 | sizeof(((type *)0)->member), \ |
96 | return; \ | 82 | &kaddr, &map_start, &map_len); \ |
97 | } \ | 83 | if (err) { \ |
98 | { \ | 84 | __le##bits val2; \ |
99 | int err; \ | 85 | val2 = cpu_to_le##bits(val); \ |
100 | char *map_token; \ | 86 | write_eb_member(eb, s, type, member, &val2); \ |
101 | char *kaddr; \ | 87 | return; \ |
102 | int unmap_on_exit = (eb->map_token == NULL); \ | 88 | } \ |
103 | unsigned long map_start; \ | 89 | p = (type *)(kaddr + part_offset - map_start); \ |
104 | unsigned long map_len; \ | 90 | p->member = cpu_to_le##bits(val); \ |
105 | err = map_extent_buffer(eb, offset, \ | ||
106 | sizeof(((type *)0)->member), \ | ||
107 | &map_token, &kaddr, \ | ||
108 | &map_start, &map_len, KM_USER1); \ | ||
109 | if (err) { \ | ||
110 | __le##bits val2; \ | ||
111 | val2 = cpu_to_le##bits(val); \ | ||
112 | write_eb_member(eb, s, type, member, &val2); \ | ||
113 | return; \ | ||
114 | } \ | ||
115 | p = (type *)(kaddr + part_offset - map_start); \ | ||
116 | p->member = cpu_to_le##bits(val); \ | ||
117 | if (unmap_on_exit) \ | ||
118 | unmap_extent_buffer(eb, map_token, KM_USER1); \ | ||
119 | } \ | ||
120 | } | 91 | } |
121 | 92 | ||
122 | #include "ctree.h" | 93 | #include "ctree.h" |
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb, | |||
125 | struct btrfs_disk_key *disk_key, int nr) | 96 | struct btrfs_disk_key *disk_key, int nr) |
126 | { | 97 | { |
127 | unsigned long ptr = btrfs_node_key_ptr_offset(nr); | 98 | unsigned long ptr = btrfs_node_key_ptr_offset(nr); |
128 | if (eb->map_token && ptr >= eb->map_start && | ||
129 | ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) { | ||
130 | memcpy(disk_key, eb->kaddr + ptr - eb->map_start, | ||
131 | sizeof(*disk_key)); | ||
132 | return; | ||
133 | } else if (eb->map_token) { | ||
134 | unmap_extent_buffer(eb, eb->map_token, KM_USER1); | ||
135 | eb->map_token = NULL; | ||
136 | } | ||
137 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, | 99 | read_eb_member(eb, (struct btrfs_key_ptr *)ptr, |
138 | struct btrfs_key_ptr, key, disk_key); | 100 | struct btrfs_key_ptr, key, disk_key); |
139 | } | 101 | } |
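Hand-expanding the rewritten getter macro for one real instantiation (btrfs_inode_size, bits = 64) shows the shape of the generated code; this is a sketch, not literal preprocessor output:

u64 btrfs_inode_size(struct extent_buffer *eb, struct btrfs_inode_item *s)
{
        unsigned long part_offset = (unsigned long)s;
        unsigned long offset = part_offset +
                               offsetof(struct btrfs_inode_item, size);
        struct btrfs_inode_item *p;
        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;

        if (map_private_extent_buffer(eb, offset,
                                      sizeof(((struct btrfs_inode_item *)0)->size),
                                      &kaddr, &map_start, &map_len)) {
                /* the member straddles a page boundary: copying fallback */
                __le64 leres;
                read_eb_member(eb, s, struct btrfs_inode_item, size, &leres);
                return le64_to_cpu(leres);
        }
        p = (struct btrfs_inode_item *)(kaddr + part_offset - map_start);
        return le64_to_cpu(p->size);
}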
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 51dcec86757f..e24b7964a155 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root) | |||
216 | spin_lock(&root->fs_info->trans_lock); | 216 | spin_lock(&root->fs_info->trans_lock); |
217 | cur_trans = root->fs_info->running_transaction; | 217 | cur_trans = root->fs_info->running_transaction; |
218 | if (cur_trans && cur_trans->blocked) { | 218 | if (cur_trans && cur_trans->blocked) { |
219 | DEFINE_WAIT(wait); | ||
220 | atomic_inc(&cur_trans->use_count); | 219 | atomic_inc(&cur_trans->use_count); |
221 | spin_unlock(&root->fs_info->trans_lock); | 220 | spin_unlock(&root->fs_info->trans_lock); |
222 | while (1) { | 221 | |
223 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 222 | wait_event(root->fs_info->transaction_wait, |
224 | TASK_UNINTERRUPTIBLE); | 223 | !cur_trans->blocked); |
225 | if (!cur_trans->blocked) | ||
226 | break; | ||
227 | schedule(); | ||
228 | } | ||
229 | finish_wait(&root->fs_info->transaction_wait, &wait); | ||
230 | put_transaction(cur_trans); | 224 | put_transaction(cur_trans); |
231 | } else { | 225 | } else { |
232 | spin_unlock(&root->fs_info->trans_lock); | 226 | spin_unlock(&root->fs_info->trans_lock); |
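Every conversion in this file has the same shape: the removed open-coded loop is equivalent to wait_event(). Side by side, with wq and condition standing in for the real waitqueue and predicate:

        /* before: open-coded wait loop */
        DEFINE_WAIT(wait);
        while (1) {
                prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
                if (condition)
                        break;
                schedule();
        }
        finish_wait(&wq, &wait);

        /* after: identical behaviour in one line */
        wait_event(wq, condition);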
@@ -260,7 +254,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
260 | { | 254 | { |
261 | struct btrfs_trans_handle *h; | 255 | struct btrfs_trans_handle *h; |
262 | struct btrfs_transaction *cur_trans; | 256 | struct btrfs_transaction *cur_trans; |
263 | int retries = 0; | 257 | u64 num_bytes = 0; |
264 | int ret; | 258 | int ret; |
265 | 259 | ||
266 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 260 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
@@ -274,6 +268,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
274 | h->block_rsv = NULL; | 268 | h->block_rsv = NULL; |
275 | goto got_it; | 269 | goto got_it; |
276 | } | 270 | } |
271 | |||
272 | /* | ||
273 | * Do the reservation before we join the transaction so we can do all | ||
274 | * the appropriate flushing if need be. | ||
275 | */ | ||
276 | if (num_items > 0 && root != root->fs_info->chunk_root) { | ||
277 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
278 | ret = btrfs_block_rsv_add(NULL, root, | ||
279 | &root->fs_info->trans_block_rsv, | ||
280 | num_bytes); | ||
281 | if (ret) | ||
282 | return ERR_PTR(ret); | ||
283 | } | ||
277 | again: | 284 | again: |
278 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 285 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
279 | if (!h) | 286 | if (!h) |
@@ -310,24 +317,9 @@ again: | |||
310 | goto again; | 317 | goto again; |
311 | } | 318 | } |
312 | 319 | ||
313 | if (num_items > 0) { | 320 | if (num_bytes) { |
314 | ret = btrfs_trans_reserve_metadata(h, root, num_items); | 321 | h->block_rsv = &root->fs_info->trans_block_rsv; |
315 | if (ret == -EAGAIN && !retries) { | 322 | h->bytes_reserved = num_bytes; |
316 | retries++; | ||
317 | btrfs_commit_transaction(h, root); | ||
318 | goto again; | ||
319 | } else if (ret == -EAGAIN) { | ||
320 | /* | ||
321 | * We have already retried and got EAGAIN, so really we | ||
322 | * don't have space, so set ret to -ENOSPC. | ||
323 | */ | ||
324 | ret = -ENOSPC; | ||
325 | } | ||
326 | |||
327 | if (ret < 0) { | ||
328 | btrfs_end_transaction(h, root); | ||
329 | return ERR_PTR(ret); | ||
330 | } | ||
331 | } | 323 | } |
332 | 324 | ||
333 | got_it: | 325 | got_it: |
@@ -359,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root | |||
359 | } | 351 | } |
360 | 352 | ||
361 | /* wait for a transaction commit to be fully complete */ | 353 | /* wait for a transaction commit to be fully complete */ |
362 | static noinline int wait_for_commit(struct btrfs_root *root, | 354 | static noinline void wait_for_commit(struct btrfs_root *root, |
363 | struct btrfs_transaction *commit) | 355 | struct btrfs_transaction *commit) |
364 | { | 356 | { |
365 | DEFINE_WAIT(wait); | 357 | wait_event(commit->commit_wait, commit->commit_done); |
366 | while (!commit->commit_done) { | ||
367 | prepare_to_wait(&commit->commit_wait, &wait, | ||
368 | TASK_UNINTERRUPTIBLE); | ||
369 | if (commit->commit_done) | ||
370 | break; | ||
371 | schedule(); | ||
372 | } | ||
373 | finish_wait(&commit->commit_wait, &wait); | ||
374 | return 0; | ||
375 | } | 358 | } |
376 | 359 | ||
377 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | 360 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
@@ -499,10 +482,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
499 | } | 482 | } |
500 | 483 | ||
501 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 484 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { |
502 | if (throttle) | 485 | if (throttle) { |
486 | /* | ||
487 | * We may race with somebody else here and end up having | ||
488 | * to call end_transaction on ourselves again, so inc | ||
489 | * our use_count. | ||
490 | */ | ||
491 | trans->use_count++; | ||
503 | return btrfs_commit_transaction(trans, root); | 492 | return btrfs_commit_transaction(trans, root); |
504 | else | 493 | } else { |
505 | wake_up_process(info->transaction_kthread); | 494 | wake_up_process(info->transaction_kthread); |
495 | } | ||
506 | } | 496 | } |
507 | 497 | ||
508 | WARN_ON(cur_trans != info->running_transaction); | 498 | WARN_ON(cur_trans != info->running_transaction); |
@@ -894,6 +884,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
894 | struct btrfs_root *tree_root = fs_info->tree_root; | 884 | struct btrfs_root *tree_root = fs_info->tree_root; |
895 | struct btrfs_root *root = pending->root; | 885 | struct btrfs_root *root = pending->root; |
896 | struct btrfs_root *parent_root; | 886 | struct btrfs_root *parent_root; |
887 | struct btrfs_block_rsv *rsv; | ||
897 | struct inode *parent_inode; | 888 | struct inode *parent_inode; |
898 | struct dentry *parent; | 889 | struct dentry *parent; |
899 | struct dentry *dentry; | 890 | struct dentry *dentry; |
@@ -905,6 +896,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
905 | u64 objectid; | 896 | u64 objectid; |
906 | u64 root_flags; | 897 | u64 root_flags; |
907 | 898 | ||
899 | rsv = trans->block_rsv; | ||
900 | |||
908 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 901 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
909 | if (!new_root_item) { | 902 | if (!new_root_item) { |
910 | pending->error = -ENOMEM; | 903 | pending->error = -ENOMEM; |
@@ -1012,6 +1005,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1012 | btrfs_orphan_post_snapshot(trans, pending); | 1005 | btrfs_orphan_post_snapshot(trans, pending); |
1013 | fail: | 1006 | fail: |
1014 | kfree(new_root_item); | 1007 | kfree(new_root_item); |
1008 | trans->block_rsv = rsv; | ||
1015 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); | 1009 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
1016 | return 0; | 1010 | return 0; |
1017 | } | 1011 | } |
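[Editor's note] create_pending_snapshot() now saves trans->block_rsv on entry and restores it before returning, since the snapshot work temporarily points the handle at pending->block_rsv. The general save/switch/restore shape, sketched with hypothetical types:

	/* Hypothetical reservation switch around one operation. */
	struct rsv;
	struct handle {
		struct rsv *block_rsv;
	};

	static void op_charged_to(struct handle *h, struct rsv *tmp)
	{
		struct rsv *saved = h->block_rsv;	/* caller's rsv */

		h->block_rsv = tmp;	/* charge allocations to tmp */
		/* ... do the work ... */
		h->block_rsv = saved;	/* restore on every exit path */
	}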
@@ -1080,22 +1074,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) | |||
1080 | static void wait_current_trans_commit_start(struct btrfs_root *root, | 1074 | static void wait_current_trans_commit_start(struct btrfs_root *root, |
1081 | struct btrfs_transaction *trans) | 1075 | struct btrfs_transaction *trans) |
1082 | { | 1076 | { |
1083 | DEFINE_WAIT(wait); | 1077 | wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); |
1084 | |||
1085 | if (trans->in_commit) | ||
1086 | return; | ||
1087 | |||
1088 | while (1) { | ||
1089 | prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, | ||
1090 | TASK_UNINTERRUPTIBLE); | ||
1091 | if (trans->in_commit) { | ||
1092 | finish_wait(&root->fs_info->transaction_blocked_wait, | ||
1093 | &wait); | ||
1094 | break; | ||
1095 | } | ||
1096 | schedule(); | ||
1097 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | ||
1098 | } | ||
1099 | } | 1078 | } |
1100 | 1079 | ||
1101 | /* | 1080 | /* |
@@ -1105,24 +1084,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, | |||
1105 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | 1084 | static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, |
1106 | struct btrfs_transaction *trans) | 1085 | struct btrfs_transaction *trans) |
1107 | { | 1086 | { |
1108 | DEFINE_WAIT(wait); | 1087 | wait_event(root->fs_info->transaction_wait, |
1109 | 1088 | trans->commit_done || (trans->in_commit && !trans->blocked)); | |
1110 | if (trans->commit_done || (trans->in_commit && !trans->blocked)) | ||
1111 | return; | ||
1112 | |||
1113 | while (1) { | ||
1114 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | ||
1115 | TASK_UNINTERRUPTIBLE); | ||
1116 | if (trans->commit_done || | ||
1117 | (trans->in_commit && !trans->blocked)) { | ||
1118 | finish_wait(&root->fs_info->transaction_wait, | ||
1119 | &wait); | ||
1120 | break; | ||
1121 | } | ||
1122 | schedule(); | ||
1123 | finish_wait(&root->fs_info->transaction_wait, | ||
1124 | &wait); | ||
1125 | } | ||
1126 | } | 1089 | } |
1127 | 1090 | ||
1128 | /* | 1091 | /* |
@@ -1229,8 +1192,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1229 | atomic_inc(&cur_trans->use_count); | 1192 | atomic_inc(&cur_trans->use_count); |
1230 | btrfs_end_transaction(trans, root); | 1193 | btrfs_end_transaction(trans, root); |
1231 | 1194 | ||
1232 | ret = wait_for_commit(root, cur_trans); | 1195 | wait_for_commit(root, cur_trans); |
1233 | BUG_ON(ret); | ||
1234 | 1196 | ||
1235 | put_transaction(cur_trans); | 1197 | put_transaction(cur_trans); |
1236 | 1198 | ||
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f41d1e..786639fca067 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | |||
799 | struct extent_buffer *eb, int slot, | 799 | struct extent_buffer *eb, int slot, |
800 | struct btrfs_key *key) | 800 | struct btrfs_key *key) |
801 | { | 801 | { |
802 | struct inode *dir; | ||
803 | int ret; | ||
804 | struct btrfs_inode_ref *ref; | 802 | struct btrfs_inode_ref *ref; |
803 | struct btrfs_dir_item *di; | ||
804 | struct inode *dir; | ||
805 | struct inode *inode; | 805 | struct inode *inode; |
806 | char *name; | ||
807 | int namelen; | ||
808 | unsigned long ref_ptr; | 806 | unsigned long ref_ptr; |
809 | unsigned long ref_end; | 807 | unsigned long ref_end; |
808 | char *name; | ||
809 | int namelen; | ||
810 | int ret; | ||
810 | int search_done = 0; | 811 | int search_done = 0; |
811 | 812 | ||
812 | /* | 813 | /* |
@@ -909,6 +910,25 @@ again: | |||
909 | } | 910 | } |
910 | btrfs_release_path(path); | 911 | btrfs_release_path(path); |
911 | 912 | ||
913 | /* look for a conflicting sequence number */ | ||
914 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | ||
915 | btrfs_inode_ref_index(eb, ref), | ||
916 | name, namelen, 0); | ||
917 | if (di && !IS_ERR(di)) { | ||
918 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
919 | BUG_ON(ret); | ||
920 | } | ||
921 | btrfs_release_path(path); | ||
922 | |||
923 | /* look for a conflicting name */ | ||
924 | di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), | ||
925 | name, namelen, 0); | ||
926 | if (di && !IS_ERR(di)) { | ||
927 | ret = drop_one_dir_item(trans, root, path, dir, di); | ||
928 | BUG_ON(ret); | ||
929 | } | ||
930 | btrfs_release_path(path); | ||
931 | |||
912 | insert: | 932 | insert: |
913 | /* insert our name */ | 933 | /* insert our name */ |
914 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | 934 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, |
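[Editor's note] Both conflict checks added above share one idiom: the btrfs lookup helpers return NULL when no entry matches and ERR_PTR(-errno) on failure, so only a non-NULL, non-error result names an entry worth dropping. A sketch of that two-part test (the struct is left incomplete on purpose, only the pointer matters):

	#include <linux/err.h>

	struct btrfs_dir_item;

	/*
	 * The lookups above return NULL for "no such entry" and
	 * ERR_PTR(-errno) on failure, hence the combined test.
	 */
	static int is_real_entry(struct btrfs_dir_item *di)
	{
		return di && !IS_ERR(di);
	}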
@@ -1617,7 +1637,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1617 | return 0; | 1637 | return 0; |
1618 | 1638 | ||
1619 | path = btrfs_alloc_path(); | 1639 | path = btrfs_alloc_path(); |
1620 | BUG_ON(!path); | 1640 | if (!path) |
1641 | return -ENOMEM; | ||
1621 | 1642 | ||
1622 | nritems = btrfs_header_nritems(eb); | 1643 | nritems = btrfs_header_nritems(eb); |
1623 | for (i = 0; i < nritems; i++) { | 1644 | for (i = 0; i < nritems; i++) { |
@@ -1723,15 +1744,17 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1723 | return -ENOMEM; | 1744 | return -ENOMEM; |
1724 | 1745 | ||
1725 | if (*level == 1) { | 1746 | if (*level == 1) { |
1726 | wc->process_func(root, next, wc, ptr_gen); | 1747 | ret = wc->process_func(root, next, wc, ptr_gen); |
1748 | if (ret) | ||
1749 | return ret; | ||
1727 | 1750 | ||
1728 | path->slots[*level]++; | 1751 | path->slots[*level]++; |
1729 | if (wc->free) { | 1752 | if (wc->free) { |
1730 | btrfs_read_buffer(next, ptr_gen); | 1753 | btrfs_read_buffer(next, ptr_gen); |
1731 | 1754 | ||
1732 | btrfs_tree_lock(next); | 1755 | btrfs_tree_lock(next); |
1733 | clean_tree_block(trans, root, next); | ||
1734 | btrfs_set_lock_blocking(next); | 1756 | btrfs_set_lock_blocking(next); |
1757 | clean_tree_block(trans, root, next); | ||
1735 | btrfs_wait_tree_block_writeback(next); | 1758 | btrfs_wait_tree_block_writeback(next); |
1736 | btrfs_tree_unlock(next); | 1759 | btrfs_tree_unlock(next); |
1737 | 1760 | ||
@@ -1788,16 +1811,19 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1788 | parent = path->nodes[*level + 1]; | 1811 | parent = path->nodes[*level + 1]; |
1789 | 1812 | ||
1790 | root_owner = btrfs_header_owner(parent); | 1813 | root_owner = btrfs_header_owner(parent); |
1791 | wc->process_func(root, path->nodes[*level], wc, | 1814 | ret = wc->process_func(root, path->nodes[*level], wc, |
1792 | btrfs_header_generation(path->nodes[*level])); | 1815 | btrfs_header_generation(path->nodes[*level])); |
1816 | if (ret) | ||
1817 | return ret; | ||
1818 | |||
1793 | if (wc->free) { | 1819 | if (wc->free) { |
1794 | struct extent_buffer *next; | 1820 | struct extent_buffer *next; |
1795 | 1821 | ||
1796 | next = path->nodes[*level]; | 1822 | next = path->nodes[*level]; |
1797 | 1823 | ||
1798 | btrfs_tree_lock(next); | 1824 | btrfs_tree_lock(next); |
1799 | clean_tree_block(trans, root, next); | ||
1800 | btrfs_set_lock_blocking(next); | 1825 | btrfs_set_lock_blocking(next); |
1826 | clean_tree_block(trans, root, next); | ||
1801 | btrfs_wait_tree_block_writeback(next); | 1827 | btrfs_wait_tree_block_writeback(next); |
1802 | btrfs_tree_unlock(next); | 1828 | btrfs_tree_unlock(next); |
1803 | 1829 | ||
@@ -1864,8 +1890,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1864 | next = path->nodes[orig_level]; | 1890 | next = path->nodes[orig_level]; |
1865 | 1891 | ||
1866 | btrfs_tree_lock(next); | 1892 | btrfs_tree_lock(next); |
1867 | clean_tree_block(trans, log, next); | ||
1868 | btrfs_set_lock_blocking(next); | 1893 | btrfs_set_lock_blocking(next); |
1894 | clean_tree_block(trans, log, next); | ||
1869 | btrfs_wait_tree_block_writeback(next); | 1895 | btrfs_wait_tree_block_writeback(next); |
1870 | btrfs_tree_unlock(next); | 1896 | btrfs_tree_unlock(next); |
1871 | 1897 | ||
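[Editor's note] Three hunks in this file reorder the same pair of calls: the extent-buffer lock is switched to blocking mode before clean_tree_block() runs, so that the cleaning work, which can sleep, never executes while the lock is still in its spinning state. The resulting order, sketched with the functions named in the hunks (headers are the fs/btrfs-internal ones):

	#include "ctree.h"
	#include "disk-io.h"
	#include "locking.h"

	/* Sketch of the corrected free-path ordering. */
	static void scrub_walked_block(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *next)
	{
		btrfs_tree_lock(next);		/* spinning lock */
		btrfs_set_lock_blocking(next);	/* may sleep from here on */
		clean_tree_block(trans, root, next);
		btrfs_wait_tree_block_writeback(next);
		btrfs_tree_unlock(next);
	}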
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 19450bc53632..f2a4cc79da61 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
142 | unsigned long limit; | 142 | unsigned long limit; |
143 | unsigned long last_waited = 0; | 143 | unsigned long last_waited = 0; |
144 | int force_reg = 0; | 144 | int force_reg = 0; |
145 | int sync_pending = 0; | ||
145 | struct blk_plug plug; | 146 | struct blk_plug plug; |
146 | 147 | ||
147 | /* | 148 | /* |
@@ -229,6 +230,22 @@ loop_lock: | |||
229 | 230 | ||
230 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 231 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
231 | 232 | ||
233 | /* | ||
234 | * If we're doing the sync list, record that our | ||
235 | * plug has some sync requests on it. | ||
236 | * | ||
237 | * If we're doing the regular list and there are | ||
238 | * sync requests sitting around, unplug before | ||
239 | * we add more. | ||
240 | */ | ||
241 | if (pending_bios == &device->pending_sync_bios) { | ||
242 | sync_pending = 1; | ||
243 | } else if (sync_pending) { | ||
244 | blk_finish_plug(&plug); | ||
245 | blk_start_plug(&plug); | ||
246 | sync_pending = 0; | ||
247 | } | ||
248 | |||
232 | submit_bio(cur->bi_rw, cur); | 249 | submit_bio(cur->bi_rw, cur); |
233 | num_run++; | 250 | num_run++; |
234 | batch_run++; | 251 | batch_run++; |
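[Editor's note] The sync_pending logic above cycles the plug whenever the loop switches from the sync list back to the regular one, so queued sync bios reach the device before more bulk I/O piles in behind them. The cycling itself is just the stock block-layer pair:

	#include <linux/blkdev.h>

	/* Sketch: flush queued sync bios, then start a fresh batch. */
	static void flush_sync_batch(struct blk_plug *plug, int *sync_pending)
	{
		if (*sync_pending) {
			blk_finish_plug(plug);	/* submit everything queued */
			blk_start_plug(plug);	/* re-arm for what follows */
			*sync_pending = 0;
		}
	}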
@@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
500 | fs_devices->rw_devices--; | 517 | fs_devices->rw_devices--; |
501 | } | 518 | } |
502 | 519 | ||
520 | if (device->can_discard) | ||
521 | fs_devices->num_can_discard--; | ||
522 | |||
503 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); | 523 | new_device = kmalloc(sizeof(*new_device), GFP_NOFS); |
504 | BUG_ON(!new_device); | 524 | BUG_ON(!new_device); |
505 | memcpy(new_device, device, sizeof(*new_device)); | 525 | memcpy(new_device, device, sizeof(*new_device)); |
@@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
508 | new_device->bdev = NULL; | 528 | new_device->bdev = NULL; |
509 | new_device->writeable = 0; | 529 | new_device->writeable = 0; |
510 | new_device->in_fs_metadata = 0; | 530 | new_device->in_fs_metadata = 0; |
531 | new_device->can_discard = 0; | ||
511 | list_replace_rcu(&device->dev_list, &new_device->dev_list); | 532 | list_replace_rcu(&device->dev_list, &new_device->dev_list); |
512 | 533 | ||
513 | call_rcu(&device->rcu, free_device); | 534 | call_rcu(&device->rcu, free_device); |
@@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
547 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | 568 | static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, |
548 | fmode_t flags, void *holder) | 569 | fmode_t flags, void *holder) |
549 | { | 570 | { |
571 | struct request_queue *q; | ||
550 | struct block_device *bdev; | 572 | struct block_device *bdev; |
551 | struct list_head *head = &fs_devices->devices; | 573 | struct list_head *head = &fs_devices->devices; |
552 | struct btrfs_device *device; | 574 | struct btrfs_device *device; |
@@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
603 | seeding = 0; | 625 | seeding = 0; |
604 | } | 626 | } |
605 | 627 | ||
628 | q = bdev_get_queue(bdev); | ||
629 | if (blk_queue_discard(q)) { | ||
630 | device->can_discard = 1; | ||
631 | fs_devices->num_can_discard++; | ||
632 | } | ||
633 | |||
606 | device->bdev = bdev; | 634 | device->bdev = bdev; |
607 | device->in_fs_metadata = 0; | 635 | device->in_fs_metadata = 0; |
608 | device->mode = flags; | 636 | device->mode = flags; |
@@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
835 | 863 | ||
836 | max_hole_start = search_start; | 864 | max_hole_start = search_start; |
837 | max_hole_size = 0; | 865 | max_hole_size = 0; |
866 | hole_size = 0; | ||
838 | 867 | ||
839 | if (search_start >= search_end) { | 868 | if (search_start >= search_end) { |
840 | ret = -ENOSPC; | 869 | ret = -ENOSPC; |
@@ -917,7 +946,14 @@ next: | |||
917 | cond_resched(); | 946 | cond_resched(); |
918 | } | 947 | } |
919 | 948 | ||
920 | hole_size = search_end- search_start; | 949 | /* |
950 | * At this point, search_start should be the end of | ||
951 | * allocated dev extents, and when shrinking the device, | ||
952 | * search_end may be smaller than search_start. | ||
953 | */ | ||
954 | if (search_end > search_start) | ||
955 | hole_size = search_end - search_start; | ||
956 | |||
921 | if (hole_size > max_hole_size) { | 957 | if (hole_size > max_hole_size) { |
922 | max_hole_start = search_start; | 958 | max_hole_start = search_start; |
923 | max_hole_size = hole_size; | 959 | max_hole_size = hole_size; |
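[Editor's note] When shrinking a device, search_end can legitimately end up below search_start, and both are unsigned 64-bit values, so the old unconditional subtraction could wrap to an enormous bogus hole. A standalone illustration of the wrap that the guard, together with the new hole_size = 0 initialization, prevents:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long search_start = 4096, search_end = 1024;
		unsigned long long hole_size = 0;

		/* Without the check: 1024 - 4096 wraps to nearly 2^64. */
		if (search_end > search_start)
			hole_size = search_end - search_start;

		printf("hole_size = %llu\n", hole_size);	/* prints 0 */
		return 0;
	}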
@@ -1037,7 +1073,8 @@ static noinline int find_next_chunk(struct btrfs_root *root, | |||
1037 | struct btrfs_key found_key; | 1073 | struct btrfs_key found_key; |
1038 | 1074 | ||
1039 | path = btrfs_alloc_path(); | 1075 | path = btrfs_alloc_path(); |
1040 | BUG_ON(!path); | 1076 | if (!path) |
1077 | return -ENOMEM; | ||
1041 | 1078 | ||
1042 | key.objectid = objectid; | 1079 | key.objectid = objectid; |
1043 | key.offset = (u64)-1; | 1080 | key.offset = (u64)-1; |
@@ -1542,6 +1579,7 @@ error: | |||
1542 | 1579 | ||
1543 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | 1580 | int btrfs_init_new_device(struct btrfs_root *root, char *device_path) |
1544 | { | 1581 | { |
1582 | struct request_queue *q; | ||
1545 | struct btrfs_trans_handle *trans; | 1583 | struct btrfs_trans_handle *trans; |
1546 | struct btrfs_device *device; | 1584 | struct btrfs_device *device; |
1547 | struct block_device *bdev; | 1585 | struct block_device *bdev; |
@@ -1611,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1611 | 1649 | ||
1612 | lock_chunks(root); | 1650 | lock_chunks(root); |
1613 | 1651 | ||
1652 | q = bdev_get_queue(bdev); | ||
1653 | if (blk_queue_discard(q)) | ||
1654 | device->can_discard = 1; | ||
1614 | device->writeable = 1; | 1655 | device->writeable = 1; |
1615 | device->work.func = pending_bios_fn; | 1656 | device->work.func = pending_bios_fn; |
1616 | generate_random_uuid(device->uuid); | 1657 | generate_random_uuid(device->uuid); |
@@ -1646,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1646 | root->fs_info->fs_devices->num_devices++; | 1687 | root->fs_info->fs_devices->num_devices++; |
1647 | root->fs_info->fs_devices->open_devices++; | 1688 | root->fs_info->fs_devices->open_devices++; |
1648 | root->fs_info->fs_devices->rw_devices++; | 1689 | root->fs_info->fs_devices->rw_devices++; |
1690 | if (device->can_discard) | ||
1691 | root->fs_info->fs_devices->num_can_discard++; | ||
1649 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1692 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
1650 | 1693 | ||
1651 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 1694 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
@@ -2061,8 +2104,10 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
2061 | 2104 | ||
2062 | /* step two, relocate all the chunks */ | 2105 | /* step two, relocate all the chunks */ |
2063 | path = btrfs_alloc_path(); | 2106 | path = btrfs_alloc_path(); |
2064 | BUG_ON(!path); | 2107 | if (!path) { |
2065 | 2108 | ret = -ENOMEM; | |
2109 | goto error; | ||
2110 | } | ||
2066 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 2111 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2067 | key.offset = (u64)-1; | 2112 | key.offset = (u64)-1; |
2068 | key.type = BTRFS_CHUNK_ITEM_KEY; | 2113 | key.type = BTRFS_CHUNK_ITEM_KEY; |
@@ -2410,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2410 | total_avail = device->total_bytes - device->bytes_used; | 2455 | total_avail = device->total_bytes - device->bytes_used; |
2411 | else | 2456 | else |
2412 | total_avail = 0; | 2457 | total_avail = 0; |
2413 | /* avail is off by max(alloc_start, 1MB), but that is the same | 2458 | |
2414 | * for all devices, so it doesn't hurt the sorting later on | 2459 | /* If there is no space on this device, skip it. */ |
2415 | */ | 2460 | if (total_avail == 0) |
2461 | continue; | ||
2416 | 2462 | ||
2417 | ret = find_free_dev_extent(trans, device, | 2463 | ret = find_free_dev_extent(trans, device, |
2418 | max_stripe_size * dev_stripes, | 2464 | max_stripe_size * dev_stripes, |
@@ -2661,7 +2707,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
2661 | 2707 | ||
2662 | ret = find_next_chunk(fs_info->chunk_root, | 2708 | ret = find_next_chunk(fs_info->chunk_root, |
2663 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); | 2709 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); |
2664 | BUG_ON(ret); | 2710 | if (ret) |
2711 | return ret; | ||
2665 | 2712 | ||
2666 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | | 2713 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | |
2667 | (fs_info->metadata_alloc_profile & | 2714 | (fs_info->metadata_alloc_profile & |
@@ -3595,7 +3642,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
3595 | if (!sb) | 3642 | if (!sb) |
3596 | return -ENOMEM; | 3643 | return -ENOMEM; |
3597 | btrfs_set_buffer_uptodate(sb); | 3644 | btrfs_set_buffer_uptodate(sb); |
3598 | btrfs_set_buffer_lockdep_class(sb, 0); | 3645 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); |
3599 | 3646 | ||
3600 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); | 3647 | write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); |
3601 | array_size = btrfs_super_sys_array_size(super_copy); | 3648 | array_size = btrfs_super_sys_array_size(super_copy); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61ae7ae..6d866db4e177 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -48,6 +48,7 @@ struct btrfs_device { | |||
48 | int writeable; | 48 | int writeable; |
49 | int in_fs_metadata; | 49 | int in_fs_metadata; |
50 | int missing; | 50 | int missing; |
51 | int can_discard; | ||
51 | 52 | ||
52 | spinlock_t io_lock; | 53 | spinlock_t io_lock; |
53 | 54 | ||
@@ -104,6 +105,7 @@ struct btrfs_fs_devices { | |||
104 | u64 rw_devices; | 105 | u64 rw_devices; |
105 | u64 missing_devices; | 106 | u64 missing_devices; |
106 | u64 total_rw_bytes; | 107 | u64 total_rw_bytes; |
108 | u64 num_can_discard; | ||
107 | struct block_device *latest_bdev; | 109 | struct block_device *latest_bdev; |
108 | 110 | ||
109 | /* all of the devices in the FS, protected by a mutex | 111 | /* all of the devices in the FS, protected by a mutex |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 5366fe452ab0..69565e5fc6a0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -102,48 +102,71 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
102 | if (!path) | 102 | if (!path) |
103 | return -ENOMEM; | 103 | return -ENOMEM; |
104 | 104 | ||
105 | /* first lets see if we already have this xattr */ | 105 | if (flags & XATTR_REPLACE) { |
106 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, | 106 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, |
107 | strlen(name), -1); | 107 | name_len, -1); |
108 | if (IS_ERR(di)) { | 108 | if (IS_ERR(di)) { |
109 | ret = PTR_ERR(di); | 109 | ret = PTR_ERR(di); |
110 | goto out; | 110 | goto out; |
111 | } | 111 | } else if (!di) { |
112 | 112 | ret = -ENODATA; | |
113 | /* ok we already have this xattr, lets remove it */ | ||
114 | if (di) { | ||
115 | /* if we want create only exit */ | ||
116 | if (flags & XATTR_CREATE) { | ||
117 | ret = -EEXIST; | ||
118 | goto out; | 113 | goto out; |
119 | } | 114 | } |
120 | |||
121 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 115 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
122 | BUG_ON(ret); | 116 | if (ret) |
117 | goto out; | ||
123 | btrfs_release_path(path); | 118 | btrfs_release_path(path); |
124 | 119 | ||
125 | /* if we don't have a value then we are removing the xattr */ | 120 | /* |
121 | * remove the attribute | ||
122 | */ | ||
126 | if (!value) | 123 | if (!value) |
127 | goto out; | 124 | goto out; |
128 | } else { | 125 | } |
126 | |||
127 | again: | ||
128 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | ||
129 | name, name_len, value, size); | ||
130 | if (ret == -EEXIST) { | ||
131 | if (flags & XATTR_CREATE) | ||
132 | goto out; | ||
133 | /* | ||
134 | * We can't use the path we already have since we won't have the | ||
135 | * proper locking for a delete, so release the path and | ||
136 | * re-lookup to delete the thing. | ||
137 | */ | ||
129 | btrfs_release_path(path); | 138 | btrfs_release_path(path); |
139 | di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), | ||
140 | name, name_len, -1); | ||
141 | if (IS_ERR(di)) { | ||
142 | ret = PTR_ERR(di); | ||
143 | goto out; | ||
144 | } else if (!di) { | ||
145 | /* Shouldn't happen but just in case... */ | ||
146 | btrfs_release_path(path); | ||
147 | goto again; | ||
148 | } | ||
130 | 149 | ||
131 | if (flags & XATTR_REPLACE) { | 150 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
132 | /* we couldn't find the attr to replace */ | 151 | if (ret) |
133 | ret = -ENODATA; | ||
134 | goto out; | 152 | goto out; |
153 | |||
154 | /* | ||
155 | * We have a value to set, so go back and try to insert it now. | ||
156 | */ | ||
157 | if (value) { | ||
158 | btrfs_release_path(path); | ||
159 | goto again; | ||
135 | } | 160 | } |
136 | } | 161 | } |
137 | |||
138 | /* ok we have to create a completely new xattr */ | ||
139 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | ||
140 | name, name_len, value, size); | ||
141 | BUG_ON(ret); | ||
142 | out: | 162 | out: |
143 | btrfs_free_path(path); | 163 | btrfs_free_path(path); |
144 | return ret; | 164 | return ret; |
145 | } | 165 | } |
146 | 166 | ||
167 | /* | ||
168 | * @value: "" makes the attribute empty, NULL removes it | ||
169 | */ | ||
147 | int __btrfs_setxattr(struct btrfs_trans_handle *trans, | 170 | int __btrfs_setxattr(struct btrfs_trans_handle *trans, |
148 | struct inode *inode, const char *name, | 171 | struct inode *inode, const char *name, |
149 | const void *value, size_t size, int flags) | 172 | const void *value, size_t size, int flags) |
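[Editor's note] The rewritten do_setxattr() tries the insert first and only deletes-and-retries on -EEXIST, which lines up with the setxattr(2) flag semantics: XATTR_CREATE must fail on an existing attribute and XATTR_REPLACE on a missing one. A small userspace check of those semantics (the file path is hypothetical):

	#include <sys/xattr.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char *path = "testfile";	/* assumed to exist */

		/* XATTR_CREATE: EEXIST if user.demo is already set. */
		if (setxattr(path, "user.demo", "1", 1, XATTR_CREATE))
			perror("create");

		/* XATTR_REPLACE: ENODATA if user.demo is absent. */
		if (setxattr(path, "user.demo", "2", 1, XATTR_REPLACE))
			perror("replace");

		return 0;
	}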
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 0dba6915712b..fb962efdacee 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -102,7 +102,7 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
102 | path = NULL; | 102 | path = NULL; |
103 | spin_lock(&req->r_old_dentry->d_lock); | 103 | spin_lock(&req->r_old_dentry->d_lock); |
104 | seq_printf(s, " #%llx/%.*s (%s)", | 104 | seq_printf(s, " #%llx/%.*s (%s)", |
105 | ceph_ino(req->r_old_dentry->d_parent->d_inode), | 105 | ceph_ino(req->r_old_dentry_dir), |
106 | req->r_old_dentry->d_name.len, | 106 | req->r_old_dentry->d_name.len, |
107 | req->r_old_dentry->d_name.name, | 107 | req->r_old_dentry->d_name.name, |
108 | path ? path : ""); | 108 | path ? path : ""); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1065ac779840..382abc9a6a54 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry) | |||
40 | if (dentry->d_fsdata) | 40 | if (dentry->d_fsdata) |
41 | return 0; | 41 | return 0; |
42 | 42 | ||
43 | if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ | ||
44 | ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) | ||
45 | d_set_d_op(dentry, &ceph_dentry_ops); | ||
46 | else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) | ||
47 | d_set_d_op(dentry, &ceph_snapdir_dentry_ops); | ||
48 | else | ||
49 | d_set_d_op(dentry, &ceph_snap_dentry_ops); | ||
50 | |||
51 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); | 43 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); |
52 | if (!di) | 44 | if (!di) |
53 | return -ENOMEM; /* oh well */ | 45 | return -ENOMEM; /* oh well */ |
@@ -58,16 +50,42 @@ int ceph_init_dentry(struct dentry *dentry) | |||
58 | kmem_cache_free(ceph_dentry_cachep, di); | 50 | kmem_cache_free(ceph_dentry_cachep, di); |
59 | goto out_unlock; | 51 | goto out_unlock; |
60 | } | 52 | } |
53 | |||
54 | if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ | ||
55 | ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) | ||
56 | d_set_d_op(dentry, &ceph_dentry_ops); | ||
57 | else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) | ||
58 | d_set_d_op(dentry, &ceph_snapdir_dentry_ops); | ||
59 | else | ||
60 | d_set_d_op(dentry, &ceph_snap_dentry_ops); | ||
61 | |||
61 | di->dentry = dentry; | 62 | di->dentry = dentry; |
62 | di->lease_session = NULL; | 63 | di->lease_session = NULL; |
63 | dentry->d_fsdata = di; | ||
64 | dentry->d_time = jiffies; | 64 | dentry->d_time = jiffies; |
65 | /* avoid reordering d_fsdata setup so that the check above is safe */ | ||
66 | smp_mb(); | ||
67 | dentry->d_fsdata = di; | ||
65 | ceph_dentry_lru_add(dentry); | 68 | ceph_dentry_lru_add(dentry); |
66 | out_unlock: | 69 | out_unlock: |
67 | spin_unlock(&dentry->d_lock); | 70 | spin_unlock(&dentry->d_lock); |
68 | return 0; | 71 | return 0; |
69 | } | 72 | } |
70 | 73 | ||
74 | struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry) | ||
75 | { | ||
76 | struct inode *inode = NULL; | ||
77 | |||
78 | if (!dentry) | ||
79 | return NULL; | ||
80 | |||
81 | spin_lock(&dentry->d_lock); | ||
82 | if (dentry->d_parent) { | ||
83 | inode = dentry->d_parent->d_inode; | ||
84 | ihold(inode); | ||
85 | } | ||
86 | spin_unlock(&dentry->d_lock); | ||
87 | return inode; | ||
88 | } | ||
71 | 89 | ||
72 | 90 | ||
73 | /* | 91 | /* |
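[Editor's note] The new ceph_get_dentry_parent_inode() pins the parent inode with ihold() under d_lock, and every caller added later in this series pairs it with iput(). The call pattern the following hunks adopt, sketched with the fs/ceph-internal headers:

	#include "super.h"
	#include "mds_client.h"

	/* Sketch: pin the parent across an MDS request, then drop it. */
	static int do_request_with_parent(struct ceph_mds_client *mdsc,
					  struct ceph_mds_request *req,
					  struct dentry *dentry)
	{
		struct inode *parent_inode;
		int err;

		parent_inode = ceph_get_dentry_parent_inode(dentry);
		err = ceph_mdsc_do_request(mdsc, parent_inode, req);
		iput(parent_inode);	/* iput(NULL) is a no-op */
		return err;
	}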
@@ -133,7 +151,7 @@ more: | |||
133 | d_unhashed(dentry) ? "!hashed" : "hashed", | 151 | d_unhashed(dentry) ? "!hashed" : "hashed", |
134 | parent->d_subdirs.prev, parent->d_subdirs.next); | 152 | parent->d_subdirs.prev, parent->d_subdirs.next); |
135 | if (p == &parent->d_subdirs) { | 153 | if (p == &parent->d_subdirs) { |
136 | fi->at_end = 1; | 154 | fi->flags |= CEPH_F_ATEND; |
137 | goto out_unlock; | 155 | goto out_unlock; |
138 | } | 156 | } |
139 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | 157 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
@@ -234,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
234 | const int max_bytes = fsc->mount_options->max_readdir_bytes; | 252 | const int max_bytes = fsc->mount_options->max_readdir_bytes; |
235 | 253 | ||
236 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 254 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
237 | if (fi->at_end) | 255 | if (fi->flags & CEPH_F_ATEND) |
238 | return 0; | 256 | return 0; |
239 | 257 | ||
240 | /* always start with . and .. */ | 258 | /* always start with . and .. */ |
@@ -403,7 +421,7 @@ more: | |||
403 | dout("readdir next frag is %x\n", frag); | 421 | dout("readdir next frag is %x\n", frag); |
404 | goto more; | 422 | goto more; |
405 | } | 423 | } |
406 | fi->at_end = 1; | 424 | fi->flags |= CEPH_F_ATEND; |
407 | 425 | ||
408 | /* | 426 | /* |
409 | * if dir_release_count still matches the dir, no dentries | 427 | * if dir_release_count still matches the dir, no dentries |
@@ -435,7 +453,7 @@ static void reset_readdir(struct ceph_file_info *fi) | |||
435 | dput(fi->dentry); | 453 | dput(fi->dentry); |
436 | fi->dentry = NULL; | 454 | fi->dentry = NULL; |
437 | } | 455 | } |
438 | fi->at_end = 0; | 456 | fi->flags &= ~CEPH_F_ATEND; |
439 | } | 457 | } |
440 | 458 | ||
441 | static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | 459 | static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) |
@@ -463,7 +481,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
463 | if (offset != file->f_pos) { | 481 | if (offset != file->f_pos) { |
464 | file->f_pos = offset; | 482 | file->f_pos = offset; |
465 | file->f_version = 0; | 483 | file->f_version = 0; |
466 | fi->at_end = 0; | 484 | fi->flags &= ~CEPH_F_ATEND; |
467 | } | 485 | } |
468 | retval = offset; | 486 | retval = offset; |
469 | 487 | ||
@@ -488,21 +506,13 @@ out: | |||
488 | } | 506 | } |
489 | 507 | ||
490 | /* | 508 | /* |
491 | * Process result of a lookup/open request. | 509 | * Handle lookups for the hidden .snap directory. |
492 | * | ||
493 | * Mainly, make sure we return the final req->r_dentry (if it already | ||
494 | * existed) in place of the original VFS-provided dentry when they | ||
495 | * differ. | ||
496 | * | ||
497 | * Gracefully handle the case where the MDS replies with -ENOENT and | ||
498 | * no trace (which it may do, at its discretion, e.g., if it doesn't | ||
499 | * care to issue a lease on the negative dentry). | ||
500 | */ | 510 | */ |
501 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 511 | int ceph_handle_snapdir(struct ceph_mds_request *req, |
502 | struct dentry *dentry, int err) | 512 | struct dentry *dentry, int err) |
503 | { | 513 | { |
504 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | 514 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
505 | struct inode *parent = dentry->d_parent->d_inode; | 515 | struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */ |
506 | 516 | ||
507 | /* .snap dir? */ | 517 | /* .snap dir? */ |
508 | if (err == -ENOENT && | 518 | if (err == -ENOENT && |
@@ -516,7 +526,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
516 | d_add(dentry, inode); | 526 | d_add(dentry, inode); |
517 | err = 0; | 527 | err = 0; |
518 | } | 528 | } |
529 | return err; | ||
530 | } | ||
519 | 531 | ||
532 | /* | ||
533 | * Figure out final result of a lookup/open request. | ||
534 | * | ||
535 | * Mainly, make sure we return the final req->r_dentry (if it already | ||
536 | * existed) in place of the original VFS-provided dentry when they | ||
537 | * differ. | ||
538 | * | ||
539 | * Gracefully handle the case where the MDS replies with -ENOENT and | ||
540 | * no trace (which it may do, at its discretion, e.g., if it doesn't | ||
541 | * care to issue a lease on the negative dentry). | ||
542 | */ | ||
543 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | ||
544 | struct dentry *dentry, int err) | ||
545 | { | ||
520 | if (err == -ENOENT) { | 546 | if (err == -ENOENT) { |
521 | /* no trace? */ | 547 | /* no trace? */ |
522 | err = 0; | 548 | err = 0; |
@@ -610,6 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
610 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); | 636 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); |
611 | req->r_locked_dir = dir; | 637 | req->r_locked_dir = dir; |
612 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 638 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
639 | err = ceph_handle_snapdir(req, dentry, err); | ||
613 | dentry = ceph_finish_lookup(req, dentry, err); | 640 | dentry = ceph_finish_lookup(req, dentry, err); |
614 | ceph_mdsc_put_request(req); /* will dput(dentry) */ | 641 | ceph_mdsc_put_request(req); /* will dput(dentry) */ |
615 | dout("lookup result=%p\n", dentry); | 642 | dout("lookup result=%p\n", dentry); |
@@ -789,6 +816,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, | |||
789 | req->r_dentry = dget(dentry); | 816 | req->r_dentry = dget(dentry); |
790 | req->r_num_caps = 2; | 817 | req->r_num_caps = 2; |
791 | req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ | 818 | req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ |
819 | req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); | ||
792 | req->r_locked_dir = dir; | 820 | req->r_locked_dir = dir; |
793 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; | 821 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; |
794 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; | 822 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
@@ -887,6 +915,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
887 | req->r_dentry = dget(new_dentry); | 915 | req->r_dentry = dget(new_dentry); |
888 | req->r_num_caps = 2; | 916 | req->r_num_caps = 2; |
889 | req->r_old_dentry = dget(old_dentry); | 917 | req->r_old_dentry = dget(old_dentry); |
918 | req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); | ||
890 | req->r_locked_dir = new_dir; | 919 | req->r_locked_dir = new_dir; |
891 | req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; | 920 | req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; |
892 | req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; | 921 | req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; |
@@ -1002,36 +1031,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) | |||
1002 | */ | 1031 | */ |
1003 | static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) | 1032 | static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) |
1004 | { | 1033 | { |
1034 | int valid = 0; | ||
1005 | struct inode *dir; | 1035 | struct inode *dir; |
1006 | 1036 | ||
1007 | if (nd && nd->flags & LOOKUP_RCU) | 1037 | if (nd && nd->flags & LOOKUP_RCU) |
1008 | return -ECHILD; | 1038 | return -ECHILD; |
1009 | 1039 | ||
1010 | dir = dentry->d_parent->d_inode; | ||
1011 | |||
1012 | dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, | 1040 | dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, |
1013 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode, | 1041 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode, |
1014 | ceph_dentry(dentry)->offset); | 1042 | ceph_dentry(dentry)->offset); |
1015 | 1043 | ||
1044 | dir = ceph_get_dentry_parent_inode(dentry); | ||
1045 | |||
1016 | /* always trust cached snapped dentries, snapdir dentry */ | 1046 | /* always trust cached snapped dentries, snapdir dentry */ |
1017 | if (ceph_snap(dir) != CEPH_NOSNAP) { | 1047 | if (ceph_snap(dir) != CEPH_NOSNAP) { |
1018 | dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, | 1048 | dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, |
1019 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 1049 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
1020 | goto out_touch; | 1050 | valid = 1; |
1051 | } else if (dentry->d_inode && | ||
1052 | ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) { | ||
1053 | valid = 1; | ||
1054 | } else if (dentry_lease_is_valid(dentry) || | ||
1055 | dir_lease_is_valid(dir, dentry)) { | ||
1056 | valid = 1; | ||
1021 | } | 1057 | } |
1022 | if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) | ||
1023 | goto out_touch; | ||
1024 | |||
1025 | if (dentry_lease_is_valid(dentry) || | ||
1026 | dir_lease_is_valid(dir, dentry)) | ||
1027 | goto out_touch; | ||
1028 | 1058 | ||
1029 | dout("d_revalidate %p invalid\n", dentry); | 1059 | dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); |
1030 | d_drop(dentry); | 1060 | if (valid) |
1031 | return 0; | 1061 | ceph_dentry_lru_touch(dentry); |
1032 | out_touch: | 1062 | else |
1033 | ceph_dentry_lru_touch(dentry); | 1063 | d_drop(dentry); |
1034 | return 1; | 1064 | iput(dir); |
1065 | return valid; | ||
1035 | } | 1066 | } |
1036 | 1067 | ||
1037 | /* | 1068 | /* |
@@ -1228,9 +1259,8 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1228 | * Return name hash for a given dentry. This is dependent on | 1259 | * Return name hash for a given dentry. This is dependent on |
1229 | * the parent directory's hash function. | 1260 | * the parent directory's hash function. |
1230 | */ | 1261 | */ |
1231 | unsigned ceph_dentry_hash(struct dentry *dn) | 1262 | unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) |
1232 | { | 1263 | { |
1233 | struct inode *dir = dn->d_parent->d_inode; | ||
1234 | struct ceph_inode_info *dci = ceph_inode(dir); | 1264 | struct ceph_inode_info *dci = ceph_inode(dir); |
1235 | 1265 | ||
1236 | switch (dci->i_dir_layout.dl_dir_hash) { | 1266 | switch (dci->i_dir_layout.dl_dir_hash) { |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index f67b687550de..9fbcdecaaccd 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -46,7 +46,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
46 | int type; | 46 | int type; |
47 | struct ceph_nfs_fh *fh = (void *)rawfh; | 47 | struct ceph_nfs_fh *fh = (void *)rawfh; |
48 | struct ceph_nfs_confh *cfh = (void *)rawfh; | 48 | struct ceph_nfs_confh *cfh = (void *)rawfh; |
49 | struct dentry *parent = dentry->d_parent; | 49 | struct dentry *parent; |
50 | struct inode *inode = dentry->d_inode; | 50 | struct inode *inode = dentry->d_inode; |
51 | int connected_handle_length = sizeof(*cfh)/4; | 51 | int connected_handle_length = sizeof(*cfh)/4; |
52 | int handle_length = sizeof(*fh)/4; | 52 | int handle_length = sizeof(*fh)/4; |
@@ -55,26 +55,33 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
55 | if (ceph_snap(inode) != CEPH_NOSNAP) | 55 | if (ceph_snap(inode) != CEPH_NOSNAP) |
56 | return -EINVAL; | 56 | return -EINVAL; |
57 | 57 | ||
58 | spin_lock(&dentry->d_lock); | ||
59 | parent = dget(dentry->d_parent); | ||
60 | spin_unlock(&dentry->d_lock); | ||
61 | |||
58 | if (*max_len >= connected_handle_length) { | 62 | if (*max_len >= connected_handle_length) { |
59 | dout("encode_fh %p connectable\n", dentry); | 63 | dout("encode_fh %p connectable\n", dentry); |
60 | cfh->ino = ceph_ino(dentry->d_inode); | 64 | cfh->ino = ceph_ino(dentry->d_inode); |
61 | cfh->parent_ino = ceph_ino(parent->d_inode); | 65 | cfh->parent_ino = ceph_ino(parent->d_inode); |
62 | cfh->parent_name_hash = ceph_dentry_hash(parent); | 66 | cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, |
67 | dentry); | ||
63 | *max_len = connected_handle_length; | 68 | *max_len = connected_handle_length; |
64 | type = 2; | 69 | type = 2; |
65 | } else if (*max_len >= handle_length) { | 70 | } else if (*max_len >= handle_length) { |
66 | if (connectable) { | 71 | if (connectable) { |
67 | *max_len = connected_handle_length; | 72 | *max_len = connected_handle_length; |
68 | return 255; | 73 | type = 255; |
74 | } else { | ||
75 | dout("encode_fh %p\n", dentry); | ||
76 | fh->ino = ceph_ino(dentry->d_inode); | ||
77 | *max_len = handle_length; | ||
78 | type = 1; | ||
69 | } | 79 | } |
70 | dout("encode_fh %p\n", dentry); | ||
71 | fh->ino = ceph_ino(dentry->d_inode); | ||
72 | *max_len = handle_length; | ||
73 | type = 1; | ||
74 | } else { | 80 | } else { |
75 | *max_len = handle_length; | 81 | *max_len = handle_length; |
76 | return 255; | 82 | type = 255; |
77 | } | 83 | } |
84 | dput(parent); | ||
78 | return type; | 85 | return type; |
79 | } | 86 | } |
80 | 87 | ||
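[Editor's note] Instead of reading dentry->d_parent bare, encode_fh now takes d_lock and dget()s the parent, so a concurrent rename cannot drop it mid-encode; the matching dput() sits at the single exit. The pinning idiom on its own:

	#include <linux/dcache.h>

	/* Sketch: grab a stable reference to the current parent. */
	static struct dentry *stable_parent(struct dentry *dentry)
	{
		struct dentry *parent;

		spin_lock(&dentry->d_lock);
		parent = dget(dentry->d_parent);
		spin_unlock(&dentry->d_lock);
		return parent;		/* caller must dput() */
	}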
@@ -123,7 +130,6 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
123 | return dentry; | 130 | return dentry; |
124 | } | 131 | } |
125 | err = ceph_init_dentry(dentry); | 132 | err = ceph_init_dentry(dentry); |
126 | |||
127 | if (err < 0) { | 133 | if (err < 0) { |
128 | iput(inode); | 134 | iput(inode); |
129 | return ERR_PTR(err); | 135 | return ERR_PTR(err); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0d0eae05598f..ce549d31eeb7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -122,7 +122,7 @@ int ceph_open(struct inode *inode, struct file *file) | |||
122 | struct ceph_mds_client *mdsc = fsc->mdsc; | 122 | struct ceph_mds_client *mdsc = fsc->mdsc; |
123 | struct ceph_mds_request *req; | 123 | struct ceph_mds_request *req; |
124 | struct ceph_file_info *cf = file->private_data; | 124 | struct ceph_file_info *cf = file->private_data; |
125 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 125 | struct inode *parent_inode = NULL; |
126 | int err; | 126 | int err; |
127 | int flags, fmode, wanted; | 127 | int flags, fmode, wanted; |
128 | 128 | ||
@@ -194,7 +194,10 @@ int ceph_open(struct inode *inode, struct file *file) | |||
194 | req->r_inode = inode; | 194 | req->r_inode = inode; |
195 | ihold(inode); | 195 | ihold(inode); |
196 | req->r_num_caps = 1; | 196 | req->r_num_caps = 1; |
197 | if (flags & (O_CREAT|O_TRUNC)) | ||
198 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); | ||
197 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 199 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
200 | iput(parent_inode); | ||
198 | if (!err) | 201 | if (!err) |
199 | err = ceph_init_file(inode, file, req->r_fmode); | 202 | err = ceph_init_file(inode, file, req->r_fmode); |
200 | ceph_mdsc_put_request(req); | 203 | ceph_mdsc_put_request(req); |
@@ -222,9 +225,9 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
222 | { | 225 | { |
223 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); | 226 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
224 | struct ceph_mds_client *mdsc = fsc->mdsc; | 227 | struct ceph_mds_client *mdsc = fsc->mdsc; |
225 | struct file *file = nd->intent.open.file; | 228 | struct file *file; |
226 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | ||
227 | struct ceph_mds_request *req; | 229 | struct ceph_mds_request *req; |
230 | struct dentry *ret; | ||
228 | int err; | 231 | int err; |
229 | int flags = nd->intent.open.flags; | 232 | int flags = nd->intent.open.flags; |
230 | 233 | ||
@@ -242,16 +245,24 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
242 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; | 245 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
243 | } | 246 | } |
244 | req->r_locked_dir = dir; /* caller holds dir->i_mutex */ | 247 | req->r_locked_dir = dir; /* caller holds dir->i_mutex */ |
245 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 248 | err = ceph_mdsc_do_request(mdsc, |
246 | dentry = ceph_finish_lookup(req, dentry, err); | 249 | (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, |
247 | if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) | 250 | req); |
251 | err = ceph_handle_snapdir(req, dentry, err); | ||
252 | if (err) | ||
253 | goto out; | ||
254 | if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) | ||
248 | err = ceph_handle_notrace_create(dir, dentry); | 255 | err = ceph_handle_notrace_create(dir, dentry); |
249 | if (!err) | 256 | if (err) |
250 | err = ceph_init_file(req->r_dentry->d_inode, file, | 257 | goto out; |
251 | req->r_fmode); | 258 | file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open); |
259 | if (IS_ERR(file)) | ||
260 | err = PTR_ERR(file); | ||
261 | out: | ||
262 | ret = ceph_finish_lookup(req, dentry, err); | ||
252 | ceph_mdsc_put_request(req); | 263 | ceph_mdsc_put_request(req); |
253 | dout("ceph_lookup_open result=%p\n", dentry); | 264 | dout("ceph_lookup_open result=%p\n", ret); |
254 | return dentry; | 265 | return ret; |
255 | } | 266 | } |
256 | 267 | ||
257 | int ceph_release(struct inode *inode, struct file *file) | 268 | int ceph_release(struct inode *inode, struct file *file) |
@@ -643,7 +654,8 @@ again: | |||
643 | 654 | ||
644 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || | 655 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || |
645 | (iocb->ki_filp->f_flags & O_DIRECT) || | 656 | (iocb->ki_filp->f_flags & O_DIRECT) || |
646 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) | 657 | (inode->i_sb->s_flags & MS_SYNCHRONOUS) || |
658 | (fi->flags & CEPH_F_SYNC)) | ||
647 | /* hmm, this isn't really async... */ | 659 | /* hmm, this isn't really async... */ |
648 | ret = ceph_sync_read(filp, base, len, ppos, &checkeof); | 660 | ret = ceph_sync_read(filp, base, len, ppos, &checkeof); |
649 | else | 661 | else |
@@ -712,7 +724,7 @@ retry_snap: | |||
712 | want = CEPH_CAP_FILE_BUFFER; | 724 | want = CEPH_CAP_FILE_BUFFER; |
713 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | 725 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); |
714 | if (ret < 0) | 726 | if (ret < 0) |
715 | goto out; | 727 | goto out_put; |
716 | 728 | ||
717 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", | 729 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", |
718 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 730 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
@@ -720,12 +732,23 @@ retry_snap: | |||
720 | 732 | ||
721 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || | 733 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
722 | (iocb->ki_filp->f_flags & O_DIRECT) || | 734 | (iocb->ki_filp->f_flags & O_DIRECT) || |
723 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { | 735 | (inode->i_sb->s_flags & MS_SYNCHRONOUS) || |
736 | (fi->flags & CEPH_F_SYNC)) { | ||
724 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 737 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
725 | &iocb->ki_pos); | 738 | &iocb->ki_pos); |
726 | } else { | 739 | } else { |
727 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | 740 | /* |
741 | * buffered write; drop Fw early to avoid slow | ||
742 | * revocation if we get stuck on balance_dirty_pages | ||
743 | */ | ||
744 | int dirty; | ||
728 | 745 | ||
746 | spin_lock(&inode->i_lock); | ||
747 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||
748 | spin_unlock(&inode->i_lock); | ||
749 | ceph_put_cap_refs(ci, got); | ||
750 | |||
751 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
729 | if ((ret >= 0 || ret == -EIOCBQUEUED) && | 752 | if ((ret >= 0 || ret == -EIOCBQUEUED) && |
730 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) | 753 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) |
731 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | 754 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { |
@@ -733,7 +756,12 @@ retry_snap: | |||
733 | if (err < 0) | 756 | if (err < 0) |
734 | ret = err; | 757 | ret = err; |
735 | } | 758 | } |
759 | |||
760 | if (dirty) | ||
761 | __mark_inode_dirty(inode, dirty); | ||
762 | goto out; | ||
736 | } | 763 | } |
764 | |||
737 | if (ret >= 0) { | 765 | if (ret >= 0) { |
738 | int dirty; | 766 | int dirty; |
739 | spin_lock(&inode->i_lock); | 767 | spin_lock(&inode->i_lock); |
@@ -743,12 +771,13 @@ retry_snap: | |||
743 | __mark_inode_dirty(inode, dirty); | 771 | __mark_inode_dirty(inode, dirty); |
744 | } | 772 | } |
745 | 773 | ||
746 | out: | 774 | out_put: |
747 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", | 775 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", |
748 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 776 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
749 | ceph_cap_string(got)); | 777 | ceph_cap_string(got)); |
750 | ceph_put_cap_refs(ci, got); | 778 | ceph_put_cap_refs(ci, got); |
751 | 779 | ||
780 | out: | ||
752 | if (ret == -EOLDSNAPC) { | 781 | if (ret == -EOLDSNAPC) { |
753 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", | 782 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", |
754 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); | 783 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index dfb2831d8d85..095799ba9dd1 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -560,7 +560,8 @@ static int fill_inode(struct inode *inode, | |||
560 | struct ceph_mds_reply_inode *info = iinfo->in; | 560 | struct ceph_mds_reply_inode *info = iinfo->in; |
561 | struct ceph_inode_info *ci = ceph_inode(inode); | 561 | struct ceph_inode_info *ci = ceph_inode(inode); |
562 | int i; | 562 | int i; |
563 | int issued, implemented; | 563 | int issued = 0, implemented; |
564 | int updating_inode = 0; | ||
564 | struct timespec mtime, atime, ctime; | 565 | struct timespec mtime, atime, ctime; |
565 | u32 nsplits; | 566 | u32 nsplits; |
566 | struct ceph_buffer *xattr_blob = NULL; | 567 | struct ceph_buffer *xattr_blob = NULL; |
@@ -599,7 +600,8 @@ static int fill_inode(struct inode *inode, | |||
599 | if (le64_to_cpu(info->version) > 0 && | 600 | if (le64_to_cpu(info->version) > 0 && |
600 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) | 601 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) |
601 | goto no_change; | 602 | goto no_change; |
602 | 603 | ||
604 | updating_inode = 1; | ||
603 | issued = __ceph_caps_issued(ci, &implemented); | 605 | issued = __ceph_caps_issued(ci, &implemented); |
604 | issued |= implemented | __ceph_caps_dirty(ci); | 606 | issued |= implemented | __ceph_caps_dirty(ci); |
605 | 607 | ||
@@ -707,17 +709,6 @@ static int fill_inode(struct inode *inode, | |||
707 | ci->i_rfiles = le64_to_cpu(info->rfiles); | 709 | ci->i_rfiles = le64_to_cpu(info->rfiles); |
708 | ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); | 710 | ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); |
709 | ceph_decode_timespec(&ci->i_rctime, &info->rctime); | 711 | ceph_decode_timespec(&ci->i_rctime, &info->rctime); |
710 | |||
711 | /* set dir completion flag? */ | ||
712 | if (ci->i_files == 0 && ci->i_subdirs == 0 && | ||
713 | ceph_snap(inode) == CEPH_NOSNAP && | ||
714 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && | ||
715 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | ||
716 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
717 | dout(" marking %p complete (empty)\n", inode); | ||
718 | /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ | ||
719 | ci->i_max_offset = 2; | ||
720 | } | ||
721 | break; | 712 | break; |
722 | default: | 713 | default: |
723 | pr_err("fill_inode %llx.%llx BAD mode 0%o\n", | 714 | pr_err("fill_inode %llx.%llx BAD mode 0%o\n", |
@@ -774,6 +765,19 @@ no_change: | |||
774 | __ceph_get_fmode(ci, cap_fmode); | 765 | __ceph_get_fmode(ci, cap_fmode); |
775 | } | 766 | } |
776 | 767 | ||
768 | /* set dir completion flag? */ | ||
769 | if (S_ISDIR(inode->i_mode) && | ||
770 | updating_inode && /* didn't jump to no_change */ | ||
771 | ci->i_files == 0 && ci->i_subdirs == 0 && | ||
772 | ceph_snap(inode) == CEPH_NOSNAP && | ||
773 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && | ||
774 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | ||
775 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
776 | dout(" marking %p complete (empty)\n", inode); | ||
777 | /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ | ||
778 | ci->i_max_offset = 2; | ||
779 | } | ||
780 | |||
777 | /* update delegation info? */ | 781 | /* update delegation info? */ |
778 | if (dirinfo) | 782 | if (dirinfo) |
779 | ceph_fill_dirfrag(inode, dirinfo); | 783 | ceph_fill_dirfrag(inode, dirinfo); |
@@ -805,14 +809,14 @@ static void update_dentry_lease(struct dentry *dentry, | |||
805 | return; | 809 | return; |
806 | 810 | ||
807 | spin_lock(&dentry->d_lock); | 811 | spin_lock(&dentry->d_lock); |
808 | dout("update_dentry_lease %p mask %d duration %lu ms ttl %lu\n", | 812 | dout("update_dentry_lease %p duration %lu ms ttl %lu\n", |
809 | dentry, le16_to_cpu(lease->mask), duration, ttl); | 813 | dentry, duration, ttl); |
810 | 814 | ||
811 | /* make lease_rdcache_gen match directory */ | 815 | /* make lease_rdcache_gen match directory */ |
812 | dir = dentry->d_parent->d_inode; | 816 | dir = dentry->d_parent->d_inode; |
813 | di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; | 817 | di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; |
814 | 818 | ||
815 | if (lease->mask == 0) | 819 | if (duration == 0) |
816 | goto out_unlock; | 820 | goto out_unlock; |
817 | 821 | ||
818 | if (di->lease_gen == session->s_cap_gen && | 822 | if (di->lease_gen == session->s_cap_gen && |
@@ -839,11 +843,13 @@ out_unlock: | |||
839 | /* | 843 | /* |
840 | * Set dentry's directory position based on the current dir's max, and | 844 | * Set dentry's directory position based on the current dir's max, and |
841 | * order it in d_subdirs, so that dcache_readdir behaves. | 845 | * order it in d_subdirs, so that dcache_readdir behaves. |
846 | * | ||
847 | * Always called under directory's i_mutex. | ||
842 | */ | 848 | */ |
843 | static void ceph_set_dentry_offset(struct dentry *dn) | 849 | static void ceph_set_dentry_offset(struct dentry *dn) |
844 | { | 850 | { |
845 | struct dentry *dir = dn->d_parent; | 851 | struct dentry *dir = dn->d_parent; |
846 | struct inode *inode = dn->d_parent->d_inode; | 852 | struct inode *inode = dir->d_inode; |
847 | struct ceph_dentry_info *di; | 853 | struct ceph_dentry_info *di; |
848 | 854 | ||
849 | BUG_ON(!inode); | 855 | BUG_ON(!inode); |
@@ -1022,9 +1028,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1022 | 1028 | ||
1023 | /* do we have a dn lease? */ | 1029 | /* do we have a dn lease? */ |
1024 | have_lease = have_dir_cap || | 1030 | have_lease = have_dir_cap || |
1025 | (le16_to_cpu(rinfo->dlease->mask) & | 1031 | le32_to_cpu(rinfo->dlease->duration_ms); |
1026 | CEPH_LOCK_DN); | ||
1027 | |||
1028 | if (!have_lease) | 1032 | if (!have_lease) |
1029 | dout("fill_trace no dentry lease or dir cap\n"); | 1033 | dout("fill_trace no dentry lease or dir cap\n"); |
1030 | 1034 | ||
@@ -1560,7 +1564,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1560 | { | 1564 | { |
1561 | struct inode *inode = dentry->d_inode; | 1565 | struct inode *inode = dentry->d_inode; |
1562 | struct ceph_inode_info *ci = ceph_inode(inode); | 1566 | struct ceph_inode_info *ci = ceph_inode(inode); |
1563 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1567 | struct inode *parent_inode; |
1564 | const unsigned int ia_valid = attr->ia_valid; | 1568 | const unsigned int ia_valid = attr->ia_valid; |
1565 | struct ceph_mds_request *req; | 1569 | struct ceph_mds_request *req; |
1566 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; | 1570 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
@@ -1743,7 +1747,9 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1743 | req->r_inode_drop = release; | 1747 | req->r_inode_drop = release; |
1744 | req->r_args.setattr.mask = cpu_to_le32(mask); | 1748 | req->r_args.setattr.mask = cpu_to_le32(mask); |
1745 | req->r_num_caps = 1; | 1749 | req->r_num_caps = 1; |
1750 | parent_inode = ceph_get_dentry_parent_inode(dentry); | ||
1746 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 1751 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
1752 | iput(parent_inode); | ||
1747 | } | 1753 | } |
1748 | dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, | 1754 | dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, |
1749 | ceph_cap_string(dirtied), mask); | 1755 | ceph_cap_string(dirtied), mask); |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index ef0b5f48e13a..3b256b50f7d8 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -38,7 +38,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg) | |||
38 | static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | 38 | static long ceph_ioctl_set_layout(struct file *file, void __user *arg) |
39 | { | 39 | { |
40 | struct inode *inode = file->f_dentry->d_inode; | 40 | struct inode *inode = file->f_dentry->d_inode; |
41 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 41 | struct inode *parent_inode; |
42 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 42 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
43 | struct ceph_mds_request *req; | 43 | struct ceph_mds_request *req; |
44 | struct ceph_ioctl_layout l; | 44 | struct ceph_ioctl_layout l; |
@@ -87,7 +87,9 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
87 | req->r_args.setlayout.layout.fl_pg_preferred = | 87 | req->r_args.setlayout.layout.fl_pg_preferred = |
88 | cpu_to_le32(l.preferred_osd); | 88 | cpu_to_le32(l.preferred_osd); |
89 | 89 | ||
90 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); | ||
90 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 91 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
92 | iput(parent_inode); | ||
91 | ceph_mdsc_put_request(req); | 93 | ceph_mdsc_put_request(req); |
92 | return err; | 94 | return err; |
93 | } | 95 | } |
@@ -231,6 +233,14 @@ static long ceph_ioctl_lazyio(struct file *file) | |||
231 | return 0; | 233 | return 0; |
232 | } | 234 | } |
233 | 235 | ||
236 | static long ceph_ioctl_syncio(struct file *file) | ||
237 | { | ||
238 | struct ceph_file_info *fi = file->private_data; | ||
239 | |||
240 | fi->flags |= CEPH_F_SYNC; | ||
241 | return 0; | ||
242 | } | ||
243 | |||
234 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 244 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
235 | { | 245 | { |
236 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); | 246 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); |
@@ -249,6 +259,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
249 | 259 | ||
250 | case CEPH_IOC_LAZYIO: | 260 | case CEPH_IOC_LAZYIO: |
251 | return ceph_ioctl_lazyio(file); | 261 | return ceph_ioctl_lazyio(file); |
262 | |||
263 | case CEPH_IOC_SYNCIO: | ||
264 | return ceph_ioctl_syncio(file); | ||
252 | } | 265 | } |
253 | 266 | ||
254 | return -ENOTTY; | 267 | return -ENOTTY; |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index 52e8fd74d450..0c5167e43180 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -40,5 +40,6 @@ struct ceph_ioctl_dataloc { | |||
40 | struct ceph_ioctl_dataloc) | 40 | struct ceph_ioctl_dataloc) |
41 | 41 | ||
42 | #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) | 42 | #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) |
43 | #define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5) | ||
43 | 44 | ||
44 | #endif | 45 | #endif |
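[Editor's note] For reference, this is how an application would be expected to flip a file descriptor into synchronous mode with the new ioctl; the CEPH_IOCTL_MAGIC value is assumed from the header, and the path is invented:

#include <sys/ioctl.h>
#include <fcntl.h>

#define CEPH_IOCTL_MAGIC 0x97			/* assumed; see fs/ceph/ioctl.h */
#define CEPH_IOC_SYNCIO  _IO(CEPH_IOCTL_MAGIC, 5)

int make_fd_sync(const char *path)
{
	int fd = open(path, O_RDWR);

	if (fd >= 0)
		ioctl(fd, CEPH_IOC_SYNCIO);	/* later I/O on fd goes synchronously */
	return fd;
}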
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0c1d91756528..86c59e16ba74 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -483,22 +483,26 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
483 | destroy_reply_info(&req->r_reply_info); | 483 | destroy_reply_info(&req->r_reply_info); |
484 | } | 484 | } |
485 | if (req->r_inode) { | 485 | if (req->r_inode) { |
486 | ceph_put_cap_refs(ceph_inode(req->r_inode), | 486 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
487 | CEPH_CAP_PIN); | ||
488 | iput(req->r_inode); | 487 | iput(req->r_inode); |
489 | } | 488 | } |
490 | if (req->r_locked_dir) | 489 | if (req->r_locked_dir) |
491 | ceph_put_cap_refs(ceph_inode(req->r_locked_dir), | 490 | ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); |
492 | CEPH_CAP_PIN); | ||
493 | if (req->r_target_inode) | 491 | if (req->r_target_inode) |
494 | iput(req->r_target_inode); | 492 | iput(req->r_target_inode); |
495 | if (req->r_dentry) | 493 | if (req->r_dentry) |
496 | dput(req->r_dentry); | 494 | dput(req->r_dentry); |
497 | if (req->r_old_dentry) { | 495 | if (req->r_old_dentry) { |
498 | ceph_put_cap_refs( | 496 | /* |
499 | ceph_inode(req->r_old_dentry->d_parent->d_inode), | 497 | * track (and drop pins for) r_old_dentry_dir |
500 | CEPH_CAP_PIN); | 498 | * separately, since r_old_dentry's d_parent may have |
499 | * changed between the dir mutex being dropped and | ||
500 | * this request being freed. | ||
501 | */ | ||
502 | ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), | ||
503 | CEPH_CAP_PIN); | ||
501 | dput(req->r_old_dentry); | 504 | dput(req->r_old_dentry); |
505 | iput(req->r_old_dentry_dir); | ||
502 | } | 506 | } |
503 | kfree(req->r_path1); | 507 | kfree(req->r_path1); |
504 | kfree(req->r_path2); | 508 | kfree(req->r_path2); |
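[Editor's note] The release side above implies a matching change at submission time; a hypothetical fragment of how the rename/link paths would now populate the request (the real assignments live in fs/ceph/dir.c, outside this diff):

	/* pin the old parent at submission; paired with the iput() above */
	req->r_old_dentry = dget(old_dentry);
	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);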
@@ -617,6 +621,12 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
617 | */ | 621 | */ |
618 | struct dentry *get_nonsnap_parent(struct dentry *dentry) | 622 | struct dentry *get_nonsnap_parent(struct dentry *dentry) |
619 | { | 623 | { |
624 | /* | ||
625 | * we don't need to worry about protecting the d_parent access | ||
626 | * here because we never rename inside the snapped namespace
627 | * except to resplice to another snapdir, and either the old or new | ||
628 | * result is a valid result. | ||
629 | */ | ||
620 | while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) | 630 | while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) |
621 | dentry = dentry->d_parent; | 631 | dentry = dentry->d_parent; |
622 | return dentry; | 632 | return dentry; |
@@ -652,7 +662,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
652 | if (req->r_inode) { | 662 | if (req->r_inode) { |
653 | inode = req->r_inode; | 663 | inode = req->r_inode; |
654 | } else if (req->r_dentry) { | 664 | } else if (req->r_dentry) { |
655 | struct inode *dir = req->r_dentry->d_parent->d_inode; | 665 | /* ignore race with rename; old or new d_parent is okay */ |
666 | struct dentry *parent = req->r_dentry->d_parent; | ||
667 | struct inode *dir = parent->d_inode; | ||
656 | 668 | ||
657 | if (dir->i_sb != mdsc->fsc->sb) { | 669 | if (dir->i_sb != mdsc->fsc->sb) { |
658 | /* not this fs! */ | 670 | /* not this fs! */ |
@@ -660,8 +672,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
660 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | 672 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { |
661 | /* direct snapped/virtual snapdir requests | 673 | /* direct snapped/virtual snapdir requests |
662 | * based on parent dir inode */ | 674 | * based on parent dir inode */ |
663 | struct dentry *dn = | 675 | struct dentry *dn = get_nonsnap_parent(parent); |
664 | get_nonsnap_parent(req->r_dentry->d_parent); | ||
665 | inode = dn->d_inode; | 676 | inode = dn->d_inode; |
666 | dout("__choose_mds using nonsnap parent %p\n", inode); | 677 | dout("__choose_mds using nonsnap parent %p\n", inode); |
667 | } else if (req->r_dentry->d_inode) { | 678 | } else if (req->r_dentry->d_inode) { |
@@ -670,7 +681,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
670 | } else { | 681 | } else { |
671 | /* dir + name */ | 682 | /* dir + name */ |
672 | inode = dir; | 683 | inode = dir; |
673 | hash = ceph_dentry_hash(req->r_dentry); | 684 | hash = ceph_dentry_hash(dir, req->r_dentry); |
674 | is_hash = true; | 685 | is_hash = true; |
675 | } | 686 | } |
676 | } | 687 | } |
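[Editor's note] ceph_dentry_hash() now takes the directory inode explicitly instead of chasing dn->d_parent itself, so a racing rename cannot change which directory's hash layout is consulted. A plausible shape for the updated helper, assuming it otherwise keeps its per-directory hash selection:

unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
	struct ceph_inode_info *dci = ceph_inode(dir);

	switch (dci->i_dir_layout.dl_dir_hash) {
	case 0:			/* for backward compat */
	case CEPH_STR_HASH_LINUX:
		return dn->d_name.hash;
	default:
		return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
				     dn->d_name.name, dn->d_name.len);
	}
}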
@@ -1584,7 +1595,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, | |||
1584 | r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); | 1595 | r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); |
1585 | dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, | 1596 | dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, |
1586 | *ppath); | 1597 | *ppath); |
1587 | } else if (rpath) { | 1598 | } else if (rpath || rino) { |
1588 | *ino = rino; | 1599 | *ino = rino; |
1589 | *ppath = rpath; | 1600 | *ppath = rpath; |
1590 | *pathlen = strlen(rpath); | 1601 | *pathlen = strlen(rpath); |
@@ -1931,9 +1942,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
1931 | if (req->r_locked_dir) | 1942 | if (req->r_locked_dir) |
1932 | ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); | 1943 | ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); |
1933 | if (req->r_old_dentry) | 1944 | if (req->r_old_dentry) |
1934 | ceph_get_cap_refs( | 1945 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), |
1935 | ceph_inode(req->r_old_dentry->d_parent->d_inode), | 1946 | CEPH_CAP_PIN); |
1936 | CEPH_CAP_PIN); | ||
1937 | 1947 | ||
1938 | /* issue */ | 1948 | /* issue */ |
1939 | mutex_lock(&mdsc->mutex); | 1949 | mutex_lock(&mdsc->mutex); |
@@ -2714,7 +2724,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2714 | struct ceph_mds_lease *h = msg->front.iov_base; | 2724 | struct ceph_mds_lease *h = msg->front.iov_base; |
2715 | u32 seq; | 2725 | u32 seq; |
2716 | struct ceph_vino vino; | 2726 | struct ceph_vino vino; |
2717 | int mask; | ||
2718 | struct qstr dname; | 2727 | struct qstr dname; |
2719 | int release = 0; | 2728 | int release = 0; |
2720 | 2729 | ||
@@ -2725,7 +2734,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2725 | goto bad; | 2734 | goto bad; |
2726 | vino.ino = le64_to_cpu(h->ino); | 2735 | vino.ino = le64_to_cpu(h->ino); |
2727 | vino.snap = CEPH_NOSNAP; | 2736 | vino.snap = CEPH_NOSNAP; |
2728 | mask = le16_to_cpu(h->mask); | ||
2729 | seq = le32_to_cpu(h->seq); | 2737 | seq = le32_to_cpu(h->seq); |
2730 | dname.name = (void *)h + sizeof(*h) + sizeof(u32); | 2738 | dname.name = (void *)h + sizeof(*h) + sizeof(u32); |
2731 | dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); | 2739 | dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); |
@@ -2737,8 +2745,8 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2737 | 2745 | ||
2738 | /* lookup inode */ | 2746 | /* lookup inode */ |
2739 | inode = ceph_find_inode(sb, vino); | 2747 | inode = ceph_find_inode(sb, vino); |
2740 | dout("handle_lease %s, mask %d, ino %llx %p %.*s\n", | 2748 | dout("handle_lease %s, ino %llx %p %.*s\n", |
2741 | ceph_lease_op_name(h->action), mask, vino.ino, inode, | 2749 | ceph_lease_op_name(h->action), vino.ino, inode, |
2742 | dname.len, dname.name); | 2750 | dname.len, dname.name); |
2743 | if (inode == NULL) { | 2751 | if (inode == NULL) { |
2744 | dout("handle_lease no inode %llx\n", vino.ino); | 2752 | dout("handle_lease no inode %llx\n", vino.ino); |
@@ -2828,7 +2836,6 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
2828 | return; | 2836 | return; |
2829 | lease = msg->front.iov_base; | 2837 | lease = msg->front.iov_base; |
2830 | lease->action = action; | 2838 | lease->action = action; |
2831 | lease->mask = cpu_to_le16(1); | ||
2832 | lease->ino = cpu_to_le64(ceph_vino(inode).ino); | 2839 | lease->ino = cpu_to_le64(ceph_vino(inode).ino); |
2833 | lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); | 2840 | lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); |
2834 | lease->seq = cpu_to_le32(seq); | 2841 | lease->seq = cpu_to_le32(seq); |
@@ -2850,7 +2857,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
2850 | * Pass @inode always, @dentry is optional. | 2857 | * Pass @inode always, @dentry is optional. |
2851 | */ | 2858 | */ |
2852 | void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, | 2859 | void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, |
2853 | struct dentry *dentry, int mask) | 2860 | struct dentry *dentry) |
2854 | { | 2861 | { |
2855 | struct ceph_dentry_info *di; | 2862 | struct ceph_dentry_info *di; |
2856 | struct ceph_mds_session *session; | 2863 | struct ceph_mds_session *session; |
@@ -2858,7 +2865,6 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, | |||
2858 | 2865 | ||
2859 | BUG_ON(inode == NULL); | 2866 | BUG_ON(inode == NULL); |
2860 | BUG_ON(dentry == NULL); | 2867 | BUG_ON(dentry == NULL); |
2861 | BUG_ON(mask == 0); | ||
2862 | 2868 | ||
2863 | /* is dentry lease valid? */ | 2869 | /* is dentry lease valid? */ |
2864 | spin_lock(&dentry->d_lock); | 2870 | spin_lock(&dentry->d_lock); |
@@ -2868,8 +2874,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, | |||
2868 | di->lease_gen != di->lease_session->s_cap_gen || | 2874 | di->lease_gen != di->lease_session->s_cap_gen || |
2869 | !time_before(jiffies, dentry->d_time)) { | 2875 | !time_before(jiffies, dentry->d_time)) { |
2870 | dout("lease_release inode %p dentry %p -- " | 2876 | dout("lease_release inode %p dentry %p -- " |
2871 | "no lease on %d\n", | 2877 | "no lease\n", |
2872 | inode, dentry, mask); | 2878 | inode, dentry); |
2873 | spin_unlock(&dentry->d_lock); | 2879 | spin_unlock(&dentry->d_lock); |
2874 | return; | 2880 | return; |
2875 | } | 2881 | } |
@@ -2880,8 +2886,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, | |||
2880 | __ceph_mdsc_drop_dentry_lease(dentry); | 2886 | __ceph_mdsc_drop_dentry_lease(dentry); |
2881 | spin_unlock(&dentry->d_lock); | 2887 | spin_unlock(&dentry->d_lock); |
2882 | 2888 | ||
2883 | dout("lease_release inode %p dentry %p mask %d to mds%d\n", | 2889 | dout("lease_release inode %p dentry %p to mds%d\n", |
2884 | inode, dentry, mask, session->s_mds); | 2890 | inode, dentry, session->s_mds); |
2885 | ceph_mdsc_lease_send_msg(session, inode, dentry, | 2891 | ceph_mdsc_lease_send_msg(session, inode, dentry, |
2886 | CEPH_MDS_LEASE_RELEASE, seq); | 2892 | CEPH_MDS_LEASE_RELEASE, seq); |
2887 | ceph_put_mds_session(session); | 2893 | ceph_put_mds_session(session); |
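[Editor's note] Since dentry leases are no longer tracked per mask, callers shrink accordingly; an illustrative call site (the real ones are in fs/ceph/dir.c and not part of this hunk):

	ceph_mdsc_lease_release(mdsc, dir, dentry);	/* mask argument dropped */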
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 7d8a0d662d56..4bb239921dbd 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -171,6 +171,7 @@ struct ceph_mds_request { | |||
171 | struct inode *r_inode; /* arg1 */ | 171 | struct inode *r_inode; /* arg1 */ |
172 | struct dentry *r_dentry; /* arg1 */ | 172 | struct dentry *r_dentry; /* arg1 */ |
173 | struct dentry *r_old_dentry; /* arg2: rename from or link from */ | 173 | struct dentry *r_old_dentry; /* arg2: rename from or link from */ |
174 | struct inode *r_old_dentry_dir; /* arg2: old dentry's parent dir */ | ||
174 | char *r_path1, *r_path2; | 175 | char *r_path1, *r_path2; |
175 | struct ceph_vino r_ino1, r_ino2; | 176 | struct ceph_vino r_ino1, r_ino2; |
176 | 177 | ||
@@ -333,7 +334,7 @@ extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | |||
333 | 334 | ||
334 | extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | 335 | extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, |
335 | struct inode *inode, | 336 | struct inode *inode, |
336 | struct dentry *dn, int mask); | 337 | struct dentry *dn); |
337 | 338 | ||
338 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | 339 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); |
339 | 340 | ||
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 54b14de2e729..e26437191333 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -449,6 +449,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
449 | spin_lock(&inode->i_lock); | 449 | spin_lock(&inode->i_lock); |
450 | used = __ceph_caps_used(ci); | 450 | used = __ceph_caps_used(ci); |
451 | dirty = __ceph_caps_dirty(ci); | 451 | dirty = __ceph_caps_dirty(ci); |
452 | |||
453 | /* | ||
454 | * If there is a write in progress, treat that as a dirty Fw, | ||
455 | * even though it hasn't completed yet; by the time we finish | ||
456 | * up this capsnap it will be. | ||
457 | */ | ||
458 | if (used & CEPH_CAP_FILE_WR) | ||
459 | dirty |= CEPH_CAP_FILE_WR; | ||
460 | |||
452 | if (__ceph_have_pending_cap_snap(ci)) { | 461 | if (__ceph_have_pending_cap_snap(ci)) { |
453 | /* there is no point in queuing multiple "pending" cap_snaps, | 462 | /* there is no point in queuing multiple "pending" cap_snaps, |
454 | as no new writes are allowed to start when pending, so any | 463 | as no new writes are allowed to start when pending, so any |
@@ -456,13 +465,19 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
456 | cap_snap. lucky us. */ | 465 | cap_snap. lucky us. */ |
457 | dout("queue_cap_snap %p already pending\n", inode); | 466 | dout("queue_cap_snap %p already pending\n", inode); |
458 | kfree(capsnap); | 467 | kfree(capsnap); |
459 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) || | 468 | } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| |
460 | (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| | 469 | CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { |
461 | CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { | ||
462 | struct ceph_snap_context *snapc = ci->i_head_snapc; | 470 | struct ceph_snap_context *snapc = ci->i_head_snapc; |
463 | 471 | ||
464 | dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, | 472 | /* |
465 | capsnap, snapc); | 473 | * if we are a sync write, we may need to go to the snaprealm |
474 | * to get the current snapc. | ||
475 | */ | ||
476 | if (!snapc) | ||
477 | snapc = ci->i_snap_realm->cached_context; | ||
478 | |||
479 | dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n", | ||
480 | inode, capsnap, snapc, ceph_cap_string(dirty)); | ||
466 | ihold(inode); | 481 | ihold(inode); |
467 | 482 | ||
468 | atomic_set(&capsnap->nref, 1); | 483 | atomic_set(&capsnap->nref, 1); |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f2f77fd3c14c..88bacaf385d9 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -73,8 +73,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
73 | */ | 73 | */ |
74 | buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; | 74 | buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; |
75 | buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); | 75 | buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); |
76 | buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >> | 76 | buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); |
77 | (CEPH_BLOCK_SHIFT-10); | ||
78 | buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); | 77 | buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); |
79 | 78 | ||
80 | buf->f_files = le64_to_cpu(st.num_objects); | 79 | buf->f_files = le64_to_cpu(st.num_objects); |
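[Editor's note] A worked example with invented numbers shows why the f_bfree change matters: with st.kb = 1000000, st.kb_used = 400000 and st.kb_avail = 500000 (the 100000 KB gap being space the cluster holds back), the old code reported 600000 KB as free even though only 500000 KB could actually be allocated. f_bfree now agrees with f_bavail.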
@@ -780,6 +779,10 @@ static int ceph_register_bdi(struct super_block *sb, | |||
780 | fsc->backing_dev_info.ra_pages = | 779 | fsc->backing_dev_info.ra_pages = |
781 | (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) | 780 | (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) |
782 | >> PAGE_SHIFT; | 781 | >> PAGE_SHIFT; |
782 | else | ||
783 | fsc->backing_dev_info.ra_pages = | ||
784 | default_backing_dev_info.ra_pages; | ||
785 | |||
783 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", | 786 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", |
784 | atomic_long_inc_return(&bdi_seq)); | 787 | atomic_long_inc_return(&bdi_seq)); |
785 | if (!err) | 788 | if (!err) |
@@ -810,8 +813,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, | |||
810 | fsc = create_fs_client(fsopt, opt); | 813 | fsc = create_fs_client(fsopt, opt); |
811 | if (IS_ERR(fsc)) { | 814 | if (IS_ERR(fsc)) { |
812 | res = ERR_CAST(fsc); | 815 | res = ERR_CAST(fsc); |
813 | kfree(fsopt); | 816 | destroy_mount_options(fsopt); |
814 | kfree(opt); | 817 | ceph_destroy_options(opt); |
815 | goto out_final; | 818 | goto out_final; |
816 | } | 819 | } |
817 | 820 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 30446b144e3d..a23eed526f05 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -543,13 +543,16 @@ extern void ceph_reservation_status(struct ceph_fs_client *client, | |||
543 | /* | 543 | /* |
544 | * we keep buffered readdir results attached to file->private_data | 544 | * we keep buffered readdir results attached to file->private_data |
545 | */ | 545 | */ |
546 | #define CEPH_F_SYNC 1 | ||
547 | #define CEPH_F_ATEND 2 | ||
548 | |||
546 | struct ceph_file_info { | 549 | struct ceph_file_info { |
547 | int fmode; /* initialized on open */ | 550 | short fmode; /* initialized on open */ |
551 | short flags; /* CEPH_F_* */ | ||
548 | 552 | ||
549 | /* readdir: position within the dir */ | 553 | /* readdir: position within the dir */ |
550 | u32 frag; | 554 | u32 frag; |
551 | struct ceph_mds_request *last_readdir; | 555 | struct ceph_mds_request *last_readdir; |
552 | int at_end; | ||
553 | 556 | ||
554 | /* readdir: position within a frag */ | 557 | /* readdir: position within a frag */ |
555 | unsigned offset; /* offset of last chunk, adjusted for . and .. */ | 558 | unsigned offset; /* offset of last chunk, adjusted for . and .. */ |
@@ -789,6 +792,8 @@ extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, | |||
789 | ceph_snapdir_dentry_ops; | 792 | ceph_snapdir_dentry_ops; |
790 | 793 | ||
791 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); | 794 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); |
795 | extern int ceph_handle_snapdir(struct ceph_mds_request *req, | ||
796 | struct dentry *dentry, int err); | ||
792 | extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 797 | extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
793 | struct dentry *dentry, int err); | 798 | struct dentry *dentry, int err); |
794 | 799 | ||
@@ -796,7 +801,8 @@ extern void ceph_dentry_lru_add(struct dentry *dn); | |||
796 | extern void ceph_dentry_lru_touch(struct dentry *dn); | 801 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
797 | extern void ceph_dentry_lru_del(struct dentry *dn); | 802 | extern void ceph_dentry_lru_del(struct dentry *dn); |
798 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); | 803 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); |
799 | extern unsigned ceph_dentry_hash(struct dentry *dn); | 804 | extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn); |
805 | extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry); | ||
800 | 806 | ||
801 | /* | 807 | /* |
802 | * our d_ops vary depending on whether the inode is live, | 808 | * our d_ops vary depending on whether the inode is live, |
@@ -819,14 +825,6 @@ extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, | |||
819 | int p_locks, int f_locks); | 825 | int p_locks, int f_locks); |
820 | extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); | 826 | extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); |
821 | 827 | ||
822 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | ||
823 | { | ||
824 | if (dentry && dentry->d_parent) | ||
825 | return dentry->d_parent->d_inode; | ||
826 | |||
827 | return NULL; | ||
828 | } | ||
829 | |||
830 | /* debugfs.c */ | 828 | /* debugfs.c */ |
831 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | 829 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); |
832 | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); | 830 | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f42d730f1b66..96c6739a0280 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -629,7 +629,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
629 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | 629 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
630 | struct inode *inode = dentry->d_inode; | 630 | struct inode *inode = dentry->d_inode; |
631 | struct ceph_inode_info *ci = ceph_inode(inode); | 631 | struct ceph_inode_info *ci = ceph_inode(inode); |
632 | struct inode *parent_inode = dentry->d_parent->d_inode; | 632 | struct inode *parent_inode; |
633 | struct ceph_mds_request *req; | 633 | struct ceph_mds_request *req; |
634 | struct ceph_mds_client *mdsc = fsc->mdsc; | 634 | struct ceph_mds_client *mdsc = fsc->mdsc; |
635 | int err; | 635 | int err; |
@@ -677,7 +677,9 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
677 | req->r_data_len = size; | 677 | req->r_data_len = size; |
678 | 678 | ||
679 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); | 679 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); |
680 | parent_inode = ceph_get_dentry_parent_inode(dentry); | ||
680 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 681 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
682 | iput(parent_inode); | ||
681 | ceph_mdsc_put_request(req); | 683 | ceph_mdsc_put_request(req); |
682 | dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); | 684 | dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); |
683 | 685 | ||
@@ -788,7 +790,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
788 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | 790 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
789 | struct ceph_mds_client *mdsc = fsc->mdsc; | 791 | struct ceph_mds_client *mdsc = fsc->mdsc; |
790 | struct inode *inode = dentry->d_inode; | 792 | struct inode *inode = dentry->d_inode; |
791 | struct inode *parent_inode = dentry->d_parent->d_inode; | 793 | struct inode *parent_inode; |
792 | struct ceph_mds_request *req; | 794 | struct ceph_mds_request *req; |
793 | int err; | 795 | int err; |
794 | 796 | ||
@@ -802,7 +804,9 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
802 | req->r_num_caps = 1; | 804 | req->r_num_caps = 1; |
803 | req->r_path2 = kstrdup(name, GFP_NOFS); | 805 | req->r_path2 = kstrdup(name, GFP_NOFS); |
804 | 806 | ||
807 | parent_inode = ceph_get_dentry_parent_inode(dentry); | ||
805 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 808 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
809 | iput(parent_inode); | ||
806 | ceph_mdsc_put_request(req); | 810 | ceph_mdsc_put_request(req); |
807 | return err; | 811 | return err; |
808 | } | 812 | } |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 2fe3cf13b2e9..6d40656e1e29 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
176 | 176 | ||
177 | #ifdef CONFIG_CIFS_STATS2 | 177 | #ifdef CONFIG_CIFS_STATS2 |
178 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", | 178 | seq_printf(m, " In Send: %d In MaxReq Wait: %d", |
179 | atomic_read(&server->inSend), | 179 | atomic_read(&server->in_send), |
180 | atomic_read(&server->num_waiters)); | 180 | atomic_read(&server->num_waiters)); |
181 | #endif | 181 | #endif |
182 | 182 | ||
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8d8f28c94c0f..6873bb634a97 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -141,10 +141,11 @@ char *cifs_compose_mount_options(const char *sb_mountdata, | |||
141 | 141 | ||
142 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); | 142 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); |
143 | if (rc < 0) { | 143 | if (rc < 0) { |
144 | cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", | 144 | cFYI(1, "%s: Failed to resolve server part of %s to IP: %d", |
145 | __func__, *devname, rc); | 145 | __func__, *devname, rc); |
146 | goto compose_mount_options_err; | 146 | goto compose_mount_options_err; |
147 | } | 147 | } |
148 | |||
148 | /* md_len = strlen(...) + 12 for 'sep+prefixpath=' | 149 | /* md_len = strlen(...) + 12 for 'sep+prefixpath=' |
149 | * assuming that we have 'unc=' and 'ip=' in | 150 | * assuming that we have 'unc=' and 'ip=' in |
150 | * the original sb_mountdata | 151 | * the original sb_mountdata |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 21de1d6d5849..d0f59faefb78 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -991,24 +991,6 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, | |||
991 | return pntsd; | 991 | return pntsd; |
992 | } | 992 | } |
993 | 993 | ||
994 | static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, | ||
995 | struct cifs_ntsd *pnntsd, u32 acllen) | ||
996 | { | ||
997 | int xid, rc; | ||
998 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | ||
999 | |||
1000 | if (IS_ERR(tlink)) | ||
1001 | return PTR_ERR(tlink); | ||
1002 | |||
1003 | xid = GetXid(); | ||
1004 | rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen); | ||
1005 | FreeXid(xid); | ||
1006 | cifs_put_tlink(tlink); | ||
1007 | |||
1008 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); | ||
1009 | return rc; | ||
1010 | } | ||
1011 | |||
1012 | static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, | 994 | static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, |
1013 | struct cifs_ntsd *pnntsd, u32 acllen) | 995 | struct cifs_ntsd *pnntsd, u32 acllen) |
1014 | { | 996 | { |
@@ -1047,18 +1029,10 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | |||
1047 | struct inode *inode, const char *path) | 1029 | struct inode *inode, const char *path) |
1048 | { | 1030 | { |
1049 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 1031 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
1050 | struct cifsFileInfo *open_file; | ||
1051 | int rc; | ||
1052 | 1032 | ||
1053 | cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); | 1033 | cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); |
1054 | 1034 | ||
1055 | open_file = find_readable_file(CIFS_I(inode), true); | 1035 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); |
1056 | if (!open_file) | ||
1057 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); | ||
1058 | |||
1059 | rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); | ||
1060 | cifsFileInfo_put(open_file); | ||
1061 | return rc; | ||
1062 | } | 1036 | } |
1063 | 1037 | ||
1064 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ | 1038 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 5a0ee7f2af06..e76bfeb68267 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -52,19 +52,29 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, | |||
52 | 52 | ||
53 | rc = crypto_shash_init(&server->secmech.sdescmd5->shash); | 53 | rc = crypto_shash_init(&server->secmech.sdescmd5->shash); |
54 | if (rc) { | 54 | if (rc) { |
55 | cERROR(1, "%s: Oould not init md5\n", __func__); | 55 | cERROR(1, "%s: Could not init md5\n", __func__); |
56 | return rc; | 56 | return rc; |
57 | } | 57 | } |
58 | 58 | ||
59 | crypto_shash_update(&server->secmech.sdescmd5->shash, | 59 | rc = crypto_shash_update(&server->secmech.sdescmd5->shash, |
60 | server->session_key.response, server->session_key.len); | 60 | server->session_key.response, server->session_key.len); |
61 | if (rc) { | ||
62 | cERROR(1, "%s: Could not update with response\n", __func__); | ||
63 | return rc; | ||
64 | } | ||
61 | 65 | ||
62 | crypto_shash_update(&server->secmech.sdescmd5->shash, | 66 | rc = crypto_shash_update(&server->secmech.sdescmd5->shash, |
63 | cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length)); | 67 | cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length)); |
68 | if (rc) { | ||
69 | cERROR(1, "%s: Could not update with payload\n", __func__); | ||
70 | return rc; | ||
71 | } | ||
64 | 72 | ||
65 | rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); | 73 | rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); |
74 | if (rc) | ||
75 | cERROR(1, "%s: Could not generate md5 hash\n", __func__); | ||
66 | 76 | ||
67 | return 0; | 77 | return rc; |
68 | } | 78 | } |
69 | 79 | ||
70 | /* must be called with server->srv_mutex held */ | 80 | /* must be called with server->srv_mutex held */ |
@@ -77,9 +87,15 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | |||
77 | if ((cifs_pdu == NULL) || (server == NULL)) | 87 | if ((cifs_pdu == NULL) || (server == NULL)) |
78 | return -EINVAL; | 88 | return -EINVAL; |
79 | 89 | ||
80 | if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) | 90 | if (!(cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) || |
91 | server->tcpStatus == CifsNeedNegotiate) | ||
81 | return rc; | 92 | return rc; |
82 | 93 | ||
94 | if (!server->session_estab) { | ||
95 | strncpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8); | ||
96 | return rc; | ||
97 | } | ||
98 | |||
83 | cifs_pdu->Signature.Sequence.SequenceNumber = | 99 | cifs_pdu->Signature.Sequence.SequenceNumber = |
84 | cpu_to_le32(server->sequence_number); | 100 | cpu_to_le32(server->sequence_number); |
85 | cifs_pdu->Signature.Sequence.Reserved = 0; | 101 | cifs_pdu->Signature.Sequence.Reserved = 0; |
@@ -112,12 +128,16 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, | |||
112 | 128 | ||
113 | rc = crypto_shash_init(&server->secmech.sdescmd5->shash); | 129 | rc = crypto_shash_init(&server->secmech.sdescmd5->shash); |
114 | if (rc) { | 130 | if (rc) { |
115 | cERROR(1, "%s: Oould not init md5\n", __func__); | 131 | cERROR(1, "%s: Could not init md5\n", __func__); |
116 | return rc; | 132 | return rc; |
117 | } | 133 | } |
118 | 134 | ||
119 | crypto_shash_update(&server->secmech.sdescmd5->shash, | 135 | rc = crypto_shash_update(&server->secmech.sdescmd5->shash, |
120 | server->session_key.response, server->session_key.len); | 136 | server->session_key.response, server->session_key.len); |
137 | if (rc) { | ||
138 | cERROR(1, "%s: Could not update with response\n", __func__); | ||
139 | return rc; | ||
140 | } | ||
121 | 141 | ||
122 | for (i = 0; i < n_vec; i++) { | 142 | for (i = 0; i < n_vec; i++) { |
123 | if (iov[i].iov_len == 0) | 143 | if (iov[i].iov_len == 0) |
@@ -131,14 +151,24 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, | |||
131 | if (i == 0) { | 151 | if (i == 0) { |
132 | if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ | 152 | if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ |
133 | break; /* nothing to sign or corrupt header */ | 153 | break; /* nothing to sign or corrupt header */ |
154 | rc = | ||
134 | crypto_shash_update(&server->secmech.sdescmd5->shash, | 155 | crypto_shash_update(&server->secmech.sdescmd5->shash, |
135 | iov[i].iov_base + 4, iov[i].iov_len - 4); | 156 | iov[i].iov_base + 4, iov[i].iov_len - 4); |
136 | } else | 157 | } else { |
158 | rc = | ||
137 | crypto_shash_update(&server->secmech.sdescmd5->shash, | 159 | crypto_shash_update(&server->secmech.sdescmd5->shash, |
138 | iov[i].iov_base, iov[i].iov_len); | 160 | iov[i].iov_base, iov[i].iov_len); |
161 | } | ||
162 | if (rc) { | ||
163 | cERROR(1, "%s: Could not update with payload\n", | ||
164 | __func__); | ||
165 | return rc; | ||
166 | } | ||
139 | } | 167 | } |
140 | 168 | ||
141 | rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); | 169 | rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); |
170 | if (rc) | ||
171 | cERROR(1, "%s: Could not generate md5 hash\n", __func__); | ||
142 | 172 | ||
143 | return rc; | 173 | return rc; |
144 | } | 174 | } |
@@ -154,8 +184,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
154 | if ((cifs_pdu == NULL) || (server == NULL)) | 184 | if ((cifs_pdu == NULL) || (server == NULL)) |
155 | return -EINVAL; | 185 | return -EINVAL; |
156 | 186 | ||
157 | if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) | 187 | if (!(cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) || |
188 | server->tcpStatus == CifsNeedNegotiate) | ||
189 | return rc; | ||
190 | |||
191 | if (!server->session_estab) { | ||
192 | strncpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8); | ||
158 | return rc; | 193 | return rc; |
194 | } | ||
159 | 195 | ||
160 | cifs_pdu->Signature.Sequence.SequenceNumber = | 196 | cifs_pdu->Signature.Sequence.SequenceNumber = |
161 | cpu_to_le32(server->sequence_number); | 197 | cpu_to_le32(server->sequence_number); |
@@ -463,8 +499,12 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
463 | /* calculate md4 hash of password */ | 499 | /* calculate md4 hash of password */ |
464 | E_md4hash(ses->password, nt_hash); | 500 | E_md4hash(ses->password, nt_hash); |
465 | 501 | ||
466 | crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash, | 502 | rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash, |
467 | CIFS_NTHASH_SIZE); | 503 | CIFS_NTHASH_SIZE); |
504 | if (rc) { | ||
505 | cERROR(1, "%s: Could not set NT Hash as a key", __func__); | ||
506 | return rc; | ||
507 | } | ||
468 | 508 | ||
469 | rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); | 509 | rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); |
470 | if (rc) { | 510 | if (rc) { |
@@ -478,13 +518,18 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
478 | if (user == NULL) { | 518 | if (user == NULL) { |
479 | cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); | 519 | cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); |
480 | rc = -ENOMEM; | 520 | rc = -ENOMEM; |
481 | goto calc_exit_2; | 521 | return rc; |
482 | } | 522 | } |
483 | len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp); | 523 | len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp); |
484 | UniStrupr(user); | 524 | UniStrupr(user); |
485 | 525 | ||
486 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 526 | rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
487 | (char *)user, 2 * len); | 527 | (char *)user, 2 * len); |
528 | kfree(user); | ||
529 | if (rc) { | ||
530 | cERROR(1, "%s: Could not update with user\n", __func__); | ||
531 | return rc; | ||
532 | } | ||
488 | 533 | ||
489 | /* convert ses->domainName to unicode and uppercase */ | 534 | /* convert ses->domainName to unicode and uppercase */ |
490 | if (ses->domainName) { | 535 | if (ses->domainName) { |
@@ -494,13 +539,19 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
494 | if (domain == NULL) { | 539 | if (domain == NULL) { |
495 | cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); | 540 | cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); |
496 | rc = -ENOMEM; | 541 | rc = -ENOMEM; |
497 | goto calc_exit_1; | 542 | return rc; |
498 | } | 543 | } |
499 | len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, | 544 | len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, |
500 | nls_cp); | 545 | nls_cp); |
546 | rc = | ||
501 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 547 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
502 | (char *)domain, 2 * len); | 548 | (char *)domain, 2 * len); |
503 | kfree(domain); | 549 | kfree(domain); |
550 | if (rc) { | ||
551 | cERROR(1, "%s: Could not update with domain\n", | ||
552 | __func__); | ||
553 | return rc; | ||
554 | } | ||
504 | } else if (ses->serverName) { | 555 | } else if (ses->serverName) { |
505 | len = strlen(ses->serverName); | 556 | len = strlen(ses->serverName); |
506 | 557 | ||
@@ -508,21 +559,26 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
508 | if (server == NULL) { | 559 | if (server == NULL) { |
509 | cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); | 560 | cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); |
510 | rc = -ENOMEM; | 561 | rc = -ENOMEM; |
511 | goto calc_exit_1; | 562 | return rc; |
512 | } | 563 | } |
513 | len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, | 564 | len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, |
514 | nls_cp); | 565 | nls_cp); |
566 | rc = | ||
515 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 567 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
516 | (char *)server, 2 * len); | 568 | (char *)server, 2 * len); |
517 | kfree(server); | 569 | kfree(server); |
570 | if (rc) { | ||
571 | cERROR(1, "%s: Could not update with server\n", | ||
572 | __func__); | ||
573 | return rc; | ||
574 | } | ||
518 | } | 575 | } |
519 | 576 | ||
520 | rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, | 577 | rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, |
521 | ntlmv2_hash); | 578 | ntlmv2_hash); |
579 | if (rc) | ||
580 | cERROR(1, "%s: Could not generate md5 hash\n", __func__); | ||
522 | 581 | ||
523 | calc_exit_1: | ||
524 | kfree(user); | ||
525 | calc_exit_2: | ||
526 | return rc; | 582 | return rc; |
527 | } | 583 | } |
528 | 584 | ||
@@ -537,8 +593,12 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) | |||
537 | return -1; | 593 | return -1; |
538 | } | 594 | } |
539 | 595 | ||
540 | crypto_shash_setkey(ses->server->secmech.hmacmd5, | 596 | rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, |
541 | ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); | 597 | ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); |
598 | if (rc) { | ||
599 | cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__); | ||
600 | return rc; | ||
601 | } | ||
542 | 602 | ||
543 | rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); | 603 | rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); |
544 | if (rc) { | 604 | if (rc) { |
@@ -552,11 +612,17 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) | |||
552 | else | 612 | else |
553 | memcpy(ses->auth_key.response + offset, | 613 | memcpy(ses->auth_key.response + offset, |
554 | ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); | 614 | ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); |
555 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 615 | rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
556 | ses->auth_key.response + offset, ses->auth_key.len - offset); | 616 | ses->auth_key.response + offset, ses->auth_key.len - offset); |
617 | if (rc) { | ||
618 | cERROR(1, "%s: Could not update with response\n", __func__); | ||
619 | return rc; | ||
620 | } | ||
557 | 621 | ||
558 | rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, | 622 | rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, |
559 | ses->auth_key.response + CIFS_SESS_KEY_SIZE); | 623 | ses->auth_key.response + CIFS_SESS_KEY_SIZE); |
624 | if (rc) | ||
625 | cERROR(1, "%s: Could not generate md5 hash\n", __func__); | ||
560 | 626 | ||
561 | return rc; | 627 | return rc; |
562 | } | 628 | } |
@@ -626,8 +692,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
626 | } | 692 | } |
627 | 693 | ||
628 | /* now calculate the session key for NTLMv2 */ | 694 | /* now calculate the session key for NTLMv2 */ |
629 | crypto_shash_setkey(ses->server->secmech.hmacmd5, | 695 | rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, |
630 | ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); | 696 | ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); |
697 | if (rc) { | ||
698 | cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__); | ||
699 | goto setup_ntlmv2_rsp_ret; | ||
700 | } | ||
631 | 701 | ||
632 | rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); | 702 | rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); |
633 | if (rc) { | 703 | if (rc) { |
@@ -635,12 +705,18 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
635 | goto setup_ntlmv2_rsp_ret; | 705 | goto setup_ntlmv2_rsp_ret; |
636 | } | 706 | } |
637 | 707 | ||
638 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 708 | rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
639 | ses->auth_key.response + CIFS_SESS_KEY_SIZE, | 709 | ses->auth_key.response + CIFS_SESS_KEY_SIZE, |
640 | CIFS_HMAC_MD5_HASH_SIZE); | 710 | CIFS_HMAC_MD5_HASH_SIZE); |
711 | if (rc) { | ||
712 | cERROR(1, "%s: Could not update with response\n", __func__); | ||
713 | goto setup_ntlmv2_rsp_ret; | ||
714 | } | ||
641 | 715 | ||
642 | rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, | 716 | rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, |
643 | ses->auth_key.response); | 717 | ses->auth_key.response); |
718 | if (rc) | ||
719 | cERROR(1, "%s: Could not generate md5 hash\n", __func__); | ||
644 | 720 | ||
645 | setup_ntlmv2_rsp_ret: | 721 | setup_ntlmv2_rsp_ret: |
646 | kfree(tiblob); | 722 | kfree(tiblob); |
@@ -668,8 +744,12 @@ calc_seckey(struct cifs_ses *ses) | |||
668 | 744 | ||
669 | desc.tfm = tfm_arc4; | 745 | desc.tfm = tfm_arc4; |
670 | 746 | ||
671 | crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response, | 747 | rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response, |
672 | CIFS_SESS_KEY_SIZE); | 748 | CIFS_SESS_KEY_SIZE); |
749 | if (rc) { | ||
750 | cERROR(1, "%s: Could not set response as a key", __func__); | ||
751 | return rc; | ||
752 | } | ||
673 | 753 | ||
674 | sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE); | 754 | sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE); |
675 | sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); | 755 | sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); |
@@ -688,7 +768,7 @@ calc_seckey(struct cifs_ses *ses) | |||
688 | 768 | ||
689 | crypto_free_blkcipher(tfm_arc4); | 769 | crypto_free_blkcipher(tfm_arc4); |
690 | 770 | ||
691 | return 0; | 771 | return rc; |
692 | } | 772 | } |
693 | 773 | ||
694 | void | 774 | void |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 865517470967..f93eb948d071 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -86,24 +86,6 @@ extern mempool_t *cifs_sm_req_poolp; | |||
86 | extern mempool_t *cifs_req_poolp; | 86 | extern mempool_t *cifs_req_poolp; |
87 | extern mempool_t *cifs_mid_poolp; | 87 | extern mempool_t *cifs_mid_poolp; |
88 | 88 | ||
89 | void | ||
90 | cifs_sb_active(struct super_block *sb) | ||
91 | { | ||
92 | struct cifs_sb_info *server = CIFS_SB(sb); | ||
93 | |||
94 | if (atomic_inc_return(&server->active) == 1) | ||
95 | atomic_inc(&sb->s_active); | ||
96 | } | ||
97 | |||
98 | void | ||
99 | cifs_sb_deactive(struct super_block *sb) | ||
100 | { | ||
101 | struct cifs_sb_info *server = CIFS_SB(sb); | ||
102 | |||
103 | if (atomic_dec_and_test(&server->active)) | ||
104 | deactivate_super(sb); | ||
105 | } | ||
106 | |||
107 | static int | 89 | static int |
108 | cifs_read_super(struct super_block *sb) | 90 | cifs_read_super(struct super_block *sb) |
109 | { | 91 | { |
@@ -581,6 +563,10 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
581 | mutex_unlock(&dir->i_mutex); | 563 | mutex_unlock(&dir->i_mutex); |
582 | dput(dentry); | 564 | dput(dentry); |
583 | dentry = child; | 565 | dentry = child; |
566 | if (!dentry->d_inode) { | ||
567 | dput(dentry); | ||
568 | dentry = ERR_PTR(-ENOENT); | ||
569 | } | ||
584 | } while (!IS_ERR(dentry)); | 570 | } while (!IS_ERR(dentry)); |
585 | _FreeXid(xid); | 571 | _FreeXid(xid); |
586 | kfree(full_path); | 572 | kfree(full_path); |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index fbd050c8d52a..95da8027983d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -41,10 +41,6 @@ extern struct file_system_type cifs_fs_type; | |||
41 | extern const struct address_space_operations cifs_addr_ops; | 41 | extern const struct address_space_operations cifs_addr_ops; |
42 | extern const struct address_space_operations cifs_addr_ops_smallbuf; | 42 | extern const struct address_space_operations cifs_addr_ops_smallbuf; |
43 | 43 | ||
44 | /* Functions related to super block operations */ | ||
45 | extern void cifs_sb_active(struct super_block *sb); | ||
46 | extern void cifs_sb_deactive(struct super_block *sb); | ||
47 | |||
48 | /* Functions related to inodes */ | 44 | /* Functions related to inodes */ |
49 | extern const struct inode_operations cifs_dir_inode_ops; | 45 | extern const struct inode_operations cifs_dir_inode_ops; |
50 | extern struct inode *cifs_root_iget(struct super_block *); | 46 | extern struct inode *cifs_root_iget(struct super_block *); |
@@ -129,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
129 | extern const struct export_operations cifs_export_ops; | 125 | extern const struct export_operations cifs_export_ops; |
130 | #endif /* CIFS_NFSD_EXPORT */ | 126 | #endif /* CIFS_NFSD_EXPORT */ |
131 | 127 | ||
132 | #define CIFS_VERSION "1.74" | 128 | #define CIFS_VERSION "1.75" |
133 | #endif /* _CIFSFS_H */ | 129 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6255fa812c7a..95dad9d14cf1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -291,7 +291,7 @@ struct TCP_Server_Info { | |||
291 | struct fscache_cookie *fscache; /* client index cache cookie */ | 291 | struct fscache_cookie *fscache; /* client index cache cookie */ |
292 | #endif | 292 | #endif |
293 | #ifdef CONFIG_CIFS_STATS2 | 293 | #ifdef CONFIG_CIFS_STATS2 |
294 | atomic_t inSend; /* requests trying to send */ | 294 | atomic_t in_send; /* requests trying to send */ |
295 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ | 295 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ |
296 | #endif | 296 | #endif |
297 | }; | 297 | }; |
@@ -501,7 +501,7 @@ struct cifs_search_info { | |||
501 | char *ntwrk_buf_start; | 501 | char *ntwrk_buf_start; |
502 | char *srch_entries_start; | 502 | char *srch_entries_start; |
503 | char *last_entry; | 503 | char *last_entry; |
504 | char *presume_name; | 504 | const char *presume_name; |
505 | unsigned int resume_name_len; | 505 | unsigned int resume_name_len; |
506 | bool endOfSearch:1; | 506 | bool endOfSearch:1; |
507 | bool emptyDir:1; | 507 | bool emptyDir:1; |
@@ -672,12 +672,54 @@ struct mid_q_entry { | |||
672 | bool multiEnd:1; /* both received */ | 672 | bool multiEnd:1; /* both received */ |
673 | }; | 673 | }; |
674 | 674 | ||
675 | struct oplock_q_entry { | 675 | /* Make code in transport.c a little cleaner by moving |
676 | struct list_head qhead; | 676 | update of optional stats into function below */ |
677 | struct inode *pinode; | 677 | #ifdef CONFIG_CIFS_STATS2 |
678 | struct cifs_tcon *tcon; | 678 | |
679 | __u16 netfid; | 679 | static inline void cifs_in_send_inc(struct TCP_Server_Info *server) |
680 | }; | 680 | { |
681 | atomic_inc(&server->in_send); | ||
682 | } | ||
683 | |||
684 | static inline void cifs_in_send_dec(struct TCP_Server_Info *server) | ||
685 | { | ||
686 | atomic_dec(&server->in_send); | ||
687 | } | ||
688 | |||
689 | static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) | ||
690 | { | ||
691 | atomic_inc(&server->num_waiters); | ||
692 | } | ||
693 | |||
694 | static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) | ||
695 | { | ||
696 | atomic_dec(&server->num_waiters); | ||
697 | } | ||
698 | |||
699 | static inline void cifs_save_when_sent(struct mid_q_entry *mid) | ||
700 | { | ||
701 | mid->when_sent = jiffies; | ||
702 | } | ||
703 | #else | ||
704 | static inline void cifs_in_send_inc(struct TCP_Server_Info *server) | ||
705 | { | ||
706 | } | ||
707 | static inline void cifs_in_send_dec(struct TCP_Server_Info *server) | ||
708 | { | ||
709 | } | ||
710 | |||
711 | static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) | ||
712 | { | ||
713 | } | ||
714 | |||
715 | static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) | ||
716 | { | ||
717 | } | ||
718 | |||
719 | static inline void cifs_save_when_sent(struct mid_q_entry *mid) | ||
720 | { | ||
721 | } | ||
722 | #endif | ||
681 | 723 | ||
682 | /* for pending dnotify requests */ | 724 | /* for pending dnotify requests */ |
683 | struct dir_notify_req { | 725 | struct dir_notify_req { |
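[Editor's note] As the comment says, these wrappers let transport.c bump the optional counters without #ifdef clutter; the assumed call pattern around a send would be roughly:

	cifs_in_send_inc(server);
	rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
	cifs_in_send_dec(server);
	cifs_save_when_sent(mid);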
@@ -942,8 +984,6 @@ GLOBAL_EXTERN spinlock_t siduidlock; | |||
942 | GLOBAL_EXTERN spinlock_t sidgidlock; | 984 | GLOBAL_EXTERN spinlock_t sidgidlock; |
943 | 985 | ||
944 | void cifs_oplock_break(struct work_struct *work); | 986 | void cifs_oplock_break(struct work_struct *work); |
945 | void cifs_oplock_break_get(struct cifsFileInfo *cfile); | ||
946 | void cifs_oplock_break_put(struct cifsFileInfo *cfile); | ||
947 | 987 | ||
948 | extern const struct slow_work_ops cifs_oplock_break_ops; | 988 | extern const struct slow_work_ops cifs_oplock_break_ops; |
949 | 989 | ||
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1a9fe7f816d1..aac37d99a487 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -107,7 +107,7 @@ static void mark_open_files_invalid(struct cifs_tcon *pTcon) | |||
107 | static int | 107 | static int |
108 | cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) | 108 | cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) |
109 | { | 109 | { |
110 | int rc = 0; | 110 | int rc; |
111 | struct cifs_ses *ses; | 111 | struct cifs_ses *ses; |
112 | struct TCP_Server_Info *server; | 112 | struct TCP_Server_Info *server; |
113 | struct nls_table *nls_codepage; | 113 | struct nls_table *nls_codepage; |
@@ -5720,6 +5720,7 @@ CIFSSMBQAllEAs(const int xid, struct cifs_tcon *tcon, | |||
5720 | char *temp_ptr; | 5720 | char *temp_ptr; |
5721 | char *end_of_smb; | 5721 | char *end_of_smb; |
5722 | __u16 params, byte_count, data_offset; | 5722 | __u16 params, byte_count, data_offset; |
5723 | unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0; | ||
5723 | 5724 | ||
5724 | cFYI(1, "In Query All EAs path %s", searchName); | 5725 | cFYI(1, "In Query All EAs path %s", searchName); |
5725 | QAllEAsRetry: | 5726 | QAllEAsRetry: |
@@ -5837,7 +5838,8 @@ QAllEAsRetry: | |||
5837 | } | 5838 | } |
5838 | 5839 | ||
5839 | if (ea_name) { | 5840 | if (ea_name) { |
5840 | if (strncmp(ea_name, temp_ptr, name_len) == 0) { | 5841 | if (ea_name_len == name_len && |
5842 | strncmp(ea_name, temp_ptr, name_len) == 0) { | ||
5841 | temp_ptr += name_len + 1; | 5843 | temp_ptr += name_len + 1; |
5842 | rc = value_len; | 5844 | rc = value_len; |
5843 | if (buf_size == 0) | 5845 | if (buf_size == 0) |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e66297bad412..633c246b6775 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -319,25 +319,328 @@ requeue_echo: | |||
319 | queue_delayed_work(system_nrt_wq, &server->echo, SMB_ECHO_INTERVAL); | 319 | queue_delayed_work(system_nrt_wq, &server->echo, SMB_ECHO_INTERVAL); |
320 | } | 320 | } |
321 | 321 | ||
322 | static bool | ||
323 | allocate_buffers(char **bigbuf, char **smallbuf, unsigned int size, | ||
324 | bool is_large_buf) | ||
325 | { | ||
326 | char *bbuf = *bigbuf, *sbuf = *smallbuf; | ||
327 | |||
328 | if (bbuf == NULL) { | ||
329 | bbuf = (char *)cifs_buf_get(); | ||
330 | if (!bbuf) { | ||
331 | cERROR(1, "No memory for large SMB response"); | ||
332 | msleep(3000); | ||
333 | /* retry will check if exiting */ | ||
334 | return false; | ||
335 | } | ||
336 | } else if (is_large_buf) { | ||
337 | /* we are reusing a dirty large buf, clear its start */ | ||
338 | memset(bbuf, 0, size); | ||
339 | } | ||
340 | |||
341 | if (sbuf == NULL) { | ||
342 | sbuf = (char *)cifs_small_buf_get(); | ||
343 | if (!sbuf) { | ||
344 | cERROR(1, "No memory for SMB response"); | ||
345 | msleep(1000); | ||
346 | /* retry will check if exiting */ | ||
347 | return false; | ||
348 | } | ||
349 | /* beginning of smb buffer is cleared in our buf_get */ | ||
350 | } else { | ||
351 | /* if existing small buf clear beginning */ | ||
352 | memset(sbuf, 0, size); | ||
353 | } | ||
354 | |||
355 | *bigbuf = bbuf; | ||
356 | *smallbuf = sbuf; | ||
357 | |||
358 | return true; | ||
359 | } | ||
360 | |||
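[Editor's note] A sketch of the intended call site in the demultiplex loop (variable names assumed from the surrounding thread code, not shown in this hunk):

	if (!allocate_buffers(&bigbuf, &smallbuf,
			      sizeof(struct smb_hdr), isLargeBuf))
		continue;	/* allocation failed and slept; re-check for exit */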
361 | static int | ||
362 | read_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg, | ||
363 | struct kvec *iov, unsigned int to_read, | ||
364 | unsigned int *ptotal_read, bool is_header_read) | ||
365 | { | ||
366 | int length, rc = 0; | ||
367 | unsigned int total_read; | ||
368 | char *buf = iov->iov_base; | ||
369 | |||
370 | for (total_read = 0; total_read < to_read; total_read += length) { | ||
371 | length = kernel_recvmsg(server->ssocket, smb_msg, iov, 1, | ||
372 | to_read - total_read, 0); | ||
373 | if (server->tcpStatus == CifsExiting) { | ||
374 | /* then will exit */ | ||
375 | rc = 2; | ||
376 | break; | ||
377 | } else if (server->tcpStatus == CifsNeedReconnect) { | ||
378 | cifs_reconnect(server); | ||
379 | /* Reconnect wakes up rspns q */ | ||
380 | /* Now we will reread sock */ | ||
381 | rc = 1; | ||
382 | break; | ||
383 | } else if (length == -ERESTARTSYS || | ||
384 | length == -EAGAIN || | ||
385 | length == -EINTR) { | ||
386 | /* | ||
387 | * Minimum sleep to prevent looping, allowing socket | ||
388 | * to clear and app threads to set tcpStatus | ||
389 | * CifsNeedReconnect if server hung. | ||
390 | */ | ||
391 | usleep_range(1000, 2000); | ||
392 | length = 0; | ||
393 | if (!is_header_read) | ||
394 | continue; | ||
395 | /* Special handling for header read */ | ||
396 | if (total_read) { | ||
397 | iov->iov_base = (to_read - total_read) + | ||
398 | buf; | ||
399 | iov->iov_len = to_read - total_read; | ||
400 | smb_msg->msg_control = NULL; | ||
401 | smb_msg->msg_controllen = 0; | ||
402 | rc = 3; | ||
403 | } else | ||
404 | rc = 1; | ||
405 | break; | ||
406 | } else if (length <= 0) { | ||
407 | cERROR(1, "Received no data, expecting %d", | ||
408 | to_read - total_read); | ||
409 | cifs_reconnect(server); | ||
410 | rc = 1; | ||
411 | break; | ||
412 | } | ||
413 | } | ||
414 | |||
415 | *ptotal_read = total_read; | ||
416 | return rc; | ||
417 | } | ||
418 | |||
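[Editor's note] Per the body above, read_from_socket() returns 0 when all bytes arrived, 1 to restart the read (reconnect or retry), 2 when the thread is exiting, and 3 when a header read was interrupted mid-stream with the iov already advanced. A hypothetical caller fragment:

	rc = read_from_socket(server, &smb_msg, &iov, 4,
			      &total_read, true /* header read */);
	if (rc == 2)
		break;		/* CifsExiting */
	else if (rc == 1)
		continue;	/* reconnected; start the loop over */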
419 | static bool | ||
420 | check_rfc1002_header(struct TCP_Server_Info *server, char *buf) | ||
421 | { | ||
422 | char temp = *buf; | ||
423 | unsigned int pdu_length = be32_to_cpu( | ||
424 | ((struct smb_hdr *)buf)->smb_buf_length); | ||
425 | |||
426 | /* | ||
427 | * The first byte of the big-endian length field | ||
428 | * is actually not part of the length but the type, | ||
429 | * with the most common value, zero, meaning regular data. | ||
430 | */ | ||
431 | if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) { | ||
432 | return false; | ||
433 | } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) { | ||
434 | cFYI(1, "Good RFC 1002 session rsp"); | ||
435 | return false; | ||
436 | } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) { | ||
437 | /* | ||
438 | * We get this from Windows 98 instead of an error on | ||
439 | * SMB negprot response. | ||
440 | */ | ||
441 | cFYI(1, "Negative RFC1002 Session Response Error 0x%x", | ||
442 | pdu_length); | ||
443 | /* give server a second to clean up */ | ||
444 | msleep(1000); | ||
445 | /* | ||
446 | * Always try port 445 first on reconnect, since we get a | ||
447 | * NACK on some servers if we ever connected to port 139 | ||
448 | * (the NACK is because we do not begin with an RFC1001 | ||
449 | * session initialize frame). | ||
450 | */ | ||
451 | cifs_set_port((struct sockaddr *) | ||
452 | &server->dstaddr, CIFS_PORT); | ||
453 | cifs_reconnect(server); | ||
454 | wake_up(&server->response_q); | ||
455 | return false; | ||
456 | } else if (temp != (char) 0) { | ||
457 | cERROR(1, "Unknown RFC 1002 frame"); | ||
458 | cifs_dump_mem(" Received Data: ", buf, 4); | ||
459 | cifs_reconnect(server); | ||
460 | return false; | ||
461 | } | ||
462 | |||
463 | /* else we have an SMB response */ | ||
464 | if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || | ||
465 | (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) { | ||
466 | cERROR(1, "Invalid size SMB length %d pdu_length %d", | ||
467 | 4, pdu_length+4); | ||
468 | cifs_reconnect(server); | ||
469 | wake_up(&server->response_q); | ||
470 | return false; | ||
471 | } | ||
472 | |||
473 | return true; | ||
474 | } | ||
475 | |||
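check_rfc1002_header() relies on the layout of the 4-byte session packet header: one type byte followed by what the driver treats as a 24-bit big-endian length, which is why be32_to_cpu() on the whole word yields the length whenever the type byte is zero (a regular session message). A standalone sketch of that decoding (plain C, illustrative):

	#include <stdint.h>

	/* Decode the 4-byte RFC 1002 session header: byte 0 is the
	 * packet type; for type 0 the remaining three bytes are a
	 * big-endian payload length, so the full 32-bit big-endian
	 * word equals the length. */
	static void rfc1002_decode(const uint8_t hdr[4],
	                           uint8_t *type, uint32_t *len)
	{
	        *type = hdr[0];
	        *len = ((uint32_t)hdr[1] << 16) |
	               ((uint32_t)hdr[2] << 8) |
	                (uint32_t)hdr[3];
	}
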
476 | static struct mid_q_entry * | ||
477 | find_cifs_mid(struct TCP_Server_Info *server, struct smb_hdr *buf, | ||
478 | int *length, bool is_large_buf, bool *is_multi_rsp, char **bigbuf) | ||
479 | { | ||
480 | struct mid_q_entry *mid = NULL, *tmp_mid, *ret = NULL; | ||
481 | |||
482 | spin_lock(&GlobalMid_Lock); | ||
483 | list_for_each_entry_safe(mid, tmp_mid, &server->pending_mid_q, qhead) { | ||
484 | if (mid->mid != buf->Mid || | ||
485 | mid->midState != MID_REQUEST_SUBMITTED || | ||
486 | mid->command != buf->Command) | ||
487 | continue; | ||
488 | |||
489 | if (*length == 0 && check2ndT2(buf, server->maxBuf) > 0) { | ||
490 | /* We have a multipart transact2 resp */ | ||
491 | *is_multi_rsp = true; | ||
492 | if (mid->resp_buf) { | ||
493 | /* merge response - fix up 1st*/ | ||
494 | *length = coalesce_t2(buf, mid->resp_buf); | ||
495 | if (*length > 0) { | ||
496 | *length = 0; | ||
497 | mid->multiRsp = true; | ||
498 | break; | ||
499 | } | ||
500 | /* All parts received or packet is malformed. */ | ||
501 | mid->multiEnd = true; | ||
502 | goto multi_t2_fnd; | ||
503 | } | ||
504 | if (!is_large_buf) { | ||
505 | /*FIXME: switch to already allocated largebuf?*/ | ||
506 | cERROR(1, "1st trans2 resp needs bigbuf"); | ||
507 | } else { | ||
508 | /* Have first buffer */ | ||
509 | mid->resp_buf = buf; | ||
510 | mid->largeBuf = true; | ||
511 | *bigbuf = NULL; | ||
512 | } | ||
513 | break; | ||
514 | } | ||
515 | mid->resp_buf = buf; | ||
516 | mid->largeBuf = is_large_buf; | ||
517 | multi_t2_fnd: | ||
518 | if (*length == 0) | ||
519 | mid->midState = MID_RESPONSE_RECEIVED; | ||
520 | else | ||
521 | mid->midState = MID_RESPONSE_MALFORMED; | ||
522 | #ifdef CONFIG_CIFS_STATS2 | ||
523 | mid->when_received = jiffies; | ||
524 | #endif | ||
525 | list_del_init(&mid->qhead); | ||
526 | ret = mid; | ||
527 | break; | ||
528 | } | ||
529 | spin_unlock(&GlobalMid_Lock); | ||
530 | |||
531 | return ret; | ||
532 | } | ||
533 | |||
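find_cifs_mid() pairs a received frame with a pending request under GlobalMid_Lock; all three match criteria must agree before the response is accepted (sketch; field and constant names are from the patch):

	/* A response buffer belongs to a pending mid only if the
	 * multiplex id, the request state and the SMB command all
	 * match -- a frame reusing a mid of a different command is
	 * left on the queue for the unknown-frame path below. */
	static bool mid_matches(const struct mid_q_entry *mid,
	                        const struct smb_hdr *buf)
	{
	        return mid->mid == buf->Mid &&
	               mid->midState == MID_REQUEST_SUBMITTED &&
	               mid->command == buf->Command;
	}
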
534 | static void clean_demultiplex_info(struct TCP_Server_Info *server) | ||
535 | { | ||
536 | int length; | ||
537 | |||
538 | /* take it off the list, if it's not already */ | ||
539 | spin_lock(&cifs_tcp_ses_lock); | ||
540 | list_del_init(&server->tcp_ses_list); | ||
541 | spin_unlock(&cifs_tcp_ses_lock); | ||
542 | |||
543 | spin_lock(&GlobalMid_Lock); | ||
544 | server->tcpStatus = CifsExiting; | ||
545 | spin_unlock(&GlobalMid_Lock); | ||
546 | wake_up_all(&server->response_q); | ||
547 | |||
548 | /* | ||
549 | * Check if we have blocked requests that need to be freed. Note | ||
550 | * that cifs_max_pending is normally 50, but can be set at module | ||
551 | * install time to as little as two. | ||
552 | */ | ||
553 | spin_lock(&GlobalMid_Lock); | ||
554 | if (atomic_read(&server->inFlight) >= cifs_max_pending) | ||
555 | atomic_set(&server->inFlight, cifs_max_pending - 1); | ||
556 | /* | ||
557 | * We do not want to set the max_pending too low or we could end up | ||
558 | * with the counter going negative. | ||
559 | */ | ||
560 | spin_unlock(&GlobalMid_Lock); | ||
561 | /* | ||
562 | * Although there should not be any requests blocked on this queue, | ||
563 | * it cannot hurt to be paranoid and try to wake up requests that | ||
564 | * may have been blocked when more than 50 at a time were on the | ||
565 | * wire to the same server - they will now see the session is in | ||
566 | * exit state and get out of SendReceive. | ||
567 | */ | ||
568 | wake_up_all(&server->request_q); | ||
569 | /* give those requests time to exit */ | ||
570 | msleep(125); | ||
571 | |||
572 | if (server->ssocket) { | ||
573 | sock_release(server->ssocket); | ||
574 | server->ssocket = NULL; | ||
575 | } | ||
576 | |||
577 | if (!list_empty(&server->pending_mid_q)) { | ||
578 | struct list_head dispose_list; | ||
579 | struct mid_q_entry *mid_entry; | ||
580 | struct list_head *tmp, *tmp2; | ||
581 | |||
582 | INIT_LIST_HEAD(&dispose_list); | ||
583 | spin_lock(&GlobalMid_Lock); | ||
584 | list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { | ||
585 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | ||
586 | cFYI(1, "Clearing mid 0x%x", mid_entry->mid); | ||
587 | mid_entry->midState = MID_SHUTDOWN; | ||
588 | list_move(&mid_entry->qhead, &dispose_list); | ||
589 | } | ||
590 | spin_unlock(&GlobalMid_Lock); | ||
591 | |||
592 | /* now walk dispose list and issue callbacks */ | ||
593 | list_for_each_safe(tmp, tmp2, &dispose_list) { | ||
594 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | ||
595 | cFYI(1, "Callback mid 0x%x", mid_entry->mid); | ||
596 | list_del_init(&mid_entry->qhead); | ||
597 | mid_entry->callback(mid_entry); | ||
598 | } | ||
599 | /* 1/8th of sec is more than enough time for them to exit */ | ||
600 | msleep(125); | ||
601 | } | ||
602 | |||
603 | if (!list_empty(&server->pending_mid_q)) { | ||
604 | /* | ||
605 | * mpx threads have not exited yet; give them at least the smb | ||
606 | * send timeout for long ops. | ||
607 | * | ||
608 | * Due to delays on oplock break requests, we need to wait at | ||
609 | * least 45 seconds before giving up on a request getting a | ||
610 | * response and going ahead and killing cifsd. | ||
611 | */ | ||
612 | cFYI(1, "Wait for exit from demultiplex thread"); | ||
613 | msleep(46000); | ||
614 | /* | ||
615 | * If threads still have not exited, they are probably never | ||
616 | * coming home; not much else we can do but free the memory. | ||
617 | */ | ||
618 | } | ||
619 | |||
620 | kfree(server->hostname); | ||
621 | kfree(server); | ||
622 | |||
623 | length = atomic_dec_return(&tcpSesAllocCount); | ||
624 | if (length > 0) | ||
625 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | ||
626 | GFP_KERNEL); | ||
627 | } | ||
628 | |||
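clean_demultiplex_info() applies the usual two-phase list teardown: entries are detached onto a private list while GlobalMid_Lock is held, and the callbacks run only after the lock is dropped, so a callback that sleeps or re-takes the lock cannot deadlock the shutdown path. The idiom in isolation (a sketch restating the loop above, not additional code from the patch):

	LIST_HEAD(dispose_list);

	spin_lock(&GlobalMid_Lock);
	list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
	        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
	        mid_entry->midState = MID_SHUTDOWN;
	        list_move(&mid_entry->qhead, &dispose_list);    /* phase 1 */
	}
	spin_unlock(&GlobalMid_Lock);

	list_for_each_safe(tmp, tmp2, &dispose_list) {
	        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
	        list_del_init(&mid_entry->qhead);
	        mid_entry->callback(mid_entry);                 /* phase 2 */
	}
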
322 | static int | 629 | static int |
323 | cifs_demultiplex_thread(void *p) | 630 | cifs_demultiplex_thread(void *p) |
324 | { | 631 | { |
325 | int length; | 632 | int length; |
326 | struct TCP_Server_Info *server = p; | 633 | struct TCP_Server_Info *server = p; |
327 | unsigned int pdu_length, total_read; | 634 | unsigned int pdu_length, total_read; |
635 | char *buf = NULL, *bigbuf = NULL, *smallbuf = NULL; | ||
328 | struct smb_hdr *smb_buffer = NULL; | 636 | struct smb_hdr *smb_buffer = NULL; |
329 | struct smb_hdr *bigbuf = NULL; | ||
330 | struct smb_hdr *smallbuf = NULL; | ||
331 | struct msghdr smb_msg; | 637 | struct msghdr smb_msg; |
332 | struct kvec iov; | 638 | struct kvec iov; |
333 | struct socket *csocket = server->ssocket; | ||
334 | struct list_head *tmp, *tmp2; | ||
335 | struct task_struct *task_to_wake = NULL; | 639 | struct task_struct *task_to_wake = NULL; |
336 | struct mid_q_entry *mid_entry; | 640 | struct mid_q_entry *mid_entry; |
337 | char temp; | ||
338 | bool isLargeBuf = false; | 641 | bool isLargeBuf = false; |
339 | bool isMultiRsp; | 642 | bool isMultiRsp = false; |
340 | int reconnect; | 643 | int rc; |
341 | 644 | ||
342 | current->flags |= PF_MEMALLOC; | 645 | current->flags |= PF_MEMALLOC; |
343 | cFYI(1, "Demultiplex PID: %d", task_pid_nr(current)); | 646 | cFYI(1, "Demultiplex PID: %d", task_pid_nr(current)); |
@@ -351,35 +654,16 @@ cifs_demultiplex_thread(void *p) | |||
351 | while (server->tcpStatus != CifsExiting) { | 654 | while (server->tcpStatus != CifsExiting) { |
352 | if (try_to_freeze()) | 655 | if (try_to_freeze()) |
353 | continue; | 656 | continue; |
354 | if (bigbuf == NULL) { | ||
355 | bigbuf = cifs_buf_get(); | ||
356 | if (!bigbuf) { | ||
357 | cERROR(1, "No memory for large SMB response"); | ||
358 | msleep(3000); | ||
359 | /* retry will check if exiting */ | ||
360 | continue; | ||
361 | } | ||
362 | } else if (isLargeBuf) { | ||
363 | /* we are reusing a dirty large buf, clear its start */ | ||
364 | memset(bigbuf, 0, sizeof(struct smb_hdr)); | ||
365 | } | ||
366 | 657 | ||
367 | if (smallbuf == NULL) { | 658 | if (!allocate_buffers(&bigbuf, &smallbuf, |
368 | smallbuf = cifs_small_buf_get(); | 659 | sizeof(struct smb_hdr), isLargeBuf)) |
369 | if (!smallbuf) { | 660 | continue; |
370 | cERROR(1, "No memory for SMB response"); | ||
371 | msleep(1000); | ||
372 | /* retry will check if exiting */ | ||
373 | continue; | ||
374 | } | ||
375 | /* beginning of smb buffer is cleared in our buf_get */ | ||
376 | } else /* if existing small buf clear beginning */ | ||
377 | memset(smallbuf, 0, sizeof(struct smb_hdr)); | ||
378 | 661 | ||
379 | isLargeBuf = false; | 662 | isLargeBuf = false; |
380 | isMultiRsp = false; | 663 | isMultiRsp = false; |
381 | smb_buffer = smallbuf; | 664 | smb_buffer = (struct smb_hdr *)smallbuf; |
382 | iov.iov_base = smb_buffer; | 665 | buf = smallbuf; |
666 | iov.iov_base = buf; | ||
383 | iov.iov_len = 4; | 667 | iov.iov_len = 4; |
384 | smb_msg.msg_control = NULL; | 668 | smb_msg.msg_control = NULL; |
385 | smb_msg.msg_controllen = 0; | 669 | smb_msg.msg_controllen = 0; |
@@ -393,158 +677,50 @@ incomplete_rcv: | |||
393 | "Reconnecting...", server->hostname, | 677 | "Reconnecting...", server->hostname, |
394 | (echo_retries * SMB_ECHO_INTERVAL / HZ)); | 678 | (echo_retries * SMB_ECHO_INTERVAL / HZ)); |
395 | cifs_reconnect(server); | 679 | cifs_reconnect(server); |
396 | csocket = server->ssocket; | ||
397 | wake_up(&server->response_q); | 680 | wake_up(&server->response_q); |
398 | continue; | 681 | continue; |
399 | } | 682 | } |
400 | 683 | ||
401 | length = | 684 | rc = read_from_socket(server, &smb_msg, &iov, pdu_length, |
402 | kernel_recvmsg(csocket, &smb_msg, | 685 | &total_read, true /* header read */); |
403 | &iov, 1, pdu_length, 0 /* BB other flags? */); | 686 | if (rc == 3) |
404 | 687 | goto incomplete_rcv; | |
405 | if (server->tcpStatus == CifsExiting) { | 688 | else if (rc == 2) |
406 | break; | 689 | break; |
407 | } else if (server->tcpStatus == CifsNeedReconnect) { | 690 | else if (rc == 1) |
408 | cFYI(1, "Reconnect after server stopped responding"); | ||
409 | cifs_reconnect(server); | ||
410 | cFYI(1, "call to reconnect done"); | ||
411 | csocket = server->ssocket; | ||
412 | continue; | ||
413 | } else if (length == -ERESTARTSYS || | ||
414 | length == -EAGAIN || | ||
415 | length == -EINTR) { | ||
416 | msleep(1); /* minimum sleep to prevent looping | ||
417 | allowing socket to clear and app threads to set | ||
418 | tcpStatus CifsNeedReconnect if server hung */ | ||
419 | if (pdu_length < 4) { | ||
420 | iov.iov_base = (4 - pdu_length) + | ||
421 | (char *)smb_buffer; | ||
422 | iov.iov_len = pdu_length; | ||
423 | smb_msg.msg_control = NULL; | ||
424 | smb_msg.msg_controllen = 0; | ||
425 | goto incomplete_rcv; | ||
426 | } else | ||
427 | continue; | ||
428 | } else if (length <= 0) { | ||
429 | cFYI(1, "Reconnect after unexpected peek error %d", | ||
430 | length); | ||
431 | cifs_reconnect(server); | ||
432 | csocket = server->ssocket; | ||
433 | wake_up(&server->response_q); | ||
434 | continue; | 691 | continue; |
435 | } else if (length < pdu_length) { | ||
436 | cFYI(1, "requested %d bytes but only got %d bytes", | ||
437 | pdu_length, length); | ||
438 | pdu_length -= length; | ||
439 | msleep(1); | ||
440 | goto incomplete_rcv; | ||
441 | } | ||
442 | |||
443 | /* The right amount was read from socket - 4 bytes */ | ||
444 | /* so we can now interpret the length field */ | ||
445 | 692 | ||
446 | /* the first byte big endian of the length field, | 693 | /* |
447 | is actually not part of the length but the type | 694 | * The right amount was read from socket - 4 bytes, |
448 | with the most common, zero, as regular data */ | 695 | * so we can now interpret the length field. |
449 | temp = *((char *) smb_buffer); | 696 | */ |
450 | 697 | ||
451 | /* Note that FC 1001 length is big endian on the wire, | 698 | /* |
452 | but we convert it here so it is always manipulated | 699 | * Note that RFC 1001 length is big endian on the wire, |
453 | as host byte order */ | 700 | * but we convert it here so it is always manipulated |
701 | * as host byte order. | ||
702 | */ | ||
454 | pdu_length = be32_to_cpu(smb_buffer->smb_buf_length); | 703 | pdu_length = be32_to_cpu(smb_buffer->smb_buf_length); |
455 | 704 | ||
456 | cFYI(1, "rfc1002 length 0x%x", pdu_length+4); | 705 | cFYI(1, "rfc1002 length 0x%x", pdu_length+4); |
457 | 706 | if (!check_rfc1002_header(server, buf)) | |
458 | if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) { | ||
459 | continue; | ||
460 | } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) { | ||
461 | cFYI(1, "Good RFC 1002 session rsp"); | ||
462 | continue; | ||
463 | } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) { | ||
464 | /* we get this from Windows 98 instead of | ||
465 | an error on SMB negprot response */ | ||
466 | cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", | ||
467 | pdu_length); | ||
468 | /* give server a second to clean up */ | ||
469 | msleep(1000); | ||
470 | /* always try 445 first on reconnect since we get NACK | ||
471 | * on some if we ever connected to port 139 (the NACK | ||
472 | * is since we do not begin with RFC1001 session | ||
473 | * initialize frame) | ||
474 | */ | ||
475 | cifs_set_port((struct sockaddr *) | ||
476 | &server->dstaddr, CIFS_PORT); | ||
477 | cifs_reconnect(server); | ||
478 | csocket = server->ssocket; | ||
479 | wake_up(&server->response_q); | ||
480 | continue; | ||
481 | } else if (temp != (char) 0) { | ||
482 | cERROR(1, "Unknown RFC 1002 frame"); | ||
483 | cifs_dump_mem(" Received Data: ", (char *)smb_buffer, | ||
484 | length); | ||
485 | cifs_reconnect(server); | ||
486 | csocket = server->ssocket; | ||
487 | continue; | 707 | continue; |
488 | } | ||
489 | |||
490 | /* else we have an SMB response */ | ||
491 | if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || | ||
492 | (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) { | ||
493 | cERROR(1, "Invalid size SMB length %d pdu_length %d", | ||
494 | length, pdu_length+4); | ||
495 | cifs_reconnect(server); | ||
496 | csocket = server->ssocket; | ||
497 | wake_up(&server->response_q); | ||
498 | continue; | ||
499 | } | ||
500 | 708 | ||
501 | /* else length ok */ | 709 | /* else length ok */ |
502 | reconnect = 0; | ||
503 | |||
504 | if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { | 710 | if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { |
505 | isLargeBuf = true; | 711 | isLargeBuf = true; |
506 | memcpy(bigbuf, smallbuf, 4); | 712 | memcpy(bigbuf, smallbuf, 4); |
507 | smb_buffer = bigbuf; | 713 | smb_buffer = (struct smb_hdr *)bigbuf; |
714 | buf = bigbuf; | ||
508 | } | 715 | } |
509 | length = 0; | 716 | |
510 | iov.iov_base = 4 + (char *)smb_buffer; | 717 | iov.iov_base = 4 + buf; |
511 | iov.iov_len = pdu_length; | 718 | iov.iov_len = pdu_length; |
512 | for (total_read = 0; total_read < pdu_length; | 719 | rc = read_from_socket(server, &smb_msg, &iov, pdu_length, |
513 | total_read += length) { | 720 | &total_read, false); |
514 | length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, | 721 | if (rc == 2) |
515 | pdu_length - total_read, 0); | ||
516 | if (server->tcpStatus == CifsExiting) { | ||
517 | /* then will exit */ | ||
518 | reconnect = 2; | ||
519 | break; | ||
520 | } else if (server->tcpStatus == CifsNeedReconnect) { | ||
521 | cifs_reconnect(server); | ||
522 | csocket = server->ssocket; | ||
523 | /* Reconnect wakes up rspns q */ | ||
524 | /* Now we will reread sock */ | ||
525 | reconnect = 1; | ||
526 | break; | ||
527 | } else if (length == -ERESTARTSYS || | ||
528 | length == -EAGAIN || | ||
529 | length == -EINTR) { | ||
530 | msleep(1); /* minimum sleep to prevent looping, | ||
531 | allowing socket to clear and app | ||
532 | threads to set tcpStatus | ||
533 | CifsNeedReconnect if server hung*/ | ||
534 | length = 0; | ||
535 | continue; | ||
536 | } else if (length <= 0) { | ||
537 | cERROR(1, "Received no data, expecting %d", | ||
538 | pdu_length - total_read); | ||
539 | cifs_reconnect(server); | ||
540 | csocket = server->ssocket; | ||
541 | reconnect = 1; | ||
542 | break; | ||
543 | } | ||
544 | } | ||
545 | if (reconnect == 2) | ||
546 | break; | 722 | break; |
547 | else if (reconnect == 1) | 723 | else if (rc == 1) |
548 | continue; | 724 | continue; |
549 | 725 | ||
550 | total_read += 4; /* account for rfc1002 hdr */ | 726 | total_read += 4; /* account for rfc1002 hdr */ |
@@ -562,75 +738,13 @@ incomplete_rcv: | |||
562 | */ | 738 | */ |
563 | length = checkSMB(smb_buffer, smb_buffer->Mid, total_read); | 739 | length = checkSMB(smb_buffer, smb_buffer->Mid, total_read); |
564 | if (length != 0) | 740 | if (length != 0) |
565 | cifs_dump_mem("Bad SMB: ", smb_buffer, | 741 | cifs_dump_mem("Bad SMB: ", buf, |
566 | min_t(unsigned int, total_read, 48)); | 742 | min_t(unsigned int, total_read, 48)); |
567 | 743 | ||
568 | mid_entry = NULL; | ||
569 | server->lstrp = jiffies; | 744 | server->lstrp = jiffies; |
570 | 745 | ||
571 | spin_lock(&GlobalMid_Lock); | 746 | mid_entry = find_cifs_mid(server, smb_buffer, &length, |
572 | list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { | 747 | isLargeBuf, &isMultiRsp, &bigbuf); |
573 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | ||
574 | |||
575 | if (mid_entry->mid != smb_buffer->Mid || | ||
576 | mid_entry->midState != MID_REQUEST_SUBMITTED || | ||
577 | mid_entry->command != smb_buffer->Command) { | ||
578 | mid_entry = NULL; | ||
579 | continue; | ||
580 | } | ||
581 | |||
582 | if (length == 0 && | ||
583 | check2ndT2(smb_buffer, server->maxBuf) > 0) { | ||
584 | /* We have a multipart transact2 resp */ | ||
585 | isMultiRsp = true; | ||
586 | if (mid_entry->resp_buf) { | ||
587 | /* merge response - fix up 1st*/ | ||
588 | length = coalesce_t2(smb_buffer, | ||
589 | mid_entry->resp_buf); | ||
590 | if (length > 0) { | ||
591 | length = 0; | ||
592 | mid_entry->multiRsp = true; | ||
593 | break; | ||
594 | } else { | ||
595 | /* all parts received or | ||
596 | * packet is malformed | ||
597 | */ | ||
598 | mid_entry->multiEnd = true; | ||
599 | goto multi_t2_fnd; | ||
600 | } | ||
601 | } else { | ||
602 | if (!isLargeBuf) { | ||
603 | /* | ||
604 | * FIXME: switch to already | ||
605 | * allocated largebuf? | ||
606 | */ | ||
607 | cERROR(1, "1st trans2 resp " | ||
608 | "needs bigbuf"); | ||
609 | } else { | ||
610 | /* Have first buffer */ | ||
611 | mid_entry->resp_buf = | ||
612 | smb_buffer; | ||
613 | mid_entry->largeBuf = true; | ||
614 | bigbuf = NULL; | ||
615 | } | ||
616 | } | ||
617 | break; | ||
618 | } | ||
619 | mid_entry->resp_buf = smb_buffer; | ||
620 | mid_entry->largeBuf = isLargeBuf; | ||
621 | multi_t2_fnd: | ||
622 | if (length == 0) | ||
623 | mid_entry->midState = MID_RESPONSE_RECEIVED; | ||
624 | else | ||
625 | mid_entry->midState = MID_RESPONSE_MALFORMED; | ||
626 | #ifdef CONFIG_CIFS_STATS2 | ||
627 | mid_entry->when_received = jiffies; | ||
628 | #endif | ||
629 | list_del_init(&mid_entry->qhead); | ||
630 | break; | ||
631 | } | ||
632 | spin_unlock(&GlobalMid_Lock); | ||
633 | |||
634 | if (mid_entry != NULL) { | 748 | if (mid_entry != NULL) { |
635 | mid_entry->callback(mid_entry); | 749 | mid_entry->callback(mid_entry); |
636 | /* Was previous buf put in mpx struct for multi-rsp? */ | 750 | /* Was previous buf put in mpx struct for multi-rsp? */ |
@@ -648,7 +762,7 @@ multi_t2_fnd: | |||
648 | !isMultiRsp) { | 762 | !isMultiRsp) { |
649 | cERROR(1, "No task to wake, unknown frame received! " | 763 | cERROR(1, "No task to wake, unknown frame received! " |
650 | "NumMids %d", atomic_read(&midCount)); | 764 | "NumMids %d", atomic_read(&midCount)); |
651 | cifs_dump_mem("Received Data is: ", (char *)smb_buffer, | 765 | cifs_dump_mem("Received Data is: ", buf, |
652 | sizeof(struct smb_hdr)); | 766 | sizeof(struct smb_hdr)); |
653 | #ifdef CONFIG_CIFS_DEBUG2 | 767 | #ifdef CONFIG_CIFS_DEBUG2 |
654 | cifs_dump_detail(smb_buffer); | 768 | cifs_dump_detail(smb_buffer); |
@@ -658,88 +772,13 @@ multi_t2_fnd: | |||
658 | } | 772 | } |
659 | } /* end while !EXITING */ | 773 | } /* end while !EXITING */ |
660 | 774 | ||
661 | /* take it off the list, if it's not already */ | ||
662 | spin_lock(&cifs_tcp_ses_lock); | ||
663 | list_del_init(&server->tcp_ses_list); | ||
664 | spin_unlock(&cifs_tcp_ses_lock); | ||
665 | |||
666 | spin_lock(&GlobalMid_Lock); | ||
667 | server->tcpStatus = CifsExiting; | ||
668 | spin_unlock(&GlobalMid_Lock); | ||
669 | wake_up_all(&server->response_q); | ||
670 | |||
671 | /* check if we have blocked requests that need to free */ | ||
672 | /* Note that cifs_max_pending is normally 50, but | ||
673 | can be set at module install time to as little as two */ | ||
674 | spin_lock(&GlobalMid_Lock); | ||
675 | if (atomic_read(&server->inFlight) >= cifs_max_pending) | ||
676 | atomic_set(&server->inFlight, cifs_max_pending - 1); | ||
677 | /* We do not want to set the max_pending too low or we | ||
678 | could end up with the counter going negative */ | ||
679 | spin_unlock(&GlobalMid_Lock); | ||
680 | /* Although there should not be any requests blocked on | ||
681 | this queue it can not hurt to be paranoid and try to wake up requests | ||
682 | that may haven been blocked when more than 50 at time were on the wire | ||
683 | to the same server - they now will see the session is in exit state | ||
684 | and get out of SendReceive. */ | ||
685 | wake_up_all(&server->request_q); | ||
686 | /* give those requests time to exit */ | ||
687 | msleep(125); | ||
688 | |||
689 | if (server->ssocket) { | ||
690 | sock_release(csocket); | ||
691 | server->ssocket = NULL; | ||
692 | } | ||
693 | /* buffer usually freed in free_mid - need to free it here on exit */ | 775 | /* buffer usually freed in free_mid - need to free it here on exit */ |
694 | cifs_buf_release(bigbuf); | 776 | cifs_buf_release(bigbuf); |
695 | if (smallbuf) /* no sense logging a debug message if NULL */ | 777 | if (smallbuf) /* no sense logging a debug message if NULL */ |
696 | cifs_small_buf_release(smallbuf); | 778 | cifs_small_buf_release(smallbuf); |
697 | 779 | ||
698 | if (!list_empty(&server->pending_mid_q)) { | ||
699 | struct list_head dispose_list; | ||
700 | |||
701 | INIT_LIST_HEAD(&dispose_list); | ||
702 | spin_lock(&GlobalMid_Lock); | ||
703 | list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { | ||
704 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | ||
705 | cFYI(1, "Clearing mid 0x%x", mid_entry->mid); | ||
706 | mid_entry->midState = MID_SHUTDOWN; | ||
707 | list_move(&mid_entry->qhead, &dispose_list); | ||
708 | } | ||
709 | spin_unlock(&GlobalMid_Lock); | ||
710 | |||
711 | /* now walk dispose list and issue callbacks */ | ||
712 | list_for_each_safe(tmp, tmp2, &dispose_list) { | ||
713 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | ||
714 | cFYI(1, "Callback mid 0x%x", mid_entry->mid); | ||
715 | list_del_init(&mid_entry->qhead); | ||
716 | mid_entry->callback(mid_entry); | ||
717 | } | ||
718 | /* 1/8th of sec is more than enough time for them to exit */ | ||
719 | msleep(125); | ||
720 | } | ||
721 | |||
722 | if (!list_empty(&server->pending_mid_q)) { | ||
723 | /* mpx threads have not exited yet give them | ||
724 | at least the smb send timeout time for long ops */ | ||
725 | /* due to delays on oplock break requests, we need | ||
726 | to wait at least 45 seconds before giving up | ||
727 | on a request getting a response and going ahead | ||
728 | and killing cifsd */ | ||
729 | cFYI(1, "Wait for exit from demultiplex thread"); | ||
730 | msleep(46000); | ||
731 | /* if threads still have not exited they are probably never | ||
732 | coming home not much else we can do but free the memory */ | ||
733 | } | ||
734 | |||
735 | kfree(server->hostname); | ||
736 | task_to_wake = xchg(&server->tsk, NULL); | 780 | task_to_wake = xchg(&server->tsk, NULL); |
737 | kfree(server); | 781 | clean_demultiplex_info(server); |
738 | |||
739 | length = atomic_dec_return(&tcpSesAllocCount); | ||
740 | if (length > 0) | ||
741 | mempool_resize(cifs_req_poolp, length + cifs_min_rcv, | ||
742 | GFP_KERNEL); | ||
743 | 782 | ||
744 | /* if server->tsk was NULL then wait for a signal before exiting */ | 783 | /* if server->tsk was NULL then wait for a signal before exiting */ |
745 | if (!task_to_wake) { | 784 | if (!task_to_wake) { |
@@ -2839,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) | |||
2839 | kfree(volume_info->username); | 2878 | kfree(volume_info->username); |
2840 | kzfree(volume_info->password); | 2879 | kzfree(volume_info->password); |
2841 | kfree(volume_info->UNC); | 2880 | kfree(volume_info->UNC); |
2842 | kfree(volume_info->UNCip); | 2881 | if (volume_info->UNCip != volume_info->UNC + 2) |
2882 | kfree(volume_info->UNCip); | ||
2843 | kfree(volume_info->domainname); | 2883 | kfree(volume_info->domainname); |
2844 | kfree(volume_info->iocharset); | 2884 | kfree(volume_info->iocharset); |
2845 | kfree(volume_info->prepath); | 2885 | kfree(volume_info->prepath); |
@@ -3193,15 +3233,9 @@ mount_fail_check: | |||
3193 | else | 3233 | else |
3194 | cifs_put_tcp_session(srvTcp); | 3234 | cifs_put_tcp_session(srvTcp); |
3195 | bdi_destroy(&cifs_sb->bdi); | 3235 | bdi_destroy(&cifs_sb->bdi); |
3196 | goto out; | ||
3197 | } | 3236 | } |
3198 | 3237 | ||
3199 | /* volume_info->password is freed above when existing session found | ||
3200 | (in which case it is not needed anymore) but when new session is created | ||
3201 | the password ptr is put in the new session structure (in which case the | ||
3202 | password will be freed at unmount time) */ | ||
3203 | out: | 3238 | out: |
3204 | /* zero out password before freeing */ | ||
3205 | FreeXid(xid); | 3239 | FreeXid(xid); |
3206 | return rc; | 3240 | return rc; |
3207 | } | 3241 | } |
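The cleanup_volume_info_contents() hunk above needs its pointer-identity test because UNCip does not always own an allocation: when no ip= option is given, the option parser is understood to point UNCip two bytes into the UNC string itself (past the leading backslashes), and kfree() on an interior pointer would corrupt the heap. Illustratively:

	/* UNC owns the buffer; UNCip may alias into it:
	 *
	 *     UNC   -> "\\\\server\\share"
	 *     UNCip -> UNC + 2      ("server\\share", no allocation)
	 *
	 * so only free UNCip when it is a distinct allocation. */
	if (volume_info->UNCip != volume_info->UNC + 2)
	        kfree(volume_info->UNCip);
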
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 499f27fc8576..72d448bf96ce 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -57,11 +57,6 @@ build_path_from_dentry(struct dentry *direntry) | |||
57 | struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); | 57 | struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); |
58 | unsigned seq; | 58 | unsigned seq; |
59 | 59 | ||
60 | if (direntry == NULL) | ||
61 | return NULL; /* not much we can do if dentry is freed and | ||
62 | we need to reopen the file after it was closed implicitly | ||
63 | when the server crashed */ | ||
64 | |||
65 | dirsep = CIFS_DIR_SEP(cifs_sb); | 60 | dirsep = CIFS_DIR_SEP(cifs_sb); |
66 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) | 61 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) |
67 | dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); | 62 | dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); |
@@ -110,8 +105,8 @@ cifs_bp_rename_retry: | |||
110 | } | 105 | } |
111 | rcu_read_unlock(); | 106 | rcu_read_unlock(); |
112 | if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { | 107 | if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { |
113 | cERROR(1, "did not end path lookup where expected namelen is %d", | 108 | cFYI(1, "did not end path lookup where expected. namelen=%d " |
114 | namelen); | 109 | "dfsplen=%d", namelen, dfsplen); |
115 | /* presumably this is only possible if racing with a rename | 110 | /* presumably this is only possible if racing with a rename |
116 | of one of the parent directories (we can not lock the dentries | 111 | of one of the parent directories (we can not lock the dentries |
117 | above us to prevent this, but retrying should be harmless) */ | 112 | above us to prevent this, but retrying should be harmless) */ |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 548f06230a6d..1d2d91d9bf65 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -79,8 +79,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
79 | /* Perform the upcall */ | 79 | /* Perform the upcall */ |
80 | rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); | 80 | rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); |
81 | if (rc < 0) | 81 | if (rc < 0) |
82 | cERROR(1, "%s: unable to resolve: %*.*s", | 82 | cFYI(1, "%s: unable to resolve: %*.*s", |
83 | __func__, len, len, hostname); | 83 | __func__, len, len, hostname); |
84 | else | 84 | else |
85 | cFYI(1, "%s: resolved: %*.*s to %s", | 85 | cFYI(1, "%s: resolved: %*.*s to %s", |
86 | __func__, len, len, hostname, *ip_addr); | 86 | __func__, len, len, hostname, *ip_addr); |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 378acdafa356..9f41a10523a1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -314,6 +314,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
314 | } | 314 | } |
315 | spin_unlock(&cifs_file_list_lock); | 315 | spin_unlock(&cifs_file_list_lock); |
316 | 316 | ||
317 | cancel_work_sync(&cifs_file->oplock_break); | ||
318 | |||
317 | if (!tcon->need_reconnect && !cifs_file->invalidHandle) { | 319 | if (!tcon->need_reconnect && !cifs_file->invalidHandle) { |
318 | int xid, rc; | 320 | int xid, rc; |
319 | 321 | ||
@@ -2418,31 +2420,6 @@ void cifs_oplock_break(struct work_struct *work) | |||
2418 | cinode->clientCanCacheRead ? 1 : 0); | 2420 | cinode->clientCanCacheRead ? 1 : 0); |
2419 | cFYI(1, "Oplock release rc = %d", rc); | 2421 | cFYI(1, "Oplock release rc = %d", rc); |
2420 | } | 2422 | } |
2421 | |||
2422 | /* | ||
2423 | * We might have kicked in before is_valid_oplock_break() | ||
2424 | * finished grabbing reference for us. Make sure it's done by | ||
2425 | * waiting for cifs_file_list_lock. | ||
2426 | */ | ||
2427 | spin_lock(&cifs_file_list_lock); | ||
2428 | spin_unlock(&cifs_file_list_lock); | ||
2429 | |||
2430 | cifs_oplock_break_put(cfile); | ||
2431 | } | ||
2432 | |||
2433 | /* must be called while holding cifs_file_list_lock */ | ||
2434 | void cifs_oplock_break_get(struct cifsFileInfo *cfile) | ||
2435 | { | ||
2436 | cifs_sb_active(cfile->dentry->d_sb); | ||
2437 | cifsFileInfo_get(cfile); | ||
2438 | } | ||
2439 | |||
2440 | void cifs_oplock_break_put(struct cifsFileInfo *cfile) | ||
2441 | { | ||
2442 | struct super_block *sb = cfile->dentry->d_sb; | ||
2443 | |||
2444 | cifsFileInfo_put(cfile); | ||
2445 | cifs_sb_deactive(sb); | ||
2446 | } | 2423 | } |
2447 | 2424 | ||
2448 | const struct address_space_operations cifs_addr_ops = { | 2425 | const struct address_space_operations cifs_addr_ops = { |
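The file.c hunks trade the old scheme - take an extra file and superblock reference when the oplock break is queued, drop it when the work finishes - for a single cancel_work_sync() in cifsFileInfo_put(). The final put now simply waits for any queued or running break to finish (or cancels it) before freeing, which is why both cifs_oplock_break_get/put() and the lock/unlock synchronization dance could be deleted. Reduced to its shape (illustrative, not the full function body):

	void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
	{
	        /* ... drop from lists under cifs_file_list_lock ... */

	        /* wait out (or cancel) a pending oplock break so the
	         * work can never run against freed memory */
	        cancel_work_sync(&cifs_file->oplock_break);

	        /* ... close the handle, free locks, dput, kfree ... */
	}
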
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9b018c8334fa..a7b2dcd4a53e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, | |||
764 | if (full_path == NULL) | 764 | if (full_path == NULL) |
765 | return full_path; | 765 | return full_path; |
766 | 766 | ||
767 | if (dfsplen) { | 767 | if (dfsplen) |
768 | strncpy(full_path, tcon->treeName, dfsplen); | 768 | strncpy(full_path, tcon->treeName, dfsplen); |
769 | /* switch slash direction in prepath depending on whether | ||
770 | * windows or posix style path names | ||
771 | */ | ||
772 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { | ||
773 | int i; | ||
774 | for (i = 0; i < dfsplen; i++) { | ||
775 | if (full_path[i] == '\\') | ||
776 | full_path[i] = '/'; | ||
777 | } | ||
778 | } | ||
779 | } | ||
780 | strncpy(full_path + dfsplen, vol->prepath, pplen); | 769 | strncpy(full_path + dfsplen, vol->prepath, pplen); |
770 | convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); | ||
781 | full_path[dfsplen + pplen] = 0; /* add trailing null */ | 771 | full_path[dfsplen + pplen] = 0; /* add trailing null */ |
782 | return full_path; | 772 | return full_path; |
783 | } | 773 | } |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 556b1a0b54de..db3f18cdf024 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -74,8 +74,14 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) | |||
74 | cERROR(1, "%s: Could not init md5 shash\n", __func__); | 74 | cERROR(1, "%s: Could not init md5 shash\n", __func__); |
75 | goto symlink_hash_err; | 75 | goto symlink_hash_err; |
76 | } | 76 | } |
77 | crypto_shash_update(&sdescmd5->shash, link_str, link_len); | 77 | rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); |
78 | if (rc) { | ||
79 | cERROR(1, "%s: Could not update with link_str\n", __func__); | ||
80 | goto symlink_hash_err; | ||
81 | } | ||
78 | rc = crypto_shash_final(&sdescmd5->shash, md5_hash); | 82 | rc = crypto_shash_final(&sdescmd5->shash, md5_hash); |
83 | if (rc) | ||
84 | cERROR(1, "%s: Could not generate md5 hash\n", __func__); | ||
79 | 85 | ||
80 | symlink_hash_err: | 86 | symlink_hash_err: |
81 | crypto_free_shash(md5); | 87 | crypto_free_shash(md5); |
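With this hunk every step of the MD5 shash sequence in symlink_hash() is checked; the same fix is applied to the MD4 path in smbencrypt.c below. The full checked sequence looks like (sketch; allocation and sdesc setup elided as in the driver):

	rc = crypto_shash_init(&sdescmd5->shash);
	if (rc)
	        goto symlink_hash_err;  /* could not init md5 shash */
	rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
	if (rc)
	        goto symlink_hash_err;  /* could not hash link_str */
	rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
	/* rc from the final step propagates to the caller */
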
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 03a1f491d39b..7c1693392598 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -585,15 +585,8 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
585 | 585 | ||
586 | cifs_set_oplock_level(pCifsInode, | 586 | cifs_set_oplock_level(pCifsInode, |
587 | pSMB->OplockLevel ? OPLOCK_READ : 0); | 587 | pSMB->OplockLevel ? OPLOCK_READ : 0); |
588 | /* | 588 | queue_work(system_nrt_wq, |
589 | * cifs_oplock_break_put() can't be called | 589 | &netfile->oplock_break); |
590 | * from here. Get reference after queueing | ||
591 | * succeeded. cifs_oplock_break() will | ||
592 | * synchronize using cifs_file_list_lock. | ||
593 | */ | ||
594 | if (queue_work(system_nrt_wq, | ||
595 | &netfile->oplock_break)) | ||
596 | cifs_oplock_break_get(netfile); | ||
597 | netfile->oplock_break_cancelled = false; | 590 | netfile->oplock_break_cancelled = false; |
598 | 591 | ||
599 | spin_unlock(&cifs_file_list_lock); | 592 | spin_unlock(&cifs_file_list_lock); |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 965a3af186a1..5de03ec20144 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Directory search handling | 4 | * Directory search handling |
5 | * | 5 | * |
6 | * Copyright (C) International Business Machines Corp., 2004, 2008 | 6 | * Copyright (C) International Business Machines Corp., 2004, 2008 |
7 | * Copyright (C) Red Hat, Inc., 2011 | ||
7 | * Author(s): Steve French (sfrench@us.ibm.com) | 8 | * Author(s): Steve French (sfrench@us.ibm.com) |
8 | * | 9 | * |
9 | * This library is free software; you can redistribute it and/or modify | 10 | * This library is free software; you can redistribute it and/or modify |
@@ -290,10 +291,10 @@ error_exit: | |||
290 | } | 291 | } |
291 | 292 | ||
292 | /* return length of unicode string in bytes */ | 293 | /* return length of unicode string in bytes */ |
293 | static int cifs_unicode_bytelen(char *str) | 294 | static int cifs_unicode_bytelen(const char *str) |
294 | { | 295 | { |
295 | int len; | 296 | int len; |
296 | __le16 *ustr = (__le16 *)str; | 297 | const __le16 *ustr = (const __le16 *)str; |
297 | 298 | ||
298 | for (len = 0; len <= PATH_MAX; len++) { | 299 | for (len = 0; len <= PATH_MAX; len++) { |
299 | if (ustr[len] == 0) | 300 | if (ustr[len] == 0) |
@@ -334,78 +335,128 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) | |||
334 | 335 | ||
335 | } | 336 | } |
336 | 337 | ||
338 | struct cifs_dirent { | ||
339 | const char *name; | ||
340 | size_t namelen; | ||
341 | u32 resume_key; | ||
342 | u64 ino; | ||
343 | }; | ||
344 | |||
345 | static void cifs_fill_dirent_unix(struct cifs_dirent *de, | ||
346 | const FILE_UNIX_INFO *info, bool is_unicode) | ||
347 | { | ||
348 | de->name = &info->FileName[0]; | ||
349 | if (is_unicode) | ||
350 | de->namelen = cifs_unicode_bytelen(de->name); | ||
351 | else | ||
352 | de->namelen = strnlen(de->name, PATH_MAX); | ||
353 | de->resume_key = info->ResumeKey; | ||
354 | de->ino = le64_to_cpu(info->basic.UniqueId); | ||
355 | } | ||
356 | |||
357 | static void cifs_fill_dirent_dir(struct cifs_dirent *de, | ||
358 | const FILE_DIRECTORY_INFO *info) | ||
359 | { | ||
360 | de->name = &info->FileName[0]; | ||
361 | de->namelen = le32_to_cpu(info->FileNameLength); | ||
362 | de->resume_key = info->FileIndex; | ||
363 | } | ||
364 | |||
365 | static void cifs_fill_dirent_full(struct cifs_dirent *de, | ||
366 | const FILE_FULL_DIRECTORY_INFO *info) | ||
367 | { | ||
368 | de->name = &info->FileName[0]; | ||
369 | de->namelen = le32_to_cpu(info->FileNameLength); | ||
370 | de->resume_key = info->FileIndex; | ||
371 | } | ||
372 | |||
373 | static void cifs_fill_dirent_search(struct cifs_dirent *de, | ||
374 | const SEARCH_ID_FULL_DIR_INFO *info) | ||
375 | { | ||
376 | de->name = &info->FileName[0]; | ||
377 | de->namelen = le32_to_cpu(info->FileNameLength); | ||
378 | de->resume_key = info->FileIndex; | ||
379 | de->ino = le64_to_cpu(info->UniqueId); | ||
380 | } | ||
381 | |||
382 | static void cifs_fill_dirent_both(struct cifs_dirent *de, | ||
383 | const FILE_BOTH_DIRECTORY_INFO *info) | ||
384 | { | ||
385 | de->name = &info->FileName[0]; | ||
386 | de->namelen = le32_to_cpu(info->FileNameLength); | ||
387 | de->resume_key = info->FileIndex; | ||
388 | } | ||
389 | |||
390 | static void cifs_fill_dirent_std(struct cifs_dirent *de, | ||
391 | const FIND_FILE_STANDARD_INFO *info) | ||
392 | { | ||
393 | de->name = &info->FileName[0]; | ||
394 | /* one byte length, no endianness conversion */ | ||
395 | de->namelen = info->FileNameLength; | ||
396 | de->resume_key = info->ResumeKey; | ||
397 | } | ||
398 | |||
399 | static int cifs_fill_dirent(struct cifs_dirent *de, const void *info, | ||
400 | u16 level, bool is_unicode) | ||
401 | { | ||
402 | memset(de, 0, sizeof(*de)); | ||
403 | |||
404 | switch (level) { | ||
405 | case SMB_FIND_FILE_UNIX: | ||
406 | cifs_fill_dirent_unix(de, info, is_unicode); | ||
407 | break; | ||
408 | case SMB_FIND_FILE_DIRECTORY_INFO: | ||
409 | cifs_fill_dirent_dir(de, info); | ||
410 | break; | ||
411 | case SMB_FIND_FILE_FULL_DIRECTORY_INFO: | ||
412 | cifs_fill_dirent_full(de, info); | ||
413 | break; | ||
414 | case SMB_FIND_FILE_ID_FULL_DIR_INFO: | ||
415 | cifs_fill_dirent_search(de, info); | ||
416 | break; | ||
417 | case SMB_FIND_FILE_BOTH_DIRECTORY_INFO: | ||
418 | cifs_fill_dirent_both(de, info); | ||
419 | break; | ||
420 | case SMB_FIND_FILE_INFO_STANDARD: | ||
421 | cifs_fill_dirent_std(de, info); | ||
422 | break; | ||
423 | default: | ||
424 | cFYI(1, "Unknown findfirst level %d", level); | ||
425 | return -EINVAL; | ||
426 | } | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
337 | #define UNICODE_DOT cpu_to_le16(0x2e) | 431 | #define UNICODE_DOT cpu_to_le16(0x2e) |
338 | 432 | ||
339 | /* return 0 if no match and 1 for . (current directory) and 2 for .. (parent) */ | 433 | /* return 0 if no match and 1 for . (current directory) and 2 for .. (parent) */ |
340 | static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) | 434 | static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode) |
341 | { | 435 | { |
342 | int rc = 0; | 436 | int rc = 0; |
343 | char *filename = NULL; | ||
344 | int len = 0; | ||
345 | |||
346 | if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) { | ||
347 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
348 | filename = &pFindData->FileName[0]; | ||
349 | if (cfile->srch_inf.unicode) { | ||
350 | len = cifs_unicode_bytelen(filename); | ||
351 | } else { | ||
352 | /* BB should we make this strnlen of PATH_MAX? */ | ||
353 | len = strnlen(filename, 5); | ||
354 | } | ||
355 | } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
356 | FILE_DIRECTORY_INFO *pFindData = | ||
357 | (FILE_DIRECTORY_INFO *)current_entry; | ||
358 | filename = &pFindData->FileName[0]; | ||
359 | len = le32_to_cpu(pFindData->FileNameLength); | ||
360 | } else if (cfile->srch_inf.info_level == | ||
361 | SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
362 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
363 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
364 | filename = &pFindData->FileName[0]; | ||
365 | len = le32_to_cpu(pFindData->FileNameLength); | ||
366 | } else if (cfile->srch_inf.info_level == | ||
367 | SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
368 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
369 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
370 | filename = &pFindData->FileName[0]; | ||
371 | len = le32_to_cpu(pFindData->FileNameLength); | ||
372 | } else if (cfile->srch_inf.info_level == | ||
373 | SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
374 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
375 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
376 | filename = &pFindData->FileName[0]; | ||
377 | len = le32_to_cpu(pFindData->FileNameLength); | ||
378 | } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) { | ||
379 | FIND_FILE_STANDARD_INFO *pFindData = | ||
380 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
381 | filename = &pFindData->FileName[0]; | ||
382 | len = pFindData->FileNameLength; | ||
383 | } else { | ||
384 | cFYI(1, "Unknown findfirst level %d", | ||
385 | cfile->srch_inf.info_level); | ||
386 | } | ||
387 | 437 | ||
388 | if (filename) { | 438 | if (!de->name) |
389 | if (cfile->srch_inf.unicode) { | 439 | return 0; |
390 | __le16 *ufilename = (__le16 *)filename; | 440 | |
391 | if (len == 2) { | 441 | if (is_unicode) { |
392 | /* check for . */ | 442 | __le16 *ufilename = (__le16 *)de->name; |
393 | if (ufilename[0] == UNICODE_DOT) | 443 | if (de->namelen == 2) { |
394 | rc = 1; | 444 | /* check for . */ |
395 | } else if (len == 4) { | 445 | if (ufilename[0] == UNICODE_DOT) |
396 | /* check for .. */ | 446 | rc = 1; |
397 | if ((ufilename[0] == UNICODE_DOT) | 447 | } else if (de->namelen == 4) { |
398 | && (ufilename[1] == UNICODE_DOT)) | 448 | /* check for .. */ |
399 | rc = 2; | 449 | if (ufilename[0] == UNICODE_DOT && |
400 | } | 450 | ufilename[1] == UNICODE_DOT) |
401 | } else /* ASCII */ { | 451 | rc = 2; |
402 | if (len == 1) { | 452 | } |
403 | if (filename[0] == '.') | 453 | } else /* ASCII */ { |
404 | rc = 1; | 454 | if (de->namelen == 1) { |
405 | } else if (len == 2) { | 455 | if (de->name[0] == '.') |
406 | if ((filename[0] == '.') && (filename[1] == '.')) | 456 | rc = 1; |
407 | rc = 2; | 457 | } else if (de->namelen == 2) { |
408 | } | 458 | if (de->name[0] == '.' && de->name[1] == '.') |
459 | rc = 2; | ||
409 | } | 460 | } |
410 | } | 461 | } |
411 | 462 | ||
@@ -427,66 +478,18 @@ static int is_dir_changed(struct file *file) | |||
427 | } | 478 | } |
428 | 479 | ||
429 | static int cifs_save_resume_key(const char *current_entry, | 480 | static int cifs_save_resume_key(const char *current_entry, |
430 | struct cifsFileInfo *cifsFile) | 481 | struct cifsFileInfo *file_info) |
431 | { | 482 | { |
432 | int rc = 0; | 483 | struct cifs_dirent de; |
433 | unsigned int len = 0; | 484 | int rc; |
434 | __u16 level; | ||
435 | char *filename; | ||
436 | |||
437 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
438 | return -EINVAL; | ||
439 | |||
440 | level = cifsFile->srch_inf.info_level; | ||
441 | |||
442 | if (level == SMB_FIND_FILE_UNIX) { | ||
443 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
444 | 485 | ||
445 | filename = &pFindData->FileName[0]; | 486 | rc = cifs_fill_dirent(&de, current_entry, file_info->srch_inf.info_level, |
446 | if (cifsFile->srch_inf.unicode) { | 487 | file_info->srch_inf.unicode); |
447 | len = cifs_unicode_bytelen(filename); | 488 | if (!rc) { |
448 | } else { | 489 | file_info->srch_inf.presume_name = de.name; |
449 | /* BB should we make this strnlen of PATH_MAX? */ | 490 | file_info->srch_inf.resume_name_len = de.namelen; |
450 | len = strnlen(filename, PATH_MAX); | 491 | file_info->srch_inf.resume_key = de.resume_key; |
451 | } | ||
452 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
453 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
454 | FILE_DIRECTORY_INFO *pFindData = | ||
455 | (FILE_DIRECTORY_INFO *)current_entry; | ||
456 | filename = &pFindData->FileName[0]; | ||
457 | len = le32_to_cpu(pFindData->FileNameLength); | ||
458 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
459 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
460 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
461 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
462 | filename = &pFindData->FileName[0]; | ||
463 | len = le32_to_cpu(pFindData->FileNameLength); | ||
464 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
465 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
466 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
467 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
468 | filename = &pFindData->FileName[0]; | ||
469 | len = le32_to_cpu(pFindData->FileNameLength); | ||
470 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
471 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
472 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
473 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
474 | filename = &pFindData->FileName[0]; | ||
475 | len = le32_to_cpu(pFindData->FileNameLength); | ||
476 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
477 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
478 | FIND_FILE_STANDARD_INFO *pFindData = | ||
479 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
480 | filename = &pFindData->FileName[0]; | ||
481 | /* one byte length, no name conversion */ | ||
482 | len = (unsigned int)pFindData->FileNameLength; | ||
483 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
484 | } else { | ||
485 | cFYI(1, "Unknown findfirst level %d", level); | ||
486 | return -EINVAL; | ||
487 | } | 492 | } |
488 | cifsFile->srch_inf.resume_name_len = len; | ||
489 | cifsFile->srch_inf.presume_name = filename; | ||
490 | return rc; | 493 | return rc; |
491 | } | 494 | } |
492 | 495 | ||
@@ -605,136 +608,70 @@ static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon, | |||
605 | return rc; | 608 | return rc; |
606 | } | 609 | } |
607 | 610 | ||
608 | /* inode num, inode type and filename returned */ | 611 | static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, |
609 | static int cifs_get_name_from_search_buf(struct qstr *pqst, | 612 | void *dirent, char *scratch_buf, unsigned int max_len) |
610 | char *current_entry, __u16 level, unsigned int unicode, | ||
611 | struct cifs_sb_info *cifs_sb, unsigned int max_len, __u64 *pinum) | ||
612 | { | 613 | { |
614 | struct cifsFileInfo *file_info = file->private_data; | ||
615 | struct super_block *sb = file->f_path.dentry->d_sb; | ||
616 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
617 | struct cifs_dirent de = { NULL, }; | ||
618 | struct cifs_fattr fattr; | ||
619 | struct dentry *dentry; | ||
620 | struct qstr name; | ||
613 | int rc = 0; | 621 | int rc = 0; |
614 | unsigned int len = 0; | 622 | ino_t ino; |
615 | char *filename; | ||
616 | struct nls_table *nlt = cifs_sb->local_nls; | ||
617 | |||
618 | *pinum = 0; | ||
619 | |||
620 | if (level == SMB_FIND_FILE_UNIX) { | ||
621 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
622 | |||
623 | filename = &pFindData->FileName[0]; | ||
624 | if (unicode) { | ||
625 | len = cifs_unicode_bytelen(filename); | ||
626 | } else { | ||
627 | /* BB should we make this strnlen of PATH_MAX? */ | ||
628 | len = strnlen(filename, PATH_MAX); | ||
629 | } | ||
630 | 623 | ||
631 | *pinum = le64_to_cpu(pFindData->basic.UniqueId); | 624 | rc = cifs_fill_dirent(&de, find_entry, file_info->srch_inf.info_level, |
632 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | 625 | file_info->srch_inf.unicode); |
633 | FILE_DIRECTORY_INFO *pFindData = | 626 | if (rc) |
634 | (FILE_DIRECTORY_INFO *)current_entry; | 627 | return rc; |
635 | filename = &pFindData->FileName[0]; | ||
636 | len = le32_to_cpu(pFindData->FileNameLength); | ||
637 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
638 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
639 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
640 | filename = &pFindData->FileName[0]; | ||
641 | len = le32_to_cpu(pFindData->FileNameLength); | ||
642 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
643 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
644 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
645 | filename = &pFindData->FileName[0]; | ||
646 | len = le32_to_cpu(pFindData->FileNameLength); | ||
647 | *pinum = le64_to_cpu(pFindData->UniqueId); | ||
648 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
649 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
650 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
651 | filename = &pFindData->FileName[0]; | ||
652 | len = le32_to_cpu(pFindData->FileNameLength); | ||
653 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
654 | FIND_FILE_STANDARD_INFO *pFindData = | ||
655 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
656 | filename = &pFindData->FileName[0]; | ||
657 | /* one byte length, no name conversion */ | ||
658 | len = (unsigned int)pFindData->FileNameLength; | ||
659 | } else { | ||
660 | cFYI(1, "Unknown findfirst level %d", level); | ||
661 | return -EINVAL; | ||
662 | } | ||
663 | 628 | ||
664 | if (len > max_len) { | 629 | if (de.namelen > max_len) { |
665 | cERROR(1, "bad search response length %d past smb end", len); | 630 | cERROR(1, "bad search response length %zd past smb end", |
631 | de.namelen); | ||
666 | return -EINVAL; | 632 | return -EINVAL; |
667 | } | 633 | } |
668 | 634 | ||
669 | if (unicode) { | ||
670 | pqst->len = cifs_from_ucs2((char *) pqst->name, | ||
671 | (__le16 *) filename, | ||
672 | UNICODE_NAME_MAX, | ||
673 | min(len, max_len), nlt, | ||
674 | cifs_sb->mnt_cifs_flags & | ||
675 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
676 | pqst->len -= nls_nullsize(nlt); | ||
677 | } else { | ||
678 | pqst->name = filename; | ||
679 | pqst->len = len; | ||
680 | } | ||
681 | return rc; | ||
682 | } | ||
683 | |||
684 | static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, | ||
685 | void *direntry, char *scratch_buf, unsigned int max_len) | ||
686 | { | ||
687 | int rc = 0; | ||
688 | struct qstr qstring; | ||
689 | struct cifsFileInfo *pCifsF; | ||
690 | u64 inum; | ||
691 | ino_t ino; | ||
692 | struct super_block *sb; | ||
693 | struct cifs_sb_info *cifs_sb; | ||
694 | struct dentry *tmp_dentry; | ||
695 | struct cifs_fattr fattr; | ||
696 | |||
697 | /* get filename and len into qstring */ | ||
698 | /* get dentry */ | ||
699 | /* decide whether to create and populate inode */ | ||
700 | if ((direntry == NULL) || (file == NULL)) | ||
701 | return -EINVAL; | ||
702 | |||
703 | pCifsF = file->private_data; | ||
704 | |||
705 | if ((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL)) | ||
706 | return -ENOENT; | ||
707 | |||
708 | rc = cifs_entry_is_dot(pfindEntry, pCifsF); | ||
709 | /* skip . and .. since we added them first */ | 635 | /* skip . and .. since we added them first */ |
710 | if (rc != 0) | 636 | if (cifs_entry_is_dot(&de, file_info->srch_inf.unicode)) |
711 | return 0; | 637 | return 0; |
712 | 638 | ||
713 | sb = file->f_path.dentry->d_sb; | 639 | if (file_info->srch_inf.unicode) { |
714 | cifs_sb = CIFS_SB(sb); | 640 | struct nls_table *nlt = cifs_sb->local_nls; |
715 | |||
716 | qstring.name = scratch_buf; | ||
717 | rc = cifs_get_name_from_search_buf(&qstring, pfindEntry, | ||
718 | pCifsF->srch_inf.info_level, | ||
719 | pCifsF->srch_inf.unicode, cifs_sb, | ||
720 | max_len, &inum /* returned */); | ||
721 | 641 | ||
722 | if (rc) | 642 | name.name = scratch_buf; |
723 | return rc; | 643 | name.len = |
644 | cifs_from_ucs2((char *)name.name, (__le16 *)de.name, | ||
645 | UNICODE_NAME_MAX, | ||
646 | min(de.namelen, (size_t)max_len), nlt, | ||
647 | cifs_sb->mnt_cifs_flags & | ||
648 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
649 | name.len -= nls_nullsize(nlt); | ||
650 | } else { | ||
651 | name.name = de.name; | ||
652 | name.len = de.namelen; | ||
653 | } | ||
724 | 654 | ||
725 | if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) | 655 | switch (file_info->srch_inf.info_level) { |
656 | case SMB_FIND_FILE_UNIX: | ||
726 | cifs_unix_basic_to_fattr(&fattr, | 657 | cifs_unix_basic_to_fattr(&fattr, |
727 | &((FILE_UNIX_INFO *) pfindEntry)->basic, | 658 | &((FILE_UNIX_INFO *)find_entry)->basic, |
728 | cifs_sb); | 659 | cifs_sb); |
729 | else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) | 660 | break; |
730 | cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *) | 661 | case SMB_FIND_FILE_INFO_STANDARD: |
731 | pfindEntry, cifs_sb); | 662 | cifs_std_info_to_fattr(&fattr, |
732 | else | 663 | (FIND_FILE_STANDARD_INFO *)find_entry, |
733 | cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *) | 664 | cifs_sb); |
734 | pfindEntry, cifs_sb); | 665 | break; |
666 | default: | ||
667 | cifs_dir_info_to_fattr(&fattr, | ||
668 | (FILE_DIRECTORY_INFO *)find_entry, | ||
669 | cifs_sb); | ||
670 | break; | ||
671 | } | ||
735 | 672 | ||
736 | if (inum && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { | 673 | if (de.ino && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { |
737 | fattr.cf_uniqueid = inum; | 674 | fattr.cf_uniqueid = de.ino; |
738 | } else { | 675 | } else { |
739 | fattr.cf_uniqueid = iunique(sb, ROOT_I); | 676 | fattr.cf_uniqueid = iunique(sb, ROOT_I); |
740 | cifs_autodisable_serverino(cifs_sb); | 677 | cifs_autodisable_serverino(cifs_sb); |
@@ -750,12 +687,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, | |||
750 | fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; | 687 | fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; |
751 | 688 | ||
752 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); | 689 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); |
753 | tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr); | 690 | dentry = cifs_readdir_lookup(file->f_dentry, &name, &fattr); |
754 | 691 | ||
755 | rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, | 692 | rc = filldir(dirent, name.name, name.len, file->f_pos, ino, |
756 | ino, fattr.cf_dtype); | 693 | fattr.cf_dtype); |
757 | 694 | ||
758 | dput(tmp_dentry); | 695 | dput(dentry); |
759 | return rc; | 696 | return rc; |
760 | } | 697 | } |
761 | 698 | ||
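The readdir.c rework funnels all six FindFirst info levels through one struct cifs_dirent, so cifs_entry_is_dot(), cifs_save_resume_key() and cifs_filldir() share a single extraction step instead of repeating the per-level casts. The calling pattern they now share (sketch using the names introduced above):

	struct cifs_dirent de;
	int rc;

	rc = cifs_fill_dirent(&de, find_entry,
	                      file_info->srch_inf.info_level,
	                      file_info->srch_inf.unicode);
	if (rc)
	        return rc;      /* unknown info level: -EINVAL */

	/* consumers work from de.name, de.namelen, de.resume_key and
	 * de.ino rather than level-specific structure casts */
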
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 1c5b770c3141..42b9fff48751 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c | |||
@@ -157,8 +157,14 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) | |||
157 | cERROR(1, "%s: Could not init md4 shash\n", __func__); | 157 | cERROR(1, "%s: Could not init md4 shash\n", __func__); |
158 | goto mdfour_err; | 158 | goto mdfour_err; |
159 | } | 159 | } |
160 | crypto_shash_update(&sdescmd4->shash, link_str, link_len); | 160 | rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len); |
161 | if (rc) { | ||
162 | cERROR(1, "%s: Could not update with link_str\n", __func__); | ||
163 | goto mdfour_err; | ||
164 | } | ||
161 | rc = crypto_shash_final(&sdescmd4->shash, md4_hash); | 165 | rc = crypto_shash_final(&sdescmd4->shash, md4_hash); |
166 | if (rc) | ||
167 | cERROR(1, "%s: Could not generate md4 hash\n", __func__); | ||
162 | 168 | ||
163 | mdfour_err: | 169 | mdfour_err: |
164 | crypto_free_shash(md4); | 170 | crypto_free_shash(md4); |
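The mdfour() fix above makes every step of the shash sequence report failure instead of silently ignoring the crypto_shash_update() return value. For reference, a minimal sketch of the fully checked pattern, assuming the same kernel crypto shash API; the helper name compute_md4() and the kzalloc-based descriptor setup are illustrative, not taken from the patch:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

/* Hash @len bytes of @data into @out with MD4, propagating every error. */
static int compute_md4(u8 *out, const u8 *data, unsigned int len)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int rc;

	tfm = crypto_alloc_shash("md4", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* kzalloc so any descriptor flags start out cleared */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		rc = -ENOMEM;
		goto out_free_tfm;
	}
	desc->tfm = tfm;

	rc = crypto_shash_init(desc);
	if (!rc)
		rc = crypto_shash_update(desc, data, len);
	if (!rc)
		rc = crypto_shash_final(desc, out);

	kfree(desc);
out_free_tfm:
	crypto_free_shash(tfm);
	return rc;
}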
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 147aa22c3c3a..10ca6b2c26b7 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -266,15 +266,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server, | |||
266 | while (1) { | 266 | while (1) { |
267 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { | 267 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { |
268 | spin_unlock(&GlobalMid_Lock); | 268 | spin_unlock(&GlobalMid_Lock); |
269 | #ifdef CONFIG_CIFS_STATS2 | 269 | cifs_num_waiters_inc(server); |
270 | atomic_inc(&server->num_waiters); | ||
271 | #endif | ||
272 | wait_event(server->request_q, | 270 | wait_event(server->request_q, |
273 | atomic_read(&server->inFlight) | 271 | atomic_read(&server->inFlight) |
274 | < cifs_max_pending); | 272 | < cifs_max_pending); |
275 | #ifdef CONFIG_CIFS_STATS2 | 273 | cifs_num_waiters_dec(server); |
276 | atomic_dec(&server->num_waiters); | ||
277 | #endif | ||
278 | spin_lock(&GlobalMid_Lock); | 274 | spin_lock(&GlobalMid_Lock); |
279 | } else { | 275 | } else { |
280 | if (server->tcpStatus == CifsExiting) { | 276 | if (server->tcpStatus == CifsExiting) { |
@@ -362,6 +358,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
362 | mid = AllocMidQEntry(hdr, server); | 358 | mid = AllocMidQEntry(hdr, server); |
363 | if (mid == NULL) { | 359 | if (mid == NULL) { |
364 | mutex_unlock(&server->srv_mutex); | 360 | mutex_unlock(&server->srv_mutex); |
361 | atomic_dec(&server->inFlight); | ||
362 | wake_up(&server->request_q); | ||
365 | return -ENOMEM; | 363 | return -ENOMEM; |
366 | } | 364 | } |
367 | 365 | ||
@@ -379,15 +377,13 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, | |||
379 | mid->callback = callback; | 377 | mid->callback = callback; |
380 | mid->callback_data = cbdata; | 378 | mid->callback_data = cbdata; |
381 | mid->midState = MID_REQUEST_SUBMITTED; | 379 | mid->midState = MID_REQUEST_SUBMITTED; |
382 | #ifdef CONFIG_CIFS_STATS2 | 380 | |
383 | atomic_inc(&server->inSend); | 381 | cifs_in_send_inc(server); |
384 | #endif | ||
385 | rc = smb_sendv(server, iov, nvec); | 382 | rc = smb_sendv(server, iov, nvec); |
386 | #ifdef CONFIG_CIFS_STATS2 | 383 | cifs_in_send_dec(server); |
387 | atomic_dec(&server->inSend); | 384 | cifs_save_when_sent(mid); |
388 | mid->when_sent = jiffies; | ||
389 | #endif | ||
390 | mutex_unlock(&server->srv_mutex); | 385 | mutex_unlock(&server->srv_mutex); |
386 | |||
391 | if (rc) | 387 | if (rc) |
392 | goto out_err; | 388 | goto out_err; |
393 | 389 | ||
@@ -573,14 +569,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, | |||
573 | } | 569 | } |
574 | 570 | ||
575 | midQ->midState = MID_REQUEST_SUBMITTED; | 571 | midQ->midState = MID_REQUEST_SUBMITTED; |
576 | #ifdef CONFIG_CIFS_STATS2 | 572 | cifs_in_send_inc(ses->server); |
577 | atomic_inc(&ses->server->inSend); | ||
578 | #endif | ||
579 | rc = smb_sendv(ses->server, iov, n_vec); | 573 | rc = smb_sendv(ses->server, iov, n_vec); |
580 | #ifdef CONFIG_CIFS_STATS2 | 574 | cifs_in_send_dec(ses->server); |
581 | atomic_dec(&ses->server->inSend); | 575 | cifs_save_when_sent(midQ); |
582 | midQ->when_sent = jiffies; | ||
583 | #endif | ||
584 | 576 | ||
585 | mutex_unlock(&ses->server->srv_mutex); | 577 | mutex_unlock(&ses->server->srv_mutex); |
586 | 578 | ||
@@ -701,14 +693,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, | |||
701 | } | 693 | } |
702 | 694 | ||
703 | midQ->midState = MID_REQUEST_SUBMITTED; | 695 | midQ->midState = MID_REQUEST_SUBMITTED; |
704 | #ifdef CONFIG_CIFS_STATS2 | 696 | |
705 | atomic_inc(&ses->server->inSend); | 697 | cifs_in_send_inc(ses->server); |
706 | #endif | ||
707 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); | 698 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); |
708 | #ifdef CONFIG_CIFS_STATS2 | 699 | cifs_in_send_dec(ses->server); |
709 | atomic_dec(&ses->server->inSend); | 700 | cifs_save_when_sent(midQ); |
710 | midQ->when_sent = jiffies; | ||
711 | #endif | ||
712 | mutex_unlock(&ses->server->srv_mutex); | 701 | mutex_unlock(&ses->server->srv_mutex); |
713 | 702 | ||
714 | if (rc < 0) | 703 | if (rc < 0) |
@@ -841,14 +830,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, | |||
841 | } | 830 | } |
842 | 831 | ||
843 | midQ->midState = MID_REQUEST_SUBMITTED; | 832 | midQ->midState = MID_REQUEST_SUBMITTED; |
844 | #ifdef CONFIG_CIFS_STATS2 | 833 | cifs_in_send_inc(ses->server); |
845 | atomic_inc(&ses->server->inSend); | ||
846 | #endif | ||
847 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); | 834 | rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); |
848 | #ifdef CONFIG_CIFS_STATS2 | 835 | cifs_in_send_dec(ses->server); |
849 | atomic_dec(&ses->server->inSend); | 836 | cifs_save_when_sent(midQ); |
850 | midQ->when_sent = jiffies; | ||
851 | #endif | ||
852 | mutex_unlock(&ses->server->srv_mutex); | 837 | mutex_unlock(&ses->server->srv_mutex); |
853 | 838 | ||
854 | if (rc < 0) { | 839 | if (rc < 0) { |
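All four transport.c hunks above hide the same CONFIG_CIFS_STATS2 bookkeeping behind helpers, so the send paths no longer carry inline #ifdef blocks. The helper definitions are not part of this excerpt (the diffstat's fs/cifs/cifsglob.h change is their likely home); a plausible shape, inferred from the removed code rather than quoted from the patch:

#ifdef CONFIG_CIFS_STATS2
static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
{
	atomic_inc(&server->inSend);
}

static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
{
	atomic_dec(&server->inSend);
}

static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
{
	atomic_inc(&server->num_waiters);
}

static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
{
	atomic_dec(&server->num_waiters);
}

static inline void cifs_save_when_sent(struct mid_q_entry *mid)
{
	mid->when_sent = jiffies;
}
#else
/* With stats disabled, every call site compiles away entirely. */
static inline void cifs_in_send_inc(struct TCP_Server_Info *server) {}
static inline void cifs_in_send_dec(struct TCP_Server_Info *server) {}
static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) {}
static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) {}
static inline void cifs_save_when_sent(struct mid_q_entry *mid) {}
#endif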
diff --git a/fs/compat.c b/fs/compat.c index 0b48d018e38a..58b1da459893 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1675,11 +1675,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | |||
1675 | } | 1675 | } |
1676 | #endif /* HAVE_SET_RESTORE_SIGMASK */ | 1676 | #endif /* HAVE_SET_RESTORE_SIGMASK */ |
1677 | 1677 | ||
1678 | long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) | ||
1679 | { | ||
1680 | return sys_ni_syscall(); | ||
1681 | } | ||
1682 | |||
1683 | #ifdef CONFIG_EPOLL | 1678 | #ifdef CONFIG_EPOLL |
1684 | 1679 | ||
1685 | #ifdef HAVE_SET_RESTORE_SIGMASK | 1680 | #ifdef HAVE_SET_RESTORE_SIGMASK |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 8be086e9abe4..51352de88ef1 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -1003,6 +1003,7 @@ COMPATIBLE_IOCTL(PPPIOCCONNECT) | |||
1003 | COMPATIBLE_IOCTL(PPPIOCDISCONN) | 1003 | COMPATIBLE_IOCTL(PPPIOCDISCONN) |
1004 | COMPATIBLE_IOCTL(PPPIOCATTCHAN) | 1004 | COMPATIBLE_IOCTL(PPPIOCATTCHAN) |
1005 | COMPATIBLE_IOCTL(PPPIOCGCHAN) | 1005 | COMPATIBLE_IOCTL(PPPIOCGCHAN) |
1006 | COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS) | ||
1006 | /* PPPOX */ | 1007 | /* PPPOX */ |
1007 | COMPATIBLE_IOCTL(PPPOEIOCSFWD) | 1008 | COMPATIBLE_IOCTL(PPPOEIOCSFWD) |
1008 | COMPATIBLE_IOCTL(PPPOEIOCDFWD) | 1009 | COMPATIBLE_IOCTL(PPPOEIOCDFWD) |
diff --git a/fs/dcache.c b/fs/dcache.c index be18598c7fd7..a88948b8bd17 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -301,6 +301,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) | |||
301 | return parent; | 301 | return parent; |
302 | } | 302 | } |
303 | 303 | ||
304 | /* | ||
305 | * Unhash a dentry without inserting an RCU walk barrier or checking that | ||
306 | * dentry->d_lock is locked. The caller must take care of that, if | ||
307 | * appropriate. | ||
308 | */ | ||
309 | static void __d_shrink(struct dentry *dentry) | ||
310 | { | ||
311 | if (!d_unhashed(dentry)) { | ||
312 | struct hlist_bl_head *b; | ||
313 | if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) | ||
314 | b = &dentry->d_sb->s_anon; | ||
315 | else | ||
316 | b = d_hash(dentry->d_parent, dentry->d_name.hash); | ||
317 | |||
318 | hlist_bl_lock(b); | ||
319 | __hlist_bl_del(&dentry->d_hash); | ||
320 | dentry->d_hash.pprev = NULL; | ||
321 | hlist_bl_unlock(b); | ||
322 | } | ||
323 | } | ||
324 | |||
304 | /** | 325 | /** |
305 | * d_drop - drop a dentry | 326 | * d_drop - drop a dentry |
306 | * @dentry: dentry to drop | 327 | * @dentry: dentry to drop |
@@ -319,17 +340,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) | |||
319 | void __d_drop(struct dentry *dentry) | 340 | void __d_drop(struct dentry *dentry) |
320 | { | 341 | { |
321 | if (!d_unhashed(dentry)) { | 342 | if (!d_unhashed(dentry)) { |
322 | struct hlist_bl_head *b; | 343 | __d_shrink(dentry); |
323 | if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) | ||
324 | b = &dentry->d_sb->s_anon; | ||
325 | else | ||
326 | b = d_hash(dentry->d_parent, dentry->d_name.hash); | ||
327 | |||
328 | hlist_bl_lock(b); | ||
329 | __hlist_bl_del(&dentry->d_hash); | ||
330 | dentry->d_hash.pprev = NULL; | ||
331 | hlist_bl_unlock(b); | ||
332 | |||
333 | dentry_rcuwalk_barrier(dentry); | 344 | dentry_rcuwalk_barrier(dentry); |
334 | } | 345 | } |
335 | } | 346 | } |
@@ -784,6 +795,7 @@ relock: | |||
784 | 795 | ||
785 | /** | 796 | /** |
786 | * prune_dcache_sb - shrink the dcache | 797 | * prune_dcache_sb - shrink the dcache |
798 | * @sb: superblock | ||
787 | * @nr_to_scan: number of entries to try to free | 799 | * @nr_to_scan: number of entries to try to free |
788 | * | 800 | * |
789 | * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is | 801 | * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is |
@@ -828,44 +840,24 @@ EXPORT_SYMBOL(shrink_dcache_sb); | |||
828 | static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | 840 | static void shrink_dcache_for_umount_subtree(struct dentry *dentry) |
829 | { | 841 | { |
830 | struct dentry *parent; | 842 | struct dentry *parent; |
831 | unsigned detached = 0; | ||
832 | 843 | ||
833 | BUG_ON(!IS_ROOT(dentry)); | 844 | BUG_ON(!IS_ROOT(dentry)); |
834 | 845 | ||
835 | /* detach this root from the system */ | ||
836 | spin_lock(&dentry->d_lock); | ||
837 | dentry_lru_del(dentry); | ||
838 | __d_drop(dentry); | ||
839 | spin_unlock(&dentry->d_lock); | ||
840 | |||
841 | for (;;) { | 846 | for (;;) { |
842 | /* descend to the first leaf in the current subtree */ | 847 | /* descend to the first leaf in the current subtree */ |
843 | while (!list_empty(&dentry->d_subdirs)) { | 848 | while (!list_empty(&dentry->d_subdirs)) |
844 | struct dentry *loop; | ||
845 | |||
846 | /* this is a branch with children - detach all of them | ||
847 | * from the system in one go */ | ||
848 | spin_lock(&dentry->d_lock); | ||
849 | list_for_each_entry(loop, &dentry->d_subdirs, | ||
850 | d_u.d_child) { | ||
851 | spin_lock_nested(&loop->d_lock, | ||
852 | DENTRY_D_LOCK_NESTED); | ||
853 | dentry_lru_del(loop); | ||
854 | __d_drop(loop); | ||
855 | spin_unlock(&loop->d_lock); | ||
856 | } | ||
857 | spin_unlock(&dentry->d_lock); | ||
858 | |||
859 | /* move to the first child */ | ||
860 | dentry = list_entry(dentry->d_subdirs.next, | 849 | dentry = list_entry(dentry->d_subdirs.next, |
861 | struct dentry, d_u.d_child); | 850 | struct dentry, d_u.d_child); |
862 | } | ||
863 | 851 | ||
864 | /* consume the dentries from this leaf up through its parents | 852 | /* consume the dentries from this leaf up through its parents |
865 | * until we find one with children or run out altogether */ | 853 | * until we find one with children or run out altogether */ |
866 | do { | 854 | do { |
867 | struct inode *inode; | 855 | struct inode *inode; |
868 | 856 | ||
857 | /* detach from the system */ | ||
858 | dentry_lru_del(dentry); | ||
859 | __d_shrink(dentry); | ||
860 | |||
869 | if (dentry->d_count != 0) { | 861 | if (dentry->d_count != 0) { |
870 | printk(KERN_ERR | 862 | printk(KERN_ERR |
871 | "BUG: Dentry %p{i=%lx,n=%s}" | 863 | "BUG: Dentry %p{i=%lx,n=%s}" |
@@ -886,14 +878,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
886 | list_del(&dentry->d_u.d_child); | 878 | list_del(&dentry->d_u.d_child); |
887 | } else { | 879 | } else { |
888 | parent = dentry->d_parent; | 880 | parent = dentry->d_parent; |
889 | spin_lock(&parent->d_lock); | ||
890 | parent->d_count--; | 881 | parent->d_count--; |
891 | list_del(&dentry->d_u.d_child); | 882 | list_del(&dentry->d_u.d_child); |
892 | spin_unlock(&parent->d_lock); | ||
893 | } | 883 | } |
894 | 884 | ||
895 | detached++; | ||
896 | |||
897 | inode = dentry->d_inode; | 885 | inode = dentry->d_inode; |
898 | if (inode) { | 886 | if (inode) { |
899 | dentry->d_inode = NULL; | 887 | dentry->d_inode = NULL; |
@@ -938,9 +926,7 @@ void shrink_dcache_for_umount(struct super_block *sb) | |||
938 | 926 | ||
939 | dentry = sb->s_root; | 927 | dentry = sb->s_root; |
940 | sb->s_root = NULL; | 928 | sb->s_root = NULL; |
941 | spin_lock(&dentry->d_lock); | ||
942 | dentry->d_count--; | 929 | dentry->d_count--; |
943 | spin_unlock(&dentry->d_lock); | ||
944 | shrink_dcache_for_umount_subtree(dentry); | 930 | shrink_dcache_for_umount_subtree(dentry); |
945 | 931 | ||
946 | while (!hlist_bl_empty(&sb->s_anon)) { | 932 | while (!hlist_bl_empty(&sb->s_anon)) { |
@@ -1743,7 +1729,7 @@ seqretry: | |||
1743 | */ | 1729 | */ |
1744 | if (read_seqcount_retry(&dentry->d_seq, *seq)) | 1730 | if (read_seqcount_retry(&dentry->d_seq, *seq)) |
1745 | goto seqretry; | 1731 | goto seqretry; |
1746 | if (parent->d_flags & DCACHE_OP_COMPARE) { | 1732 | if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { |
1747 | if (parent->d_op->d_compare(parent, *inode, | 1733 | if (parent->d_op->d_compare(parent, *inode, |
1748 | dentry, i, | 1734 | dentry, i, |
1749 | tlen, tname, name)) | 1735 | tlen, tname, name)) |
@@ -2138,8 +2124,9 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, | |||
2138 | * @target: new dentry | 2124 | * @target: new dentry |
2139 | * | 2125 | * |
2140 | * Update the dcache to reflect the move of a file name. Negative | 2126 | * Update the dcache to reflect the move of a file name. Negative |
2141 | * dcache entries should not be moved in this way. Caller hold | 2127 | * dcache entries should not be moved in this way. Caller must hold |
2142 | * rename_lock. | 2128 | * rename_lock, the i_mutex of the source and target directories, |
2129 | * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). | ||
2143 | */ | 2130 | */ |
2144 | static void __d_move(struct dentry * dentry, struct dentry * target) | 2131 | static void __d_move(struct dentry * dentry, struct dentry * target) |
2145 | { | 2132 | { |
@@ -2202,7 +2189,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) | |||
2202 | * @target: new dentry | 2189 | * @target: new dentry |
2203 | * | 2190 | * |
2204 | * Update the dcache to reflect the move of a file name. Negative | 2191 | * Update the dcache to reflect the move of a file name. Negative |
2205 | * dcache entries should not be moved in this way. | 2192 | * dcache entries should not be moved in this way. See the locking |
2193 | * requirements for __d_move. | ||
2206 | */ | 2194 | */ |
2207 | void d_move(struct dentry *dentry, struct dentry *target) | 2195 | void d_move(struct dentry *dentry, struct dentry *target) |
2208 | { | 2196 | { |
@@ -2320,7 +2308,8 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) | |||
2320 | * @inode: inode to bind to the dentry, to which aliases may be attached | 2308 | * @inode: inode to bind to the dentry, to which aliases may be attached |
2321 | * | 2309 | * |
2322 | * Introduces a dentry into the tree, substituting an extant disconnected | 2310 | * Introduces a dentry into the tree, substituting an extant disconnected
2323 | * root directory alias in its place if there is one | 2311 | * root directory alias in its place if there is one. Caller must hold the |
2312 | * i_mutex of the parent directory. | ||
2324 | */ | 2313 | */ |
2325 | struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | 2314 | struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) |
2326 | { | 2315 | { |
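The dcache changes above factor the raw hash-list removal out of __d_drop() into __d_shrink(), so the umount-time shrinker can unhash dentries one by one as it consumes them, instead of pre-detaching whole subtrees under d_lock, and can skip the RCU-walk barrier that only concurrently reachable dentries need. Condensed from the hunks, with shrink_one_for_umount() as an illustrative name rather than a function from the patch:

/* Lookup-visible unhash: lockless RCU walkers must notice. */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		__d_shrink(dentry);
		dentry_rcuwalk_barrier(dentry);
	}
}

/* Umount-time teardown: the tree is already unreachable, so the
 * per-dentry barrier is unnecessary and __d_shrink() suffices. */
static void shrink_one_for_umount(struct dentry *dentry)
{
	dentry_lru_del(dentry);
	__d_shrink(dentry);
}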
diff --git a/fs/direct-io.c b/fs/direct-io.c index 01d2d9ef609c..44a360ca8046 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include <linux/buffer_head.h> | 35 | #include <linux/buffer_head.h> |
36 | #include <linux/rwsem.h> | 36 | #include <linux/rwsem.h> |
37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
38 | #include <asm/atomic.h> | 38 | #include <linux/atomic.h> |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * How many user pages to map in one call to get_user_pages(). This determines | 41 | * How many user pages to map in one call to get_user_pages(). This determines |
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index 1cd6d9d3e29a..cc16562654de 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config ECRYPT_FS | 1 | config ECRYPT_FS |
2 | tristate "eCrypt filesystem layer support (EXPERIMENTAL)" | 2 | tristate "eCrypt filesystem layer support (EXPERIMENTAL)" |
3 | depends on EXPERIMENTAL && KEYS && CRYPTO | 3 | depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) |
4 | select CRYPTO_ECB | 4 | select CRYPTO_ECB |
5 | select CRYPTO_CBC | 5 | select CRYPTO_CBC |
6 | select CRYPTO_MD5 | 6 | select CRYPTO_MD5 |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 43c7c43b06f5..b36c5572b3f3 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -29,6 +29,7 @@ | |||
29 | #define ECRYPTFS_KERNEL_H | 29 | #define ECRYPTFS_KERNEL_H |
30 | 30 | ||
31 | #include <keys/user-type.h> | 31 | #include <keys/user-type.h> |
32 | #include <keys/encrypted-type.h> | ||
32 | #include <linux/fs.h> | 33 | #include <linux/fs.h> |
33 | #include <linux/fs_stack.h> | 34 | #include <linux/fs_stack.h> |
34 | #include <linux/namei.h> | 35 | #include <linux/namei.h> |
@@ -36,125 +37,18 @@ | |||
36 | #include <linux/hash.h> | 37 | #include <linux/hash.h> |
37 | #include <linux/nsproxy.h> | 38 | #include <linux/nsproxy.h> |
38 | #include <linux/backing-dev.h> | 39 | #include <linux/backing-dev.h> |
40 | #include <linux/ecryptfs.h> | ||
39 | 41 | ||
40 | /* Version verification for shared data structures w/ userspace */ | ||
41 | #define ECRYPTFS_VERSION_MAJOR 0x00 | ||
42 | #define ECRYPTFS_VERSION_MINOR 0x04 | ||
43 | #define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 | ||
44 | /* These flags indicate which features are supported by the kernel | ||
45 | * module; userspace tools such as the mount helper read | ||
46 | * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine | ||
47 | * how to behave. */ | ||
48 | #define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 | ||
49 | #define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 | ||
50 | #define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 | ||
51 | #define ECRYPTFS_VERSIONING_POLICY 0x00000008 | ||
52 | #define ECRYPTFS_VERSIONING_XATTR 0x00000010 | ||
53 | #define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 | ||
54 | #define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 | ||
55 | #define ECRYPTFS_VERSIONING_HMAC 0x00000080 | ||
56 | #define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 | ||
57 | #define ECRYPTFS_VERSIONING_GCM 0x00000200 | ||
58 | #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ | ||
59 | | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ | ||
60 | | ECRYPTFS_VERSIONING_PUBKEY \ | ||
61 | | ECRYPTFS_VERSIONING_XATTR \ | ||
62 | | ECRYPTFS_VERSIONING_MULTKEY \ | ||
63 | | ECRYPTFS_VERSIONING_DEVMISC \ | ||
64 | | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) | ||
65 | #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 | ||
66 | #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH | ||
67 | #define ECRYPTFS_SALT_SIZE 8 | ||
68 | #define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2) | ||
69 | /* The original signature size is only for what is stored on disk; all | ||
70 | * in-memory representations are expanded hex, so it is better adapted to | ||
71 | * be passed around or referenced on the command line */ | ||
72 | #define ECRYPTFS_SIG_SIZE 8 | ||
73 | #define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2) | ||
74 | #define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX | ||
75 | #define ECRYPTFS_MAX_KEY_BYTES 64 | ||
76 | #define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512 | ||
77 | #define ECRYPTFS_DEFAULT_IV_BYTES 16 | 42 | #define ECRYPTFS_DEFAULT_IV_BYTES 16 |
78 | #define ECRYPTFS_FILE_VERSION 0x03 | ||
79 | #define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096 | 43 | #define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096 |
80 | #define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192 | 44 | #define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192 |
81 | #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 | 45 | #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 |
82 | #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ | 46 | #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ |
83 | #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) | 47 | #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) |
84 | #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 | ||
85 | #define ECRYPTFS_DEFAULT_NUM_USERS 4 | 48 | #define ECRYPTFS_DEFAULT_NUM_USERS 4 |
86 | #define ECRYPTFS_MAX_NUM_USERS 32768 | 49 | #define ECRYPTFS_MAX_NUM_USERS 32768 |
87 | #define ECRYPTFS_XATTR_NAME "user.ecryptfs" | 50 | #define ECRYPTFS_XATTR_NAME "user.ecryptfs" |
88 | 51 | ||
89 | #define RFC2440_CIPHER_DES3_EDE 0x02 | ||
90 | #define RFC2440_CIPHER_CAST_5 0x03 | ||
91 | #define RFC2440_CIPHER_BLOWFISH 0x04 | ||
92 | #define RFC2440_CIPHER_AES_128 0x07 | ||
93 | #define RFC2440_CIPHER_AES_192 0x08 | ||
94 | #define RFC2440_CIPHER_AES_256 0x09 | ||
95 | #define RFC2440_CIPHER_TWOFISH 0x0a | ||
96 | #define RFC2440_CIPHER_CAST_6 0x0b | ||
97 | |||
98 | #define RFC2440_CIPHER_RSA 0x01 | ||
99 | |||
100 | /** | ||
101 | * For convenience, we may need to pass around the encrypted session | ||
102 | * key between kernel and userspace because the authentication token | ||
103 | * may not be extractable. For example, the TPM may not release the | ||
104 | * private key, instead requiring the encrypted data and returning the | ||
105 | * decrypted data. | ||
106 | */ | ||
107 | struct ecryptfs_session_key { | ||
108 | #define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001 | ||
109 | #define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002 | ||
110 | #define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004 | ||
111 | #define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008 | ||
112 | u32 flags; | ||
113 | u32 encrypted_key_size; | ||
114 | u32 decrypted_key_size; | ||
115 | u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES]; | ||
116 | u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES]; | ||
117 | }; | ||
118 | |||
119 | struct ecryptfs_password { | ||
120 | u32 password_bytes; | ||
121 | s32 hash_algo; | ||
122 | u32 hash_iterations; | ||
123 | u32 session_key_encryption_key_bytes; | ||
124 | #define ECRYPTFS_PERSISTENT_PASSWORD 0x01 | ||
125 | #define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02 | ||
126 | u32 flags; | ||
127 | /* Iterated-hash concatenation of salt and passphrase */ | ||
128 | u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; | ||
129 | u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1]; | ||
130 | /* Always in expanded hex */ | ||
131 | u8 salt[ECRYPTFS_SALT_SIZE]; | ||
132 | }; | ||
133 | |||
134 | enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY}; | ||
135 | |||
136 | struct ecryptfs_private_key { | ||
137 | u32 key_size; | ||
138 | u32 data_len; | ||
139 | u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1]; | ||
140 | char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1]; | ||
141 | u8 data[]; | ||
142 | }; | ||
143 | |||
144 | /* May be a password or a private key */ | ||
145 | struct ecryptfs_auth_tok { | ||
146 | u16 version; /* 8-bit major and 8-bit minor */ | ||
147 | u16 token_type; | ||
148 | #define ECRYPTFS_ENCRYPT_ONLY 0x00000001 | ||
149 | u32 flags; | ||
150 | struct ecryptfs_session_key session_key; | ||
151 | u8 reserved[32]; | ||
152 | union { | ||
153 | struct ecryptfs_password password; | ||
154 | struct ecryptfs_private_key private_key; | ||
155 | } token; | ||
156 | } __attribute__ ((packed)); | ||
157 | |||
158 | void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok); | 52 | void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok); |
159 | extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size); | 53 | extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size); |
160 | extern void ecryptfs_from_hex(char *dst, char *src, int dst_size); | 54 | extern void ecryptfs_from_hex(char *dst, char *src, int dst_size); |
@@ -185,11 +79,47 @@ struct ecryptfs_page_crypt_context { | |||
185 | } param; | 79 | } param; |
186 | }; | 80 | }; |
187 | 81 | ||
82 | #if defined(CONFIG_ENCRYPTED_KEYS) || defined(CONFIG_ENCRYPTED_KEYS_MODULE) | ||
83 | static inline struct ecryptfs_auth_tok * | ||
84 | ecryptfs_get_encrypted_key_payload_data(struct key *key) | ||
85 | { | ||
86 | if (key->type == &key_type_encrypted) | ||
87 | return (struct ecryptfs_auth_tok *) | ||
88 | (&((struct encrypted_key_payload *)key->payload.data)->payload_data); | ||
89 | else | ||
90 | return NULL; | ||
91 | } | ||
92 | |||
93 | static inline struct key *ecryptfs_get_encrypted_key(char *sig) | ||
94 | { | ||
95 | return request_key(&key_type_encrypted, sig, NULL); | ||
96 | } | ||
97 | |||
98 | #else | ||
99 | static inline struct ecryptfs_auth_tok * | ||
100 | ecryptfs_get_encrypted_key_payload_data(struct key *key) | ||
101 | { | ||
102 | return NULL; | ||
103 | } | ||
104 | |||
105 | static inline struct key *ecryptfs_get_encrypted_key(char *sig) | ||
106 | { | ||
107 | return ERR_PTR(-ENOKEY); | ||
108 | } | ||
109 | |||
110 | #endif /* CONFIG_ENCRYPTED_KEYS */ | ||
111 | |||
188 | static inline struct ecryptfs_auth_tok * | 112 | static inline struct ecryptfs_auth_tok * |
189 | ecryptfs_get_key_payload_data(struct key *key) | 113 | ecryptfs_get_key_payload_data(struct key *key) |
190 | { | 114 | { |
191 | return (struct ecryptfs_auth_tok *) | 115 | struct ecryptfs_auth_tok *auth_tok; |
192 | (((struct user_key_payload*)key->payload.data)->data); | 116 | |
117 | auth_tok = ecryptfs_get_encrypted_key_payload_data(key); | ||
118 | if (!auth_tok) | ||
119 | return (struct ecryptfs_auth_tok *) | ||
120 | (((struct user_key_payload *)key->payload.data)->data); | ||
121 | else | ||
122 | return auth_tok; | ||
193 | } | 123 | } |
194 | 124 | ||
195 | #define ECRYPTFS_MAX_KEYSET_SIZE 1024 | 125 | #define ECRYPTFS_MAX_KEYSET_SIZE 1024 |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 340c657a108c..11f8582d7218 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -69,6 +69,7 @@ static int ecryptfs_inode_set(struct inode *inode, void *opaque) | |||
69 | inode->i_ino = lower_inode->i_ino; | 69 | inode->i_ino = lower_inode->i_ino; |
70 | inode->i_version++; | 70 | inode->i_version++; |
71 | inode->i_mapping->a_ops = &ecryptfs_aops; | 71 | inode->i_mapping->a_ops = &ecryptfs_aops; |
72 | inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; | ||
72 | 73 | ||
73 | if (S_ISLNK(inode->i_mode)) | 74 | if (S_ISLNK(inode->i_mode)) |
74 | inode->i_op = &ecryptfs_symlink_iops; | 75 | inode->i_op = &ecryptfs_symlink_iops; |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index fa8049ecdc64..ac1ad48c2376 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -1635,11 +1635,14 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, | |||
1635 | 1635 | ||
1636 | (*auth_tok_key) = request_key(&key_type_user, sig, NULL); | 1636 | (*auth_tok_key) = request_key(&key_type_user, sig, NULL); |
1637 | if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { | 1637 | if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { |
1638 | printk(KERN_ERR "Could not find key with description: [%s]\n", | 1638 | (*auth_tok_key) = ecryptfs_get_encrypted_key(sig); |
1639 | sig); | 1639 | if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { |
1640 | rc = process_request_key_err(PTR_ERR(*auth_tok_key)); | 1640 | printk(KERN_ERR "Could not find key with description: [%s]\n", |
1641 | (*auth_tok_key) = NULL; | 1641 | sig); |
1642 | goto out; | 1642 | rc = process_request_key_err(PTR_ERR(*auth_tok_key)); |
1643 | (*auth_tok_key) = NULL; | ||
1644 | goto out; | ||
1645 | } | ||
1643 | } | 1646 | } |
1644 | down_write(&(*auth_tok_key)->sem); | 1647 | down_write(&(*auth_tok_key)->sem); |
1645 | rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok); | 1648 | rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok); |
@@ -1868,11 +1871,6 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, | |||
1868 | * just one will be sufficient to decrypt to get the FEK. */ | 1871 | * just one will be sufficient to decrypt to get the FEK. */ |
1869 | find_next_matching_auth_tok: | 1872 | find_next_matching_auth_tok: |
1870 | found_auth_tok = 0; | 1873 | found_auth_tok = 0; |
1871 | if (auth_tok_key) { | ||
1872 | up_write(&(auth_tok_key->sem)); | ||
1873 | key_put(auth_tok_key); | ||
1874 | auth_tok_key = NULL; | ||
1875 | } | ||
1876 | list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) { | 1874 | list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) { |
1877 | candidate_auth_tok = &auth_tok_list_item->auth_tok; | 1875 | candidate_auth_tok = &auth_tok_list_item->auth_tok; |
1878 | if (unlikely(ecryptfs_verbosity > 0)) { | 1876 | if (unlikely(ecryptfs_verbosity > 0)) { |
@@ -1909,14 +1907,22 @@ found_matching_auth_tok: | |||
1909 | memcpy(&(candidate_auth_tok->token.private_key), | 1907 | memcpy(&(candidate_auth_tok->token.private_key), |
1910 | &(matching_auth_tok->token.private_key), | 1908 | &(matching_auth_tok->token.private_key), |
1911 | sizeof(struct ecryptfs_private_key)); | 1909 | sizeof(struct ecryptfs_private_key)); |
1910 | up_write(&(auth_tok_key->sem)); | ||
1911 | key_put(auth_tok_key); | ||
1912 | rc = decrypt_pki_encrypted_session_key(candidate_auth_tok, | 1912 | rc = decrypt_pki_encrypted_session_key(candidate_auth_tok, |
1913 | crypt_stat); | 1913 | crypt_stat); |
1914 | } else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) { | 1914 | } else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) { |
1915 | memcpy(&(candidate_auth_tok->token.password), | 1915 | memcpy(&(candidate_auth_tok->token.password), |
1916 | &(matching_auth_tok->token.password), | 1916 | &(matching_auth_tok->token.password), |
1917 | sizeof(struct ecryptfs_password)); | 1917 | sizeof(struct ecryptfs_password)); |
1918 | up_write(&(auth_tok_key->sem)); | ||
1919 | key_put(auth_tok_key); | ||
1918 | rc = decrypt_passphrase_encrypted_session_key( | 1920 | rc = decrypt_passphrase_encrypted_session_key( |
1919 | candidate_auth_tok, crypt_stat); | 1921 | candidate_auth_tok, crypt_stat); |
1922 | } else { | ||
1923 | up_write(&(auth_tok_key->sem)); | ||
1924 | key_put(auth_tok_key); | ||
1925 | rc = -EINVAL; | ||
1920 | } | 1926 | } |
1921 | if (rc) { | 1927 | if (rc) { |
1922 | struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp; | 1928 | struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp; |
@@ -1956,21 +1962,18 @@ found_matching_auth_tok: | |||
1956 | out_wipe_list: | 1962 | out_wipe_list: |
1957 | wipe_auth_tok_list(&auth_tok_list); | 1963 | wipe_auth_tok_list(&auth_tok_list); |
1958 | out: | 1964 | out: |
1959 | if (auth_tok_key) { | ||
1960 | up_write(&(auth_tok_key->sem)); | ||
1961 | key_put(auth_tok_key); | ||
1962 | } | ||
1963 | return rc; | 1965 | return rc; |
1964 | } | 1966 | } |
1965 | 1967 | ||
1966 | static int | 1968 | static int |
1967 | pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, | 1969 | pki_encrypt_session_key(struct key *auth_tok_key, |
1970 | struct ecryptfs_auth_tok *auth_tok, | ||
1968 | struct ecryptfs_crypt_stat *crypt_stat, | 1971 | struct ecryptfs_crypt_stat *crypt_stat, |
1969 | struct ecryptfs_key_record *key_rec) | 1972 | struct ecryptfs_key_record *key_rec) |
1970 | { | 1973 | { |
1971 | struct ecryptfs_msg_ctx *msg_ctx = NULL; | 1974 | struct ecryptfs_msg_ctx *msg_ctx = NULL; |
1972 | char *payload = NULL; | 1975 | char *payload = NULL; |
1973 | size_t payload_len; | 1976 | size_t payload_len = 0; |
1974 | struct ecryptfs_message *msg; | 1977 | struct ecryptfs_message *msg; |
1975 | int rc; | 1978 | int rc; |
1976 | 1979 | ||
@@ -1979,6 +1982,8 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
1979 | crypt_stat->cipher, | 1982 | crypt_stat->cipher, |
1980 | crypt_stat->key_size), | 1983 | crypt_stat->key_size), |
1981 | crypt_stat, &payload, &payload_len); | 1984 | crypt_stat, &payload, &payload_len); |
1985 | up_write(&(auth_tok_key->sem)); | ||
1986 | key_put(auth_tok_key); | ||
1982 | if (rc) { | 1987 | if (rc) { |
1983 | ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); | 1988 | ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); |
1984 | goto out; | 1989 | goto out; |
@@ -2008,6 +2013,8 @@ out: | |||
2008 | * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet | 2013 | * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet |
2009 | * @dest: Buffer into which to write the packet | 2014 | * @dest: Buffer into which to write the packet |
2010 | * @remaining_bytes: Maximum number of bytes that can be written | 2015 | * @remaining_bytes: Maximum number of bytes that can be written
2016 | * @auth_tok_key: The authentication token key to unlock and put when done with | ||
2017 | * @auth_tok | ||
2011 | * @auth_tok: The authentication token used for generating the tag 1 packet | 2018 | * @auth_tok: The authentication token used for generating the tag 1 packet |
2012 | * @crypt_stat: The cryptographic context | 2019 | * @crypt_stat: The cryptographic context |
2013 | * @key_rec: The key record struct for the tag 1 packet | 2020 | * @key_rec: The key record struct for the tag 1 packet |
@@ -2018,7 +2025,7 @@ out: | |||
2018 | */ | 2025 | */ |
2019 | static int | 2026 | static int |
2020 | write_tag_1_packet(char *dest, size_t *remaining_bytes, | 2027 | write_tag_1_packet(char *dest, size_t *remaining_bytes, |
2021 | struct ecryptfs_auth_tok *auth_tok, | 2028 | struct key *auth_tok_key, struct ecryptfs_auth_tok *auth_tok, |
2022 | struct ecryptfs_crypt_stat *crypt_stat, | 2029 | struct ecryptfs_crypt_stat *crypt_stat, |
2023 | struct ecryptfs_key_record *key_rec, size_t *packet_size) | 2030 | struct ecryptfs_key_record *key_rec, size_t *packet_size) |
2024 | { | 2031 | { |
@@ -2039,12 +2046,15 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, | |||
2039 | memcpy(key_rec->enc_key, | 2046 | memcpy(key_rec->enc_key, |
2040 | auth_tok->session_key.encrypted_key, | 2047 | auth_tok->session_key.encrypted_key, |
2041 | auth_tok->session_key.encrypted_key_size); | 2048 | auth_tok->session_key.encrypted_key_size); |
2049 | up_write(&(auth_tok_key->sem)); | ||
2050 | key_put(auth_tok_key); | ||
2042 | goto encrypted_session_key_set; | 2051 | goto encrypted_session_key_set; |
2043 | } | 2052 | } |
2044 | if (auth_tok->session_key.encrypted_key_size == 0) | 2053 | if (auth_tok->session_key.encrypted_key_size == 0) |
2045 | auth_tok->session_key.encrypted_key_size = | 2054 | auth_tok->session_key.encrypted_key_size = |
2046 | auth_tok->token.private_key.key_size; | 2055 | auth_tok->token.private_key.key_size; |
2047 | rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); | 2056 | rc = pki_encrypt_session_key(auth_tok_key, auth_tok, crypt_stat, |
2057 | key_rec); | ||
2048 | if (rc) { | 2058 | if (rc) { |
2049 | printk(KERN_ERR "Failed to encrypt session key via a key " | 2059 | printk(KERN_ERR "Failed to encrypt session key via a key " |
2050 | "module; rc = [%d]\n", rc); | 2060 | "module; rc = [%d]\n", rc); |
@@ -2421,6 +2431,8 @@ ecryptfs_generate_key_packet_set(char *dest_base, | |||
2421 | &max, auth_tok, | 2431 | &max, auth_tok, |
2422 | crypt_stat, key_rec, | 2432 | crypt_stat, key_rec, |
2423 | &written); | 2433 | &written); |
2434 | up_write(&(auth_tok_key->sem)); | ||
2435 | key_put(auth_tok_key); | ||
2424 | if (rc) { | 2436 | if (rc) { |
2425 | ecryptfs_printk(KERN_WARNING, "Error " | 2437 | ecryptfs_printk(KERN_WARNING, "Error " |
2426 | "writing tag 3 packet\n"); | 2438 | "writing tag 3 packet\n"); |
@@ -2438,8 +2450,8 @@ ecryptfs_generate_key_packet_set(char *dest_base, | |||
2438 | } | 2450 | } |
2439 | (*len) += written; | 2451 | (*len) += written; |
2440 | } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { | 2452 | } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { |
2441 | rc = write_tag_1_packet(dest_base + (*len), | 2453 | rc = write_tag_1_packet(dest_base + (*len), &max, |
2442 | &max, auth_tok, | 2454 | auth_tok_key, auth_tok, |
2443 | crypt_stat, key_rec, &written); | 2455 | crypt_stat, key_rec, &written); |
2444 | if (rc) { | 2456 | if (rc) { |
2445 | ecryptfs_printk(KERN_WARNING, "Error " | 2457 | ecryptfs_printk(KERN_WARNING, "Error " |
@@ -2448,14 +2460,13 @@ ecryptfs_generate_key_packet_set(char *dest_base, | |||
2448 | } | 2460 | } |
2449 | (*len) += written; | 2461 | (*len) += written; |
2450 | } else { | 2462 | } else { |
2463 | up_write(&(auth_tok_key->sem)); | ||
2464 | key_put(auth_tok_key); | ||
2451 | ecryptfs_printk(KERN_WARNING, "Unsupported " | 2465 | ecryptfs_printk(KERN_WARNING, "Unsupported " |
2452 | "authentication token type\n"); | 2466 | "authentication token type\n"); |
2453 | rc = -EINVAL; | 2467 | rc = -EINVAL; |
2454 | goto out_free; | 2468 | goto out_free; |
2455 | } | 2469 | } |
2456 | up_write(&(auth_tok_key->sem)); | ||
2457 | key_put(auth_tok_key); | ||
2458 | auth_tok_key = NULL; | ||
2459 | } | 2470 | } |
2460 | if (likely(max > 0)) { | 2471 | if (likely(max > 0)) { |
2461 | dest_base[(*len)] = 0x00; | 2472 | dest_base[(*len)] = 0x00; |
@@ -2468,11 +2479,6 @@ out_free: | |||
2468 | out: | 2479 | out: |
2469 | if (rc) | 2480 | if (rc) |
2470 | (*len) = 0; | 2481 | (*len) = 0; |
2471 | if (auth_tok_key) { | ||
2472 | up_write(&(auth_tok_key->sem)); | ||
2473 | key_put(auth_tok_key); | ||
2474 | } | ||
2475 | |||
2476 | mutex_unlock(&crypt_stat->keysig_list_mutex); | 2482 | mutex_unlock(&crypt_stat->keysig_list_mutex); |
2477 | return rc; | 2483 | return rc; |
2478 | } | 2484 | } |
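The keystore.c change makes signature lookup try the user-key type first and only fall back to the encrypted-key type before reporting failure through the usual request_key error path. Reduced to its skeleton, with the wrapper name lookup_auth_tok_key() invented for illustration:

static struct key *lookup_auth_tok_key(char *sig)
{
	struct key *key;

	key = request_key(&key_type_user, sig, NULL);
	if (!key || IS_ERR(key))
		/* Falls back to the encrypted-key type; the stub in
		 * ecryptfs_kernel.h returns ERR_PTR(-ENOKEY) when
		 * CONFIG_ENCRYPTED_KEYS is not built. */
		key = ecryptfs_get_encrypted_key(sig);
	return key;
}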
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9f1bb747d77d..b4a6befb1216 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | |||
175 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, | 175 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, |
176 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, | 176 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, |
177 | ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only, | 177 | ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only, |
178 | ecryptfs_opt_check_dev_ruid, | ||
178 | ecryptfs_opt_err }; | 179 | ecryptfs_opt_err }; |
179 | 180 | ||
180 | static const match_table_t tokens = { | 181 | static const match_table_t tokens = { |
@@ -191,6 +192,7 @@ static const match_table_t tokens = { | |||
191 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, | 192 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, |
192 | {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, | 193 | {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, |
193 | {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"}, | 194 | {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"}, |
195 | {ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"}, | ||
194 | {ecryptfs_opt_err, NULL} | 196 | {ecryptfs_opt_err, NULL} |
195 | }; | 197 | }; |
196 | 198 | ||
@@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat( | |||
236 | * ecryptfs_parse_options | 238 | * ecryptfs_parse_options |
237 | * @sb: The ecryptfs super block | 239 | * @sb: The ecryptfs super block |
238 | * @options: The options passed to the kernel | 240 | * @options: The options passed to the kernel |
241 | * @check_ruid: set to 1 if device uid should be checked against the ruid | ||
239 | * | 242 | * |
240 | * Parse mount options: | 243 | * Parse mount options: |
241 | * debug=N - ecryptfs_verbosity level for debug output | 244 | * debug=N - ecryptfs_verbosity level for debug output |
@@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat( | |||
251 | * | 254 | * |
252 | * Returns zero on success; non-zero on error | 255 | * Returns zero on success; non-zero on error |
253 | */ | 256 | */ |
254 | static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) | 257 | static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, |
258 | uid_t *check_ruid) | ||
255 | { | 259 | { |
256 | char *p; | 260 | char *p; |
257 | int rc = 0; | 261 | int rc = 0; |
@@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) | |||
276 | char *cipher_key_bytes_src; | 280 | char *cipher_key_bytes_src; |
277 | char *fn_cipher_key_bytes_src; | 281 | char *fn_cipher_key_bytes_src; |
278 | 282 | ||
283 | *check_ruid = 0; | ||
284 | |||
279 | if (!options) { | 285 | if (!options) { |
280 | rc = -EINVAL; | 286 | rc = -EINVAL; |
281 | goto out; | 287 | goto out; |
@@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) | |||
380 | mount_crypt_stat->flags |= | 386 | mount_crypt_stat->flags |= |
381 | ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY; | 387 | ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY; |
382 | break; | 388 | break; |
389 | case ecryptfs_opt_check_dev_ruid: | ||
390 | *check_ruid = 1; | ||
391 | break; | ||
383 | case ecryptfs_opt_err: | 392 | case ecryptfs_opt_err: |
384 | default: | 393 | default: |
385 | printk(KERN_WARNING | 394 | printk(KERN_WARNING |
@@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
475 | const char *err = "Getting sb failed"; | 484 | const char *err = "Getting sb failed"; |
476 | struct inode *inode; | 485 | struct inode *inode; |
477 | struct path path; | 486 | struct path path; |
487 | uid_t check_ruid; | ||
478 | int rc; | 488 | int rc; |
479 | 489 | ||
480 | sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); | 490 | sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); |
@@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
483 | goto out; | 493 | goto out; |
484 | } | 494 | } |
485 | 495 | ||
486 | rc = ecryptfs_parse_options(sbi, raw_data); | 496 | rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid); |
487 | if (rc) { | 497 | if (rc) { |
488 | err = "Error parsing options"; | 498 | err = "Error parsing options"; |
489 | goto out; | 499 | goto out; |
@@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
521 | "known incompatibilities\n"); | 531 | "known incompatibilities\n"); |
522 | goto out_free; | 532 | goto out_free; |
523 | } | 533 | } |
534 | |||
535 | if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) { | ||
536 | rc = -EPERM; | ||
537 | printk(KERN_ERR "Mount of device (uid: %d) not owned by " | ||
538 | "requested user (uid: %d)\n", | ||
539 | path.dentry->d_inode->i_uid, current_uid()); | ||
540 | goto out_free; | ||
541 | } | ||
542 | |||
524 | ecryptfs_set_superblock_lower(s, path.dentry->d_sb); | 543 | ecryptfs_set_superblock_lower(s, path.dentry->d_sb); |
525 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 544 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
526 | s->s_blocksize = path.dentry->d_sb->s_blocksize; | 545 | s->s_blocksize = path.dentry->d_sb->s_blocksize; |
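Adding the ecryptfs_check_dev_ruid mount option touches three places in main.c: the option enum, the match_table_t entry, and a case in the ecryptfs_parse_options() switch, with the resulting flag enforced against current_uid() at mount time. A stripped-down sketch of the same match_token() pattern; the demo_ names are placeholders, not symbols from the patch:

#include <linux/parser.h>

enum { demo_opt_check_dev_ruid, demo_opt_err };

static const match_table_t demo_tokens = {
	{demo_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"},
	{demo_opt_err, NULL}
};

static void demo_parse_one(char *p, uid_t *check_ruid)
{
	substring_t args[MAX_OPT_ARGS];

	switch (match_token(p, demo_tokens, args)) {
	case demo_opt_check_dev_ruid:
		*check_ruid = 1;	/* enforced against current_uid() later */
		break;
	default:
		break;
	}
}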
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 85d430963116..3745f7c2b9c2 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c | |||
@@ -39,15 +39,16 @@ | |||
39 | int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, | 39 | int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, |
40 | loff_t offset, size_t size) | 40 | loff_t offset, size_t size) |
41 | { | 41 | { |
42 | struct ecryptfs_inode_info *inode_info; | 42 | struct file *lower_file; |
43 | mm_segment_t fs_save; | 43 | mm_segment_t fs_save; |
44 | ssize_t rc; | 44 | ssize_t rc; |
45 | 45 | ||
46 | inode_info = ecryptfs_inode_to_private(ecryptfs_inode); | 46 | lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; |
47 | BUG_ON(!inode_info->lower_file); | 47 | if (!lower_file) |
48 | return -EIO; | ||
48 | fs_save = get_fs(); | 49 | fs_save = get_fs(); |
49 | set_fs(get_ds()); | 50 | set_fs(get_ds()); |
50 | rc = vfs_write(inode_info->lower_file, data, size, &offset); | 51 | rc = vfs_write(lower_file, data, size, &offset); |
51 | set_fs(fs_save); | 52 | set_fs(fs_save); |
52 | mark_inode_dirty_sync(ecryptfs_inode); | 53 | mark_inode_dirty_sync(ecryptfs_inode); |
53 | return rc; | 54 | return rc; |
@@ -225,15 +226,16 @@ out: | |||
225 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, | 226 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, |
226 | struct inode *ecryptfs_inode) | 227 | struct inode *ecryptfs_inode) |
227 | { | 228 | { |
228 | struct ecryptfs_inode_info *inode_info = | 229 | struct file *lower_file; |
229 | ecryptfs_inode_to_private(ecryptfs_inode); | ||
230 | mm_segment_t fs_save; | 230 | mm_segment_t fs_save; |
231 | ssize_t rc; | 231 | ssize_t rc; |
232 | 232 | ||
233 | BUG_ON(!inode_info->lower_file); | 233 | lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; |
234 | if (!lower_file) | ||
235 | return -EIO; | ||
234 | fs_save = get_fs(); | 236 | fs_save = get_fs(); |
235 | set_fs(get_ds()); | 237 | set_fs(get_ds()); |
236 | rc = vfs_read(inode_info->lower_file, data, size, &offset); | 238 | rc = vfs_read(lower_file, data, size, &offset); |
237 | set_fs(fs_save); | 239 | set_fs(fs_save); |
238 | return rc; | 240 | return rc; |
239 | } | 241 | } |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5e480d555049..9026fc91fe3b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -37,7 +37,7 @@ | |||
37 | #include <asm/system.h> | 37 | #include <asm/system.h> |
38 | #include <asm/io.h> | 38 | #include <asm/io.h> |
39 | #include <asm/mman.h> | 39 | #include <asm/mman.h> |
40 | #include <asm/atomic.h> | 40 | #include <linux/atomic.h> |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * LOCKING: | 43 | * LOCKING: |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -181,14 +181,7 @@ static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) | |||
181 | return; | 181 | return; |
182 | 182 | ||
183 | bprm->vma_pages = pages; | 183 | bprm->vma_pages = pages; |
184 | |||
185 | #ifdef SPLIT_RSS_COUNTING | ||
186 | add_mm_counter(mm, MM_ANONPAGES, diff); | ||
187 | #else | ||
188 | spin_lock(&mm->page_table_lock); | ||
189 | add_mm_counter(mm, MM_ANONPAGES, diff); | 184 | add_mm_counter(mm, MM_ANONPAGES, diff); |
190 | spin_unlock(&mm->page_table_lock); | ||
191 | #endif | ||
192 | } | 185 | } |
193 | 186 | ||
194 | static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | 187 | static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, |
@@ -277,7 +270,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) | |||
277 | * use STACK_TOP because that can depend on attributes which aren't | 270 | * use STACK_TOP because that can depend on attributes which aren't |
278 | * configured yet. | 271 | * configured yet. |
279 | */ | 272 | */ |
280 | BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); | 273 | BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); |
281 | vma->vm_end = STACK_TOP_MAX; | 274 | vma->vm_end = STACK_TOP_MAX; |
282 | vma->vm_start = vma->vm_end - PAGE_SIZE; | 275 | vma->vm_start = vma->vm_end - PAGE_SIZE; |
283 | vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; | 276 | vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; |
@@ -1430,9 +1423,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1430 | } | 1423 | } |
1431 | } | 1424 | } |
1432 | read_unlock(&binfmt_lock); | 1425 | read_unlock(&binfmt_lock); |
1426 | #ifdef CONFIG_MODULES | ||
1433 | if (retval != -ENOEXEC || bprm->mm == NULL) { | 1427 | if (retval != -ENOEXEC || bprm->mm == NULL) { |
1434 | break; | 1428 | break; |
1435 | #ifdef CONFIG_MODULES | ||
1436 | } else { | 1429 | } else { |
1437 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) | 1430 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) |
1438 | if (printable(bprm->buf[0]) && | 1431 | if (printable(bprm->buf[0]) && |
@@ -1440,9 +1433,13 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1440 | printable(bprm->buf[2]) && | 1433 | printable(bprm->buf[2]) && |
1441 | printable(bprm->buf[3])) | 1434 | printable(bprm->buf[3])) |
1442 | break; /* -ENOEXEC */ | 1435 | break; /* -ENOEXEC */ |
1436 | if (try) | ||
1437 | break; /* -ENOEXEC */ | ||
1443 | request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); | 1438 | request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); |
1444 | #endif | ||
1445 | } | 1439 | } |
1440 | #else | ||
1441 | break; | ||
1442 | #endif | ||
1446 | } | 1443 | } |
1447 | return retval; | 1444 | return retval; |
1448 | } | 1445 | } |
@@ -1462,6 +1459,23 @@ static int do_execve_common(const char *filename, | |||
1462 | struct files_struct *displaced; | 1459 | struct files_struct *displaced; |
1463 | bool clear_in_exec; | 1460 | bool clear_in_exec; |
1464 | int retval; | 1461 | int retval; |
1462 | const struct cred *cred = current_cred(); | ||
1463 | |||
1464 | /* | ||
1465 | * We move the actual failure in case of RLIMIT_NPROC excess from | ||
1466 | * set*uid() to execve() because too many poorly written programs | ||
1467 | * don't check setuid() return code. Here we additionally recheck | ||
1468 | * whether NPROC limit is still exceeded. | ||
1469 | */ | ||
1470 | if ((current->flags & PF_NPROC_EXCEEDED) && | ||
1471 | atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { | ||
1472 | retval = -EAGAIN; | ||
1473 | goto out_ret; | ||
1474 | } | ||
1475 | |||
1476 | /* We're below the limit (still or again), so we don't want to make | ||
1477 | * further execve() calls fail. */ | ||
1478 | current->flags &= ~PF_NPROC_EXCEEDED; | ||
1465 | 1479 | ||
1466 | retval = unshare_files(&displaced); | 1480 | retval = unshare_files(&displaced); |
1467 | if (retval) | 1481 | if (retval) |
@@ -1649,15 +1663,26 @@ expand_fail: | |||
1649 | return ret; | 1663 | return ret; |
1650 | } | 1664 | } |
1651 | 1665 | ||
1666 | static void cn_escape(char *str) | ||
1667 | { | ||
1668 | for (; *str; str++) | ||
1669 | if (*str == '/') | ||
1670 | *str = '!'; | ||
1671 | } | ||
1672 | |||
1652 | static int cn_print_exe_file(struct core_name *cn) | 1673 | static int cn_print_exe_file(struct core_name *cn) |
1653 | { | 1674 | { |
1654 | struct file *exe_file; | 1675 | struct file *exe_file; |
1655 | char *pathbuf, *path, *p; | 1676 | char *pathbuf, *path; |
1656 | int ret; | 1677 | int ret; |
1657 | 1678 | ||
1658 | exe_file = get_mm_exe_file(current->mm); | 1679 | exe_file = get_mm_exe_file(current->mm); |
1659 | if (!exe_file) | 1680 | if (!exe_file) { |
1660 | return cn_printf(cn, "(unknown)"); | 1681 | char *commstart = cn->corename + cn->used; |
1682 | ret = cn_printf(cn, "%s (path unknown)", current->comm); | ||
1683 | cn_escape(commstart); | ||
1684 | return ret; | ||
1685 | } | ||
1661 | 1686 | ||
1662 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); | 1687 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); |
1663 | if (!pathbuf) { | 1688 | if (!pathbuf) { |
@@ -1671,9 +1696,7 @@ static int cn_print_exe_file(struct core_name *cn) | |||
1671 | goto free_buf; | 1696 | goto free_buf; |
1672 | } | 1697 | } |
1673 | 1698 | ||
1674 | for (p = path; *p; p++) | 1699 | cn_escape(path); |
1675 | if (*p == '/') | ||
1676 | *p = '!'; | ||
1677 | 1700 | ||
1678 | ret = cn_printf(cn, "%s", path); | 1701 | ret = cn_printf(cn, "%s", path); |
1679 | 1702 | ||
@@ -1745,16 +1768,22 @@ static int format_corename(struct core_name *cn, long signr) | |||
1745 | break; | 1768 | break; |
1746 | } | 1769 | } |
1747 | /* hostname */ | 1770 | /* hostname */ |
1748 | case 'h': | 1771 | case 'h': { |
1772 | char *namestart = cn->corename + cn->used; | ||
1749 | down_read(&uts_sem); | 1773 | down_read(&uts_sem); |
1750 | err = cn_printf(cn, "%s", | 1774 | err = cn_printf(cn, "%s", |
1751 | utsname()->nodename); | 1775 | utsname()->nodename); |
1752 | up_read(&uts_sem); | 1776 | up_read(&uts_sem); |
1777 | cn_escape(namestart); | ||
1753 | break; | 1778 | break; |
1779 | } | ||
1754 | /* executable */ | 1780 | /* executable */ |
1755 | case 'e': | 1781 | case 'e': { |
1782 | char *commstart = cn->corename + cn->used; | ||
1756 | err = cn_printf(cn, "%s", current->comm); | 1783 | err = cn_printf(cn, "%s", current->comm); |
1784 | cn_escape(commstart); | ||
1757 | break; | 1785 | break; |
1786 | } | ||
1758 | case 'E': | 1787 | case 'E': |
1759 | err = cn_print_exe_file(cn); | 1788 | err = cn_print_exe_file(cn); |
1760 | break; | 1789 | break; |
@@ -2118,16 +2147,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
2118 | 2147 | ||
2119 | ispipe = format_corename(&cn, signr); | 2148 | ispipe = format_corename(&cn, signr); |
2120 | 2149 | ||
2121 | if (ispipe == -ENOMEM) { | ||
2122 | printk(KERN_WARNING "format_corename failed\n"); | ||
2123 | printk(KERN_WARNING "Aborting core\n"); | ||
2124 | goto fail_corename; | ||
2125 | } | ||
2126 | |||
2127 | if (ispipe) { | 2150 | if (ispipe) { |
2128 | int dump_count; | 2151 | int dump_count; |
2129 | char **helper_argv; | 2152 | char **helper_argv; |
2130 | 2153 | ||
2154 | if (ispipe < 0) { | ||
2155 | printk(KERN_WARNING "format_corename failed\n"); | ||
2156 | printk(KERN_WARNING "Aborting core\n"); | ||
2157 | goto fail_corename; | ||
2158 | } | ||
2159 | |||
2131 | if (cprm.limit == 1) { | 2160 | if (cprm.limit == 1) { |
2132 | /* | 2161 | /* |
2133 | * Normally core limits are irrelevant to pipes, since | 2162 | * Normally core limits are irrelevant to pipes, since |
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index 2d0f757fda3e..c5a5855a6c44 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild | |||
@@ -12,5 +12,8 @@ | |||
12 | # Kbuild - Gets included from the Kernels Makefile and build system | 12 | # Kbuild - Gets included from the Kernels Makefile and build system |
13 | # | 13 | # |
14 | 14 | ||
15 | exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o | 15 | # ore module library |
16 | obj-$(CONFIG_ORE) += ore.o | ||
17 | |||
18 | exofs-y := inode.o file.o symlink.o namei.o dir.o super.o | ||
16 | obj-$(CONFIG_EXOFS_FS) += exofs.o | 19 | obj-$(CONFIG_EXOFS_FS) += exofs.o |
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index 86194b2f799d..70bae4149291 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig | |||
@@ -1,6 +1,10 @@ | |||
1 | config ORE | ||
2 | tristate | ||
3 | |||
1 | config EXOFS_FS | 4 | config EXOFS_FS |
2 | tristate "exofs: OSD based file system support" | 5 | tristate "exofs: OSD based file system support" |
3 | depends on SCSI_OSD_ULD | 6 | depends on SCSI_OSD_ULD |
7 | select ORE | ||
4 | help | 8 | help |
5 | EXOFS is a file system that uses an OSD storage device, | 9 | EXOFS is a file system that uses an OSD storage device, |
6 | as its backing storage. | 10 | as its backing storage. |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index c965806c2821..f4e442ec7445 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -36,12 +36,9 @@ | |||
36 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
37 | #include <linux/time.h> | 37 | #include <linux/time.h> |
38 | #include <linux/backing-dev.h> | 38 | #include <linux/backing-dev.h> |
39 | #include "common.h" | 39 | #include <scsi/osd_ore.h> |
40 | 40 | ||
41 | /* FIXME: Remove once pnfs hits mainline | 41 | #include "common.h" |
42 | * #include <linux/exportfs/pnfs_osd_xdr.h> | ||
43 | */ | ||
44 | #include "pnfs.h" | ||
45 | 42 | ||
46 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) | 43 | #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) |
47 | 44 | ||
@@ -56,27 +53,11 @@ | |||
56 | /* u64 has problems with printk; this will cast it to unsigned long long */ | 53 | /* u64 has problems with printk; this will cast it to unsigned long long */
57 | #define _LLU(x) (unsigned long long)(x) | 54 | #define _LLU(x) (unsigned long long)(x) |
58 | 55 | ||
59 | struct exofs_layout { | ||
60 | osd_id s_pid; /* partition ID of file system*/ | ||
61 | |||
62 | /* Our way of looking at the data_map */ | ||
63 | unsigned stripe_unit; | ||
64 | unsigned mirrors_p1; | ||
65 | |||
66 | unsigned group_width; | ||
67 | u64 group_depth; | ||
68 | unsigned group_count; | ||
69 | |||
70 | enum exofs_inode_layout_gen_functions lay_func; | ||
71 | |||
72 | unsigned s_numdevs; /* Num of devices in array */ | ||
73 | struct osd_dev *s_ods[0]; /* Variable length */ | ||
74 | }; | ||
75 | |||
76 | /* | 56 | /* |
77 | * our extension to the in-memory superblock | 57 | * our extension to the in-memory superblock |
78 | */ | 58 | */ |
79 | struct exofs_sb_info { | 59 | struct exofs_sb_info { |
60 | struct backing_dev_info bdi; /* register our bdi with VFS */ | ||
80 | struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ | 61 | struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ |
81 | int s_timeout; /* timeout for OSD operations */ | 62 | int s_timeout; /* timeout for OSD operations */ |
82 | uint64_t s_nextid; /* highest object ID used */ | 63 | uint64_t s_nextid; /* highest object ID used */ |
@@ -84,16 +65,13 @@ struct exofs_sb_info { | |||
84 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ | 65 | spinlock_t s_next_gen_lock; /* spinlock for gen # update */ |
85 | u32 s_next_generation; /* next gen # to use */ | 66 | u32 s_next_generation; /* next gen # to use */ |
86 | atomic_t s_curr_pending; /* number of pending commands */ | 67 | atomic_t s_curr_pending; /* number of pending commands */ |
87 | uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ | ||
88 | struct backing_dev_info bdi; /* register our bdi with VFS */ | ||
89 | 68 | ||
90 | struct pnfs_osd_data_map data_map; /* Default raid to use | 69 | struct pnfs_osd_data_map data_map; /* Default raid to use |
91 | * FIXME: Needed ? | 70 | * FIXME: Needed ? |
92 | */ | 71 | */ |
93 | /* struct exofs_layout dir_layout;*/ /* Default dir layout */ | 72 | struct ore_layout layout; /* Default files layout */ |
94 | struct exofs_layout layout; /* Default files layout, | 73 | struct ore_comp one_comp; /* id & cred of partition id=0*/ |
95 | * contains the variable osd_dev | 74 | struct ore_components comps; /* comps for the partition */ |
96 | * array. Keep last */ | ||
97 | struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ | 75 | struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ |
98 | }; | 76 | }; |
99 | 77 | ||
@@ -107,7 +85,8 @@ struct exofs_i_info { | |||
107 | uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ | 85 | uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ |
108 | uint32_t i_dir_start_lookup; /* which page to start lookup */ | 86 | uint32_t i_dir_start_lookup; /* which page to start lookup */ |
109 | uint64_t i_commit_size; /* the object's written length */ | 87 | uint64_t i_commit_size; /* the object's written length */ |
110 | uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ | 88 | struct ore_comp one_comp; /* same component for all devices */ |
89 | struct ore_components comps; /* inode view of the device table */ | ||
111 | }; | 90 | }; |
112 | 91 | ||
113 | static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) | 92 | static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) |
@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) | |||
115 | return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; | 94 | return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; |
116 | } | 95 | } |
117 | 96 | ||
118 | struct exofs_io_state; | ||
119 | typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); | ||
120 | |||
121 | struct exofs_io_state { | ||
122 | struct kref kref; | ||
123 | |||
124 | void *private; | ||
125 | exofs_io_done_fn done; | ||
126 | |||
127 | struct exofs_layout *layout; | ||
128 | struct osd_obj_id obj; | ||
129 | u8 *cred; | ||
130 | |||
131 | /* Global read/write IO*/ | ||
132 | loff_t offset; | ||
133 | unsigned long length; | ||
134 | void *kern_buff; | ||
135 | |||
136 | struct page **pages; | ||
137 | unsigned nr_pages; | ||
138 | unsigned pgbase; | ||
139 | unsigned pages_consumed; | ||
140 | |||
141 | /* Attributes */ | ||
142 | unsigned in_attr_len; | ||
143 | struct osd_attr *in_attr; | ||
144 | unsigned out_attr_len; | ||
145 | struct osd_attr *out_attr; | ||
146 | |||
147 | /* Variable array of size numdevs */ | ||
148 | unsigned numdevs; | ||
149 | struct exofs_per_dev_state { | ||
150 | struct osd_request *or; | ||
151 | struct bio *bio; | ||
152 | loff_t offset; | ||
153 | unsigned length; | ||
154 | unsigned dev; | ||
155 | } per_dev[]; | ||
156 | }; | ||
157 | |||
158 | static inline unsigned exofs_io_state_size(unsigned numdevs) | ||
159 | { | ||
160 | return sizeof(struct exofs_io_state) + | ||
161 | sizeof(struct exofs_per_dev_state) * numdevs; | ||
162 | } | ||
163 | |||
164 | /* | 97 | /* |
165 | * our inode flags | 98 | * our inode flags |
166 | */ | 99 | */ |
@@ -205,12 +138,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) | |||
205 | } | 138 | } |
206 | 139 | ||
207 | /* | 140 | /* |
208 | * Given a layout, object_number and stripe_index return the associated global | ||
209 | * dev_index | ||
210 | */ | ||
211 | unsigned exofs_layout_od_id(struct exofs_layout *layout, | ||
212 | osd_id obj_no, unsigned layout_index); | ||
213 | /* | ||
214 | * Maximum count of links to a file | 141 | * Maximum count of links to a file |
215 | */ | 142 | */ |
216 | #define EXOFS_LINK_MAX 32000 | 143 | #define EXOFS_LINK_MAX 32000 |
@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout, | |||
219 | * function declarations * | 146 | * function declarations * |
220 | *************************/ | 147 | *************************/ |
221 | 148 | ||
222 | /* ios.c */ | ||
223 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
224 | const struct osd_obj_id *obj); | ||
225 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | ||
226 | u64 offset, void *p, unsigned length); | ||
227 | |||
228 | int exofs_get_io_state(struct exofs_layout *layout, | ||
229 | struct exofs_io_state **ios); | ||
230 | void exofs_put_io_state(struct exofs_io_state *ios); | ||
231 | |||
232 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid); | ||
233 | |||
234 | int exofs_sbi_create(struct exofs_io_state *ios); | ||
235 | int exofs_sbi_remove(struct exofs_io_state *ios); | ||
236 | int exofs_sbi_write(struct exofs_io_state *ios); | ||
237 | int exofs_sbi_read(struct exofs_io_state *ios); | ||
238 | |||
239 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); | ||
240 | |||
241 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); | ||
242 | static inline int exofs_oi_write(struct exofs_i_info *oi, | ||
243 | struct exofs_io_state *ios) | ||
244 | { | ||
245 | ios->obj.id = exofs_oi_objno(oi); | ||
246 | ios->cred = oi->i_cred; | ||
247 | return exofs_sbi_write(ios); | ||
248 | } | ||
249 | |||
250 | static inline int exofs_oi_read(struct exofs_i_info *oi, | ||
251 | struct exofs_io_state *ios) | ||
252 | { | ||
253 | ios->obj.id = exofs_oi_objno(oi); | ||
254 | ios->cred = oi->i_cred; | ||
255 | return exofs_sbi_read(ios); | ||
256 | } | ||
257 | |||
258 | /* inode.c */ | 149 | /* inode.c */ |
259 | unsigned exofs_max_io_pages(struct exofs_layout *layout, | 150 | unsigned exofs_max_io_pages(struct ore_layout *layout, |
260 | unsigned expected_pages); | 151 | unsigned expected_pages); |
261 | int exofs_setattr(struct dentry *, struct iattr *); | 152 | int exofs_setattr(struct dentry *, struct iattr *); |
262 | int exofs_write_begin(struct file *file, struct address_space *mapping, | 153 | int exofs_write_begin(struct file *file, struct address_space *mapping, |
@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, | |||
281 | struct inode *); | 172 | struct inode *); |
282 | 173 | ||
283 | /* super.c */ | 174 | /* super.c */ |
175 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], | ||
176 | const struct osd_obj_id *obj); | ||
284 | int exofs_sbi_write_stats(struct exofs_sb_info *sbi); | 177 | int exofs_sbi_write_stats(struct exofs_sb_info *sbi); |
285 | 178 | ||
286 | /********************* | 179 | /********************* |
@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations; | |||
295 | 188 | ||
296 | /* inode.c */ | 189 | /* inode.c */ |
297 | extern const struct address_space_operations exofs_aops; | 190 | extern const struct address_space_operations exofs_aops; |
298 | extern const struct osd_attr g_attr_logical_length; | ||
299 | 191 | ||
300 | /* namei.c */ | 192 | /* namei.c */ |
301 | extern const struct inode_operations exofs_dir_inode_operations; | 193 | extern const struct inode_operations exofs_dir_inode_operations; |
@@ -305,4 +197,33 @@ extern const struct inode_operations exofs_special_inode_operations; | |||
305 | extern const struct inode_operations exofs_symlink_inode_operations; | 197 | extern const struct inode_operations exofs_symlink_inode_operations; |
306 | extern const struct inode_operations exofs_fast_symlink_inode_operations; | 198 | extern const struct inode_operations exofs_fast_symlink_inode_operations; |
307 | 199 | ||
200 | /* exofs_init_comps will initialize an ore_components device array | ||
201 | * pointing to a single ore_comp struct, and a round-robin view | ||
202 | * of the device table. | ||
203 | * The first device of each inode is the [inode->ino % num_devices] | ||
204 | * and the rest of the devices sequentially following where the | ||
205 | * first device is after the last device. | ||
206 | * It is assumed that the global device array at @sbi is twice | ||
207 | * bigger and that the device table repeats twice. | ||
208 | * See: exofs_read_lookup_dev_table() | ||
209 | */ | ||
210 | static inline void exofs_init_comps(struct ore_components *comps, | ||
211 | struct ore_comp *one_comp, | ||
212 | struct exofs_sb_info *sbi, osd_id oid) | ||
213 | { | ||
214 | unsigned dev_mod = (unsigned)oid, first_dev; | ||
215 | |||
216 | one_comp->obj.partition = sbi->one_comp.obj.partition; | ||
217 | one_comp->obj.id = oid; | ||
218 | exofs_make_credential(one_comp->cred, &one_comp->obj); | ||
219 | |||
220 | comps->numdevs = sbi->comps.numdevs; | ||
221 | comps->single_comp = EC_SINGLE_COMP; | ||
222 | comps->comps = one_comp; | ||
223 | |||
224 | /* Round robin device view of the table */ | ||
225 | first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs; | ||
226 | comps->ods = sbi->comps.ods + first_dev; | ||
227 | } | ||
228 | |||
308 | #endif | 229 | #endif |
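A worked example of the round-robin view that exofs_init_comps() builds above (stand-alone arithmetic only; the device counts here are made up). With 4 devices, mirrors_p1 = 2, and the doubled device table the comment describes, inode object-id 3 starts its private view at global device (3 * 2) % 4 = 2.

#include <stdio.h>

int main(void)
{
	unsigned numdevs = 4, mirrors_p1 = 2;
	unsigned long long oid = 3;	/* would be exofs_oi_objno(oi) */

	unsigned first_dev = ((unsigned)oid * mirrors_p1) % numdevs;

	/* comps->ods = sbi->comps.ods + first_dev; the table holding
	 * 2 * numdevs entries keeps ods[0..numdevs-1] in bounds. */
	printf("inode oid=%llu -> first_dev=%u\n", oid, first_dev);
	return 0;
}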
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 8472c098445d..f39a38fc2349 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC =
 	PAGE_SIZE / sizeof(struct page *),
 };
 
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
 			    unsigned expected_pages)
 {
 	unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
@@ -58,7 +58,7 @@ struct page_collect {
 	struct exofs_sb_info *sbi;
 	struct inode *inode;
 	unsigned expected_pages;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 
 	struct page **pages;
 	unsigned alloc_pages;
@@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol)
 {
 	unsigned pages;
 
-	if (!pcol->ios) { /* First time allocate io_state */
-		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
-
-		if (ret)
-			return ret;
-	}
-
 	/* TODO: easily support bio chaining */
 	pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
 
@@ -140,7 +133,7 @@ static void pcol_free(struct page_collect *pcol)
 	pcol->pages = NULL;
 
 	if (pcol->ios) {
-		exofs_put_io_state(pcol->ios);
+		ore_put_io_state(pcol->ios);
 		pcol->ios = NULL;
 	}
 }
@@ -200,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol)
 	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
-	int ret = exofs_check_io(pcol->ios, &resid);
+	int ret = ore_check_io(pcol->ios, &resid);
 
 	if (likely(!ret))
 		good_bytes = pcol->length;
@@ -241,7 +234,7 @@ static int __readpages_done(struct page_collect *pcol)
 }
 
 /* callback of async reads */
-static void readpages_done(struct exofs_io_state *ios, void *p)
+static void readpages_done(struct ore_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 
@@ -269,20 +262,28 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 static int read_exec(struct page_collect *pcol)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct exofs_io_state *ios = pcol->ios;
+	struct ore_io_state *ios;
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
 	if (!pcol->pages)
 		return 0;
 
+	if (!pcol->ios) {
+		int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true,
+					   pcol->pg_first << PAGE_CACHE_SHIFT,
+					   pcol->length, &pcol->ios);
+
+		if (ret)
+			return ret;
+	}
+
+	ios = pcol->ios;
 	ios->pages = pcol->pages;
 	ios->nr_pages = pcol->nr_pages;
-	ios->length = pcol->length;
-	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
 
 	if (pcol->read_4_write) {
-		exofs_oi_read(oi, pcol->ios);
+		ore_read(pcol->ios);
 		return __readpages_done(pcol);
 	}
 
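A minimal sketch (mine, not from this patch) of the ORE call sequence that read_exec() now follows: allocate a rw state bound to the inode's comps, attach pages, issue the read, then release the state. The oi/pcol naming matches the hunk above; error paths are trimmed.

static int sketch_ore_read(struct exofs_i_info *oi, struct page_collect *pcol)
{
	struct ore_io_state *ios;
	int ret;

	ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true /* read */,
			       pcol->pg_first << PAGE_CACHE_SHIFT,
			       pcol->length, &ios);
	if (ret)
		return ret;

	ios->pages = pcol->pages;
	ios->nr_pages = pcol->nr_pages;

	ret = ore_read(ios);	/* synchronous when ios->done == NULL */
	ore_put_io_state(ios);
	return ret;
}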
@@ -295,14 +296,14 @@ static int read_exec(struct page_collect *pcol)
 	*pcol_copy = *pcol;
 	ios->done = readpages_done;
 	ios->private = pcol_copy;
-	ret = exofs_oi_read(oi, ios);
+	ret = ore_read(ios);
 	if (unlikely(ret))
 		goto err;
 
 	atomic_inc(&pcol->sbi->s_curr_pending);
 
 	EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
-		  ios->obj.id, _LLU(ios->offset), pcol->length);
+		  oi->one_comp.obj.id, _LLU(ios->offset), pcol->length);
 
 	/* pages ownership was passed to pcol_copy */
 	_pcol_reset(pcol);
@@ -457,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page)
 }
 
 /* Callback for osd_write. All writes are asynchronous */
-static void writepages_done(struct exofs_io_state *ios, void *p)
+static void writepages_done(struct ore_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 	int i;
 	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
-	int ret = exofs_check_io(ios, &resid);
+	int ret = ore_check_io(ios, &resid);
 
 	atomic_dec(&pcol->sbi->s_curr_pending);
 
@@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
 static int write_exec(struct page_collect *pcol)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct exofs_io_state *ios = pcol->ios;
+	struct ore_io_state *ios;
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
 	if (!pcol->pages)
 		return 0;
 
+	BUG_ON(pcol->ios);
+	ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false,
+			       pcol->pg_first << PAGE_CACHE_SHIFT,
+			       pcol->length, &pcol->ios);
+
+	if (unlikely(ret))
+		goto err;
+
 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
 	if (!pcol_copy) {
 		EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
@@ -523,16 +532,15 @@ static int write_exec(struct page_collect *pcol)
 
 	*pcol_copy = *pcol;
 
+	ios = pcol->ios;
 	ios->pages = pcol_copy->pages;
 	ios->nr_pages = pcol_copy->nr_pages;
-	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
-	ios->length = pcol_copy->length;
 	ios->done = writepages_done;
 	ios->private = pcol_copy;
 
-	ret = exofs_oi_write(oi, ios);
+	ret = ore_write(ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
+		EXOFS_ERR("write_exec: ore_write() Failed\n");
 		goto err;
 	}
 
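A sketch of the hand-off idiom write_exec() relies on (my paraphrase, not patch code): the caller's collect struct is copied, the copy is given to the async completion via ios->private, and the original is reset so the caller can start gathering the next batch.

static int sketch_async_submit(struct page_collect *pcol)
{
	struct ore_io_state *ios = pcol->ios;
	struct page_collect *pcol_copy;
	int ret;

	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
	if (!pcol_copy)
		return -ENOMEM;

	*pcol_copy = *pcol;		/* page ownership moves to the copy */
	ios->done = writepages_done;	/* non-NULL ->done makes it async */
	ios->private = pcol_copy;	/* the callback frees pcol_copy */

	ret = ore_write(ios);
	if (!ret)
		_pcol_reset(pcol);	/* caller may gather the next batch */
	return ret;
}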
@@ -844,17 +852,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
 	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
 }
 
-const struct osd_attr g_attr_logical_length = ATTR_DEF(
-	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-
 static int _do_truncate(struct inode *inode, loff_t newsize)
 {
 	struct exofs_i_info *oi = exofs_i(inode);
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	int ret;
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
-	ret = exofs_oi_truncate(oi, (u64)newsize);
+	ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize);
 	if (likely(!ret))
 		truncate_setsize(inode, newsize);
 
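Hedged sketch: after this patch, truncate needs only a (layout, comps) pair, so a hypothetical non-exofs ORE user could shrink its objects the same way. ore_truncate()'s signature is taken from the hunk above; everything else here is illustrative.

static int sketch_shrink_object(struct ore_layout *layout,
				struct ore_components *comps, u64 newsize)
{
	int ret = ore_truncate(layout, comps, newsize);

	if (ret)
		pr_err("ore_truncate failed => %d\n", ret);
	return ret;
}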
@@ -917,30 +923,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
 		[1] = g_attr_inode_file_layout,
 		[2] = g_attr_inode_dir_layout,
 	};
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct exofs_on_disk_inode_layout *layout;
 	int ret;
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		return ret;
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
-	exofs_make_credential(oi->i_cred, &ios->obj);
-	ios->cred = oi->i_cred;
-
-	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
-	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
+	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
 
 	ios->in_attr = attrs;
 	ios->in_attr_len = ARRAY_SIZE(attrs);
 
-	ret = exofs_sbi_read(ios);
+	ret = ore_read(ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
-			  _LLU(ios->obj.id), ret);
+			  _LLU(oi->one_comp.obj.id), ret);
 		memset(inode, 0, sizeof(*inode));
 		inode->i_mode = 0040000 | (0777 & ~022);
 		/* If object is lost on target we might as well enable it's
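A sketch of an attributes-only fetch through ORE, based on the hunks in this patch rather than a verbatim excerpt: with no pages and no kern_buff attached, the read path falls through to osd_req_get_attributes(), so an io_state from ore_get_io_state() acts as a pure attribute request.

static int sketch_read_one_attr(struct exofs_sb_info *sbi,
				struct exofs_i_info *oi,
				struct osd_attr *attr)
{
	struct ore_io_state *ios;
	int ret;

	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
	if (ret)
		return ret;

	ios->in_attr = attr;		/* filled in by the target */
	ios->in_attr_len = 1;

	ret = ore_read(ios);		/* sync: no ->done installed */
	ore_put_io_state(ios);
	return ret;
}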
@@ -990,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
 	}
 
 out:
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	return ret;
 }
 
@@ -1016,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 	oi = exofs_i(inode);
 	__oi_init(oi);
+	exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
+			 exofs_oi_objno(oi));
 
 	/* read the inode from the osd */
 	ret = exofs_get_inode(sb, oi, &fcb);
@@ -1107,21 +1111,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
  * set the obj_created flag so that other methods know that the object exists on
  * the OSD.
  */
-static void create_done(struct exofs_io_state *ios, void *p)
+static void create_done(struct ore_io_state *ios, void *p)
 {
 	struct inode *inode = p;
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	int ret;
 
-	ret = exofs_check_io(ios, NULL);
-	exofs_put_io_state(ios);
+	ret = ore_check_io(ios, NULL);
+	ore_put_io_state(ios);
 
 	atomic_dec(&sbi->s_curr_pending);
 
 	if (unlikely(ret)) {
 		EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
-			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
+			  _LLU(exofs_oi_objno(oi)),
+			  _LLU(oi->one_comp.obj.partition));
 		/*TODO: When FS is corrupted creation can fail, object already
 		 * exist. Get rid of this asynchronous creation, if exist
 		 * increment the obj counter and try the next object. Until we
@@ -1140,14 +1145,13 @@ static void create_done(struct ore_io_state *ios, void *p)
  */
 struct inode *exofs_new_inode(struct inode *dir, int mode)
 {
-	struct super_block *sb;
+	struct super_block *sb = dir->i_sb;
+	struct exofs_sb_info *sbi = sb->s_fs_info;
 	struct inode *inode;
 	struct exofs_i_info *oi;
-	struct exofs_sb_info *sbi;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
-	sb = dir->i_sb;
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
@@ -1157,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 
 	set_obj_2bcreated(oi);
 
-	sbi = sb->s_fs_info;
-
 	inode->i_mapping->backing_dev_info = sb->s_bdi;
 	inode_init_owner(inode, dir, mode);
 	inode->i_ino = sbi->s_nextid++;
@@ -1170,25 +1172,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 	spin_unlock(&sbi->s_next_gen_lock);
 	insert_inode_hash(inode);
 
+	exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
+			 exofs_oi_objno(oi));
 	exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
 
 	mark_inode_dirty(inode);
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
+		EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
 		return ERR_PTR(ret);
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
-	exofs_make_credential(oi->i_cred, &ios->obj);
-
 	ios->done = create_done;
 	ios->private = inode;
-	ios->cred = oi->i_cred;
-	ret = exofs_sbi_create(ios);
+
+	ret = ore_create(ios);
 	if (ret) {
-		exofs_put_io_state(ios);
+		ore_put_io_state(ios);
 		return ERR_PTR(ret);
 	}
 	atomic_inc(&sbi->s_curr_pending);
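A sketch of the async-create pattern exofs_new_inode() keeps using, condensed from the hunk above: install a ->done callback, bump the pending counter, and let create_done() drop both the io_state and the counter when all per-device requests complete.

static int sketch_async_create(struct exofs_sb_info *sbi,
			       struct exofs_i_info *oi, struct inode *inode)
{
	struct ore_io_state *ios;
	int ret;

	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
	if (ret)
		return ret;

	ios->done = create_done;	/* makes ore_create() asynchronous */
	ios->private = inode;

	ret = ore_create(ios);
	if (ret) {
		ore_put_io_state(ios);	/* sync failure: we still own ios */
		return ret;
	}
	atomic_inc(&sbi->s_curr_pending);
	return 0;
}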
@@ -1207,11 +1208,11 @@ struct updatei_args {
 /*
  * Callback function from exofs_update_inode().
  */
-static void updatei_done(struct exofs_io_state *ios, void *p)
+static void updatei_done(struct ore_io_state *ios, void *p)
 {
 	struct updatei_args *args = p;
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 
 	atomic_dec(&args->sbi->s_curr_pending);
 
@@ -1227,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct osd_attr attr;
 	struct exofs_fcb *fcb;
 	struct updatei_args *args;
@@ -1266,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	} else
 		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		goto free_args;
 	}
 
@@ -1285,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 		ios->private = args;
 	}
 
-	ret = exofs_oi_write(oi, ios);
+	ret = ore_write(ios);
 	if (!do_sync && !ret) {
 		atomic_inc(&sbi->s_curr_pending);
 		goto out; /* deallocation in updatei_done */
 	}
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 free_args:
 	kfree(args);
 out:
@@ -1310,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
  * Callback function from exofs_delete_inode() - don't have much cleaning up to
  * do.
  */
-static void delete_done(struct exofs_io_state *ios, void *p)
+static void delete_done(struct ore_io_state *ios, void *p)
 {
 	struct exofs_sb_info *sbi = p;
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 
 	atomic_dec(&sbi->s_curr_pending);
 }
@@ -1329,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
 	truncate_inode_pages(&inode->i_data, 0);
@@ -1349,20 +1350,19 @@ void exofs_evict_inode(struct inode *inode)
 	/* ignore the error, attempt a remove anyway */
 
 	/* Now Remove the OSD objects */
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
 		return;
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
 	ios->done = delete_done;
 	ios->private = sbi;
-	ios->cred = oi->i_cred;
-	ret = exofs_sbi_remove(ios);
+
+	ret = ore_remove(ios);
 	if (ret) {
-		EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
-		exofs_put_io_state(ios);
+		EXOFS_ERR("%s: ore_remove failed\n", __func__);
+		ore_put_io_state(ios);
 		return;
 	}
 	atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ore.c
index f74a2ec027a6..25305af88198 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ore.c
@@ -23,81 +23,87 @@
  */
 
 #include <linux/slab.h>
-#include <scsi/scsi_device.h>
 #include <asm/div64.h>
 
-#include "exofs.h"
+#include <scsi/osd_ore.h>
 
-#define EXOFS_DBGMSG2(M...) do {} while (0)
-/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
+#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)
 
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
-{
-	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
-}
+#ifdef CONFIG_EXOFS_DEBUG
+#define ORE_DBGMSG(fmt, a...) \
+	printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
+#else
+#define ORE_DBGMSG(fmt, a...) \
+	do { if (0) printk(fmt, ##a); } while (0)
+#endif
 
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
-		    u64 offset, void *p, unsigned length)
-{
-	struct osd_request *or = osd_start_request(od, GFP_KERNEL);
-/*	struct osd_sense_info osi = {.key = 0};*/
-	int ret;
+/* u64 has problems with printk this will cast it to unsigned long long */
+#define _LLU(x) (unsigned long long)(x)
 
-	if (unlikely(!or)) {
-		EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
-		return -ENOMEM;
-	}
-	ret = osd_req_read_kern(or, obj, offset, p, length);
-	if (unlikely(ret)) {
-		EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
-		goto out;
-	}
+#define ORE_DBGMSG2(M...) do {} while (0)
+/* #define ORE_DBGMSG2 ORE_DBGMSG */
 
-	ret = osd_finalize_request(or, 0, cred, NULL);
-	if (unlikely(ret)) {
-		EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
-		goto out;
-	}
+MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
+MODULE_LICENSE("GPL");
 
-	ret = osd_execute_request(or);
-	if (unlikely(ret))
-		EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
-	/* osd_req_decode_sense(or, ret); */
+static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
+{
+	return ios->comps->comps[index & ios->comps->single_comp].cred;
+}
 
-out:
-	osd_end_request(or);
-	return ret;
+static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
+{
+	return &ios->comps->comps[index & ios->comps->single_comp].obj;
 }
 
-int exofs_get_io_state(struct exofs_layout *layout,
-		       struct exofs_io_state **pios)
+static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
 {
-	struct exofs_io_state *ios;
+	return ios->comps->ods[index];
+}
+
+int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
+		     bool is_reading, u64 offset, u64 length,
+		     struct ore_io_state **pios)
+{
+	struct ore_io_state *ios;
 
 	/*TODO: Maybe use kmem_cach per sbi of size
 	 * exofs_io_state_size(layout->s_numdevs)
 	 */
-	ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
+	ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL);
 	if (unlikely(!ios)) {
-		EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
-			     exofs_io_state_size(layout->s_numdevs));
+		ORE_DBGMSG("Failed kzalloc bytes=%d\n",
+			   ore_io_state_size(comps->numdevs));
 		*pios = NULL;
 		return -ENOMEM;
 	}
 
 	ios->layout = layout;
-	ios->obj.partition = layout->s_pid;
+	ios->comps = comps;
+	ios->offset = offset;
+	ios->length = length;
+	ios->reading = is_reading;
+
 	*pios = ios;
 	return 0;
 }
+EXPORT_SYMBOL(ore_get_rw_state);
+
+int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
+		     struct ore_io_state **ios)
+{
+	return ore_get_rw_state(layout, comps, true, 0, 0, ios);
+}
+EXPORT_SYMBOL(ore_get_io_state);
 
-void exofs_put_io_state(struct exofs_io_state *ios)
+void ore_put_io_state(struct ore_io_state *ios)
 {
 	if (ios) {
 		unsigned i;
 
 		for (i = 0; i < ios->numdevs; i++) {
-			struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
+			struct ore_per_dev_state *per_dev = &ios->per_dev[i];
 
 			if (per_dev->or)
 				osd_end_request(per_dev->or);
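A toy demo of the masking trick in _ios_cred()/_ios_obj() above: with EC_SINGLE_COMP assumed to be an all-zero mask (as exofs's use of one shared ore_comp suggests), every device index collapses to component 0, while an all-ones mask would give each device its own entry. Stand-alone C; the enum values here are my assumption, not a quote of osd_ore.h.

#include <stdio.h>

enum { EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff };

int main(void)
{
	unsigned single = EC_SINGLE_COMP, multi = EC_MULTPLE_COMPS;

	for (unsigned index = 0; index < 3; index++)
		printf("dev %u -> comp %u (single), comp %u (multi)\n",
		       index, index & single, index & multi);
	return 0;
}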
@@ -108,31 +114,9 @@ void exofs_put_io_state(struct exofs_io_state *ios)
 		kfree(ios);
 	}
 }
+EXPORT_SYMBOL(ore_put_io_state);
 
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
-			    osd_id obj_no, unsigned layout_index)
-{
-/*	switch (layout->lay_func) {
-	case LAYOUT_MOVING_WINDOW:
-	{*/
-	unsigned dev_mod = obj_no;
-
-	return (layout_index + dev_mod * layout->mirrors_p1) %
-							  layout->s_numdevs;
-/*	}
-	case LAYOUT_FUNC_IMPLICT:
-		return layout->devs[layout_index];
-	}*/
-}
-
-static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
-					   unsigned layout_index)
-{
-	return ios->layout->s_ods[
-		exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
-}
-
-static void _sync_done(struct exofs_io_state *ios, void *p)
+static void _sync_done(struct ore_io_state *ios, void *p)
 {
 	struct completion *waiting = p;
 
@@ -141,20 +125,20 @@ static void _sync_done(struct exofs_io_state *ios, void *p)
 
 static void _last_io(struct kref *kref)
 {
-	struct exofs_io_state *ios = container_of(
-		kref, struct exofs_io_state, kref);
+	struct ore_io_state *ios = container_of(
+		kref, struct ore_io_state, kref);
 
 	ios->done(ios, ios->private);
 }
 
 static void _done_io(struct osd_request *or, void *p)
 {
-	struct exofs_io_state *ios = p;
+	struct ore_io_state *ios = p;
 
 	kref_put(&ios->kref, _last_io);
 }
 
-static int exofs_io_execute(struct exofs_io_state *ios)
+static int ore_io_execute(struct ore_io_state *ios)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	bool sync = (ios->done == NULL);
@@ -170,9 +154,9 @@ static int exofs_io_execute(struct exofs_io_state *ios)
 		if (unlikely(!or))
 			continue;
 
-		ret = osd_finalize_request(or, 0, ios->cred, NULL);
+		ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
 		if (unlikely(ret)) {
-			EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n",
+			ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
 				     ret);
 			return ret;
 		}
@@ -194,7 +178,7 @@ static int exofs_io_execute(struct exofs_io_state *ios)
 
 	if (sync) {
 		wait_for_completion(&wait);
-		ret = exofs_check_io(ios, NULL);
+		ret = ore_check_io(ios, NULL);
 	}
 	return ret;
 }
@@ -214,7 +198,7 @@ static void _clear_bio(struct bio *bio)
 	}
 }
 
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
+int ore_check_io(struct ore_io_state *ios, u64 *resid)
 {
 	enum osd_err_priority acumulated_osd_err = 0;
 	int acumulated_lin_err = 0;
@@ -235,7 +219,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
 			/* start read offset passed endof file */
 			_clear_bio(ios->per_dev[i].bio);
-			EXOFS_DBGMSG("start read offset passed end of file "
+			ORE_DBGMSG("start read offset passed end of file "
 				"offset=0x%llx, length=0x%llx\n",
 				_LLU(ios->per_dev[i].offset),
 				_LLU(ios->per_dev[i].length));
@@ -259,6 +243,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 
 	return acumulated_lin_err;
 }
+EXPORT_SYMBOL(ore_check_io);
 
 /*
  * L - logical offset into the file
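Before the striping comment continues below, a sketch (mine, not patch code) of how callers such as __readpages_done()/writepages_done() use ore_check_io(): it folds per-device OSD errors into one errno and, via resid, reports how many tail bytes of the request cannot be trusted.

static u64 sketch_good_bytes(struct ore_io_state *ios)
{
	u64 resid = 0;

	if (!ore_check_io(ios, &resid))
		return ios->length;		/* everything made it */
	return ios->length - resid;		/* trust only the head */
}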
@@ -305,20 +290,21 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 struct _striping_info {
 	u64 obj_offset;
 	u64 group_length;
+	u64 M; /* for truncate */
 	unsigned dev;
 	unsigned unit_off;
 };
 
-static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
+static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset,
 			      struct _striping_info *si)
 {
-	u32	stripe_unit = ios->layout->stripe_unit;
-	u32	group_width = ios->layout->group_width;
-	u64	group_depth = ios->layout->group_depth;
+	u32	stripe_unit = layout->stripe_unit;
+	u32	group_width = layout->group_width;
+	u64	group_depth = layout->group_depth;
 
 	u32	U = stripe_unit * group_width;
 	u64	T = U * group_depth;
-	u64	S = T * ios->layout->group_count;
+	u64	S = T * layout->group_count;
 	u64	M = div64_u64(file_offset, S);
 
 	/*
@@ -333,7 +319,7 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
 
 	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
 	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
-	si->dev *= ios->layout->mirrors_p1;
+	si->dev *= layout->mirrors_p1;
 
 	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
 
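A worked numeric example of the striping terms above (stand-alone; uses only the shown formulas, and the N/G/H derivation elided by the hunk is re-done here in the obvious way, which is an assumption rather than a quote of the source).

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t stripe_unit = 4096, group_width = 4, mirrors_p1 = 1;
	uint64_t group_depth = 2, group_count = 2;

	uint32_t U = stripe_unit * group_width;	/* one full stripe: 16K */
	uint64_t T = U * group_depth;		/* one group: 32K */
	uint64_t S = T * group_count;		/* all groups once: 64K */

	uint64_t file_offset = 70000;
	uint64_t M = file_offset / S;		/* 1: second S-cycle */
	uint64_t LmodS = file_offset - M * S;	/* 4464 into the cycle */
	uint64_t G = LmodS / T;			/* group 0 */
	uint64_t H = LmodS - G * T;		/* 4464 into the group */
	uint64_t N = H / U;			/* stripe 0 of the group */

	unsigned dev = (unsigned)((H - N * U) / stripe_unit + G * group_width);
	dev *= mirrors_p1;			/* -> device 1 */

	printf("U=%u T=%llu S=%llu M=%llu dev=%u unit_off=%llu\n",
	       U, (unsigned long long)T, (unsigned long long)S,
	       (unsigned long long)M, dev,
	       (unsigned long long)(file_offset % stripe_unit));
	return 0;
}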
@@ -341,15 +327,16 @@
 			(M * group_depth * stripe_unit);
 
 	si->group_length = T - H;
+	si->M = M;
 }
 
-static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
-		unsigned pgbase, struct exofs_per_dev_state *per_dev,
+static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
+		unsigned pgbase, struct ore_per_dev_state *per_dev,
 		int cur_len)
 {
 	unsigned pg = *cur_pg;
 	struct request_queue *q =
-			osd_request_queue(exofs_ios_od(ios, per_dev->dev));
+			osd_request_queue(_ios_od(ios, per_dev->dev));
 
 	per_dev->length += cur_len;
 
@@ -361,7 +348,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
 
 		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
 		if (unlikely(!per_dev->bio)) {
-			EXOFS_DBGMSG("Failed to allocate BIO size=%u\n",
+			ORE_DBGMSG("Failed to allocate BIO size=%u\n",
 				     bio_size);
 			return -ENOMEM;
 		}
@@ -387,7 +374,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
 	return 0;
 }
 
-static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
+static int _prepare_one_group(struct ore_io_state *ios, u64 length,
 			      struct _striping_info *si)
 {
 	unsigned stripe_unit = ios->layout->stripe_unit;
@@ -400,7 +387,7 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
 	int ret = 0;
 
 	while (length) {
-		struct exofs_per_dev_state *per_dev = &ios->per_dev[dev];
+		struct ore_per_dev_state *per_dev = &ios->per_dev[dev];
 		unsigned cur_len, page_off = 0;
 
 		if (!per_dev->length) {
@@ -443,7 +430,7 @@ out:
 	return ret;
 }
 
-static int _prepare_for_striping(struct exofs_io_state *ios)
+static int _prepare_for_striping(struct ore_io_state *ios)
 {
 	u64 length = ios->length;
 	u64 offset = ios->offset;
@@ -452,9 +439,9 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
 
 	if (!ios->pages) {
 		if (ios->kern_buff) {
-			struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
+			struct ore_per_dev_state *per_dev = &ios->per_dev[0];
 
-			_calc_stripe_info(ios, ios->offset, &si);
+			_calc_stripe_info(ios->layout, ios->offset, &si);
 			per_dev->offset = si.obj_offset;
 			per_dev->dev = si.dev;
 
@@ -468,7 +455,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
 	}
 
 	while (length) {
-		_calc_stripe_info(ios, offset, &si);
+		_calc_stripe_info(ios->layout, offset, &si);
 
 		if (length < si.group_length)
 			si.group_length = length;
@@ -485,57 +472,59 @@ out:
 	return ret;
 }
 
-int exofs_sbi_create(struct exofs_io_state *ios)
+int ore_create(struct ore_io_state *ios)
 {
 	int i, ret;
 
-	for (i = 0; i < ios->layout->s_numdevs; i++) {
+	for (i = 0; i < ios->comps->numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
 		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
 			goto out;
 		}
 		ios->per_dev[i].or = or;
 		ios->numdevs++;
 
-		osd_req_create_object(or, &ios->obj);
+		osd_req_create_object(or, _ios_obj(ios, i));
 	}
-	ret = exofs_io_execute(ios);
+	ret = ore_io_execute(ios);
 
 out:
 	return ret;
 }
+EXPORT_SYMBOL(ore_create);
 
-int exofs_sbi_remove(struct exofs_io_state *ios)
+int ore_remove(struct ore_io_state *ios)
 {
 	int i, ret;
 
-	for (i = 0; i < ios->layout->s_numdevs; i++) {
+	for (i = 0; i < ios->comps->numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
 		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
 			goto out;
 		}
 		ios->per_dev[i].or = or;
 		ios->numdevs++;
 
-		osd_req_remove_object(or, &ios->obj);
+		osd_req_remove_object(or, _ios_obj(ios, i));
	}
-	ret = exofs_io_execute(ios);
+	ret = ore_io_execute(ios);
 
 out:
 	return ret;
 }
+EXPORT_SYMBOL(ore_remove);
 
-static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
+static int _write_mirror(struct ore_io_state *ios, int cur_comp)
 {
-	struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
+	struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
 	unsigned dev = ios->per_dev[cur_comp].dev;
 	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
 	int ret = 0;
544 | return 0; /* Just an empty slot */ | 533 | return 0; /* Just an empty slot */ |
545 | 534 | ||
546 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | 535 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
547 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; | 536 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
548 | struct osd_request *or; | 537 | struct osd_request *or; |
549 | 538 | ||
550 | or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL); | 539 | or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); |
551 | if (unlikely(!or)) { | 540 | if (unlikely(!or)) { |
552 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 541 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
553 | ret = -ENOMEM; | 542 | ret = -ENOMEM; |
554 | goto out; | 543 | goto out; |
555 | } | 544 | } |
@@ -563,7 +552,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) | |||
563 | bio = bio_kmalloc(GFP_KERNEL, | 552 | bio = bio_kmalloc(GFP_KERNEL, |
564 | master_dev->bio->bi_max_vecs); | 553 | master_dev->bio->bi_max_vecs); |
565 | if (unlikely(!bio)) { | 554 | if (unlikely(!bio)) { |
566 | EXOFS_DBGMSG( | 555 | ORE_DBGMSG( |
567 | "Failed to allocate BIO size=%u\n", | 556 | "Failed to allocate BIO size=%u\n", |
568 | master_dev->bio->bi_max_vecs); | 557 | master_dev->bio->bi_max_vecs); |
569 | ret = -ENOMEM; | 558 | ret = -ENOMEM; |
@@ -582,25 +571,29 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) | |||
582 | bio->bi_rw |= REQ_WRITE; | 571 | bio->bi_rw |= REQ_WRITE; |
583 | } | 572 | } |
584 | 573 | ||
585 | osd_req_write(or, &ios->obj, per_dev->offset, bio, | 574 | osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, |
586 | per_dev->length); | 575 | bio, per_dev->length); |
587 | EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " | 576 | ORE_DBGMSG("write(0x%llx) offset=0x%llx " |
588 | "length=0x%llx dev=%d\n", | 577 | "length=0x%llx dev=%d\n", |
589 | _LLU(ios->obj.id), _LLU(per_dev->offset), | 578 | _LLU(_ios_obj(ios, dev)->id), |
579 | _LLU(per_dev->offset), | ||
590 | _LLU(per_dev->length), dev); | 580 | _LLU(per_dev->length), dev); |
591 | } else if (ios->kern_buff) { | 581 | } else if (ios->kern_buff) { |
592 | ret = osd_req_write_kern(or, &ios->obj, per_dev->offset, | 582 | ret = osd_req_write_kern(or, _ios_obj(ios, dev), |
593 | ios->kern_buff, ios->length); | 583 | per_dev->offset, |
584 | ios->kern_buff, ios->length); | ||
594 | if (unlikely(ret)) | 585 | if (unlikely(ret)) |
595 | goto out; | 586 | goto out; |
596 | EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " | 587 | ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " |
597 | "length=0x%llx dev=%d\n", | 588 | "length=0x%llx dev=%d\n", |
598 | _LLU(ios->obj.id), _LLU(per_dev->offset), | 589 | _LLU(_ios_obj(ios, dev)->id), |
590 | _LLU(per_dev->offset), | ||
599 | _LLU(ios->length), dev); | 591 | _LLU(ios->length), dev); |
600 | } else { | 592 | } else { |
601 | osd_req_set_attributes(or, &ios->obj); | 593 | osd_req_set_attributes(or, _ios_obj(ios, dev)); |
602 | EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", | 594 | ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", |
603 | _LLU(ios->obj.id), ios->out_attr_len, dev); | 595 | _LLU(_ios_obj(ios, dev)->id), |
596 | ios->out_attr_len, dev); | ||
604 | } | 597 | } |
605 | 598 | ||
606 | if (ios->out_attr) | 599 | if (ios->out_attr) |
@@ -616,7 +609,7 @@ out: | |||
616 | return ret; | 609 | return ret; |
617 | } | 610 | } |
618 | 611 | ||
619 | int exofs_sbi_write(struct exofs_io_state *ios) | 612 | int ore_write(struct ore_io_state *ios) |
620 | { | 613 | { |
621 | int i; | 614 | int i; |
622 | int ret; | 615 | int ret; |
@@ -626,52 +619,55 @@ int exofs_sbi_write(struct exofs_io_state *ios) | |||
626 | return ret; | 619 | return ret; |
627 | 620 | ||
628 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | 621 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { |
629 | ret = _sbi_write_mirror(ios, i); | 622 | ret = _write_mirror(ios, i); |
630 | if (unlikely(ret)) | 623 | if (unlikely(ret)) |
631 | return ret; | 624 | return ret; |
632 | } | 625 | } |
633 | 626 | ||
634 | ret = exofs_io_execute(ios); | 627 | ret = ore_io_execute(ios); |
635 | return ret; | 628 | return ret; |
636 | } | 629 | } |
630 | EXPORT_SYMBOL(ore_write); | ||
637 | 631 | ||
638 | static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) | 632 | static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) |
639 | { | 633 | { |
640 | struct osd_request *or; | 634 | struct osd_request *or; |
641 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; | 635 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
642 | unsigned first_dev = (unsigned)ios->obj.id; | 636 | struct osd_obj_id *obj = _ios_obj(ios, cur_comp); |
637 | unsigned first_dev = (unsigned)obj->id; | ||
643 | 638 | ||
644 | if (ios->pages && !per_dev->length) | 639 | if (ios->pages && !per_dev->length) |
645 | return 0; /* Just an empty slot */ | 640 | return 0; /* Just an empty slot */ |
646 | 641 | ||
647 | first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; | 642 | first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; |
648 | or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); | 643 | or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); |
649 | if (unlikely(!or)) { | 644 | if (unlikely(!or)) { |
650 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 645 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
651 | return -ENOMEM; | 646 | return -ENOMEM; |
652 | } | 647 | } |
653 | per_dev->or = or; | 648 | per_dev->or = or; |
654 | 649 | ||
655 | if (ios->pages) { | 650 | if (ios->pages) { |
656 | osd_req_read(or, &ios->obj, per_dev->offset, | 651 | osd_req_read(or, obj, per_dev->offset, |
657 | per_dev->bio, per_dev->length); | 652 | per_dev->bio, per_dev->length); |
658 | EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" | 653 | ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" |
659 | " dev=%d\n", _LLU(ios->obj.id), | 654 | " dev=%d\n", _LLU(obj->id), |
660 | _LLU(per_dev->offset), _LLU(per_dev->length), | 655 | _LLU(per_dev->offset), _LLU(per_dev->length), |
661 | first_dev); | 656 | first_dev); |
662 | } else if (ios->kern_buff) { | 657 | } else if (ios->kern_buff) { |
663 | int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset, | 658 | int ret = osd_req_read_kern(or, obj, per_dev->offset, |
664 | ios->kern_buff, ios->length); | 659 | ios->kern_buff, ios->length); |
665 | EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " | 660 | ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx " |
666 | "length=0x%llx dev=%d ret=>%d\n", | 661 | "length=0x%llx dev=%d ret=>%d\n", |
667 | _LLU(ios->obj.id), _LLU(per_dev->offset), | 662 | _LLU(obj->id), _LLU(per_dev->offset), |
668 | _LLU(ios->length), first_dev, ret); | 663 | _LLU(ios->length), first_dev, ret); |
669 | if (unlikely(ret)) | 664 | if (unlikely(ret)) |
670 | return ret; | 665 | return ret; |
671 | } else { | 666 | } else { |
672 | osd_req_get_attributes(or, &ios->obj); | 667 | osd_req_get_attributes(or, obj); |
673 | EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", | 668 | ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", |
674 | _LLU(ios->obj.id), ios->in_attr_len, first_dev); | 669 | _LLU(obj->id), |
670 | ios->in_attr_len, first_dev); | ||
675 | } | 671 | } |
676 | if (ios->out_attr) | 672 | if (ios->out_attr) |
677 | osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); | 673 | osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); |
@@ -682,7 +678,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) | |||
682 | return 0; | 678 | return 0; |
683 | } | 679 | } |
684 | 680 | ||
685 | int exofs_sbi_read(struct exofs_io_state *ios) | 681 | int ore_read(struct ore_io_state *ios) |
686 | { | 682 | { |
687 | int i; | 683 | int i; |
688 | int ret; | 684 | int ret; |
@@ -692,16 +688,17 @@ int exofs_sbi_read(struct exofs_io_state *ios) | |||
692 | return ret; | 688 | return ret; |
693 | 689 | ||
694 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | 690 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { |
695 | ret = _sbi_read_mirror(ios, i); | 691 | ret = _read_mirror(ios, i); |
696 | if (unlikely(ret)) | 692 | if (unlikely(ret)) |
697 | return ret; | 693 | return ret; |
698 | } | 694 | } |
699 | 695 | ||
700 | ret = exofs_io_execute(ios); | 696 | ret = ore_io_execute(ios); |
701 | return ret; | 697 | return ret; |
702 | } | 698 | } |
699 | EXPORT_SYMBOL(ore_read); | ||
703 | 700 | ||
704 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) | 701 | int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) |
705 | { | 702 | { |
706 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | 703 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ |
707 | void *iter = NULL; | 704 | void *iter = NULL; |
@@ -721,83 +718,118 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) | |||
721 | 718 | ||
722 | return -EIO; | 719 | return -EIO; |
723 | } | 720 | } |
721 | EXPORT_SYMBOL(extract_attr_from_ios); | ||
724 | 722 | ||
725 | static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, | 723 | static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, |
726 | struct osd_attr *attr) | 724 | struct osd_attr *attr) |
727 | { | 725 | { |
728 | int last_comp = cur_comp + ios->layout->mirrors_p1; | 726 | int last_comp = cur_comp + ios->layout->mirrors_p1; |
729 | 727 | ||
730 | for (; cur_comp < last_comp; ++cur_comp) { | 728 | for (; cur_comp < last_comp; ++cur_comp) { |
731 | struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; | 729 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
732 | struct osd_request *or; | 730 | struct osd_request *or; |
733 | 731 | ||
734 | or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL); | 732 | or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); |
735 | if (unlikely(!or)) { | 733 | if (unlikely(!or)) { |
736 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | 734 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
737 | return -ENOMEM; | 735 | return -ENOMEM; |
738 | } | 736 | } |
739 | per_dev->or = or; | 737 | per_dev->or = or; |
740 | 738 | ||
741 | osd_req_set_attributes(or, &ios->obj); | 739 | osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); |
742 | osd_req_add_set_attr_list(or, attr, 1); | 740 | osd_req_add_set_attr_list(or, attr, 1); |
743 | } | 741 | } |
744 | 742 | ||
745 | return 0; | 743 | return 0; |
746 | } | 744 | } |
747 | 745 | ||
748 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) | 746 | struct _trunc_info { |
747 | struct _striping_info si; | ||
748 | u64 prev_group_obj_off; | ||
749 | u64 next_group_obj_off; | ||
750 | |||
751 | unsigned first_group_dev; | ||
752 | unsigned nex_group_dev; | ||
753 | unsigned max_devs; | ||
754 | }; | ||
755 | |||
756 | void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, | ||
757 | struct _trunc_info *ti) | ||
758 | { | ||
759 | unsigned stripe_unit = layout->stripe_unit; | ||
760 | |||
761 | _calc_stripe_info(layout, file_offset, &ti->si); | ||
762 | |||
763 | ti->prev_group_obj_off = ti->si.M * stripe_unit; | ||
764 | ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0; | ||
765 | |||
766 | ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); | ||
767 | ti->nex_group_dev = ti->first_group_dev + layout->group_width; | ||
768 | ti->max_devs = layout->group_width * layout->group_count; | ||
769 | } | ||
770 | |||
771 | int ore_truncate(struct ore_layout *layout, struct ore_components *comps, | ||
772 | u64 size) | ||
749 | { | 773 | { |
750 | struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; | 774 | struct ore_io_state *ios; |
751 | struct exofs_io_state *ios; | ||
752 | struct exofs_trunc_attr { | 775 | struct exofs_trunc_attr { |
753 | struct osd_attr attr; | 776 | struct osd_attr attr; |
754 | __be64 newsize; | 777 | __be64 newsize; |
755 | } *size_attrs; | 778 | } *size_attrs; |
756 | struct _striping_info si; | 779 | struct _trunc_info ti; |
757 | int i, ret; | 780 | int i, ret; |
758 | 781 | ||
759 | ret = exofs_get_io_state(&sbi->layout, &ios); | 782 | ret = ore_get_io_state(layout, comps, &ios); |
760 | if (unlikely(ret)) | 783 | if (unlikely(ret)) |
761 | return ret; | 784 | return ret; |
762 | 785 | ||
763 | size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs), | 786 | _calc_trunk_info(ios->layout, size, &ti); |
787 | |||
788 | size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs), | ||
764 | GFP_KERNEL); | 789 | GFP_KERNEL); |
765 | if (unlikely(!size_attrs)) { | 790 | if (unlikely(!size_attrs)) { |
766 | ret = -ENOMEM; | 791 | ret = -ENOMEM; |
767 | goto out; | 792 | goto out; |
768 | } | 793 | } |
769 | 794 | ||
770 | ios->obj.id = exofs_oi_objno(oi); | 795 | ios->numdevs = ios->comps->numdevs; |
771 | ios->cred = oi->i_cred; | ||
772 | 796 | ||
773 | ios->numdevs = ios->layout->s_numdevs; | 797 | for (i = 0; i < ti.max_devs; ++i) { |
774 | _calc_stripe_info(ios, size, &si); | ||
775 | |||
776 | for (i = 0; i < ios->layout->group_width; ++i) { | ||
777 | struct exofs_trunc_attr *size_attr = &size_attrs[i]; | 798 | struct exofs_trunc_attr *size_attr = &size_attrs[i]; |
778 | u64 obj_size; | 799 | u64 obj_size; |
779 | 800 | ||
780 | if (i < si.dev) | 801 | if (i < ti.first_group_dev) |
781 | obj_size = si.obj_offset + | 802 | obj_size = ti.prev_group_obj_off; |
782 | ios->layout->stripe_unit - si.unit_off; | 803 | else if (i >= ti.nex_group_dev) |
783 | else if (i == si.dev) | 804 | obj_size = ti.next_group_obj_off; |
784 | obj_size = si.obj_offset; | 805 | else if (i < ti.si.dev) /* dev within this group */ |
785 | else /* i > si.dev */ | 806 | obj_size = ti.si.obj_offset + |
786 | obj_size = si.obj_offset - si.unit_off; | 807 | ios->layout->stripe_unit - ti.si.unit_off; |
808 | else if (i == ti.si.dev) | ||
809 | obj_size = ti.si.obj_offset; | ||
810 | else /* i > ti.si.dev */ | ||
811 | obj_size = ti.si.obj_offset - ti.si.unit_off; | ||
787 | 812 | ||
788 | size_attr->newsize = cpu_to_be64(obj_size); | 813 | size_attr->newsize = cpu_to_be64(obj_size); |
789 | size_attr->attr = g_attr_logical_length; | 814 | size_attr->attr = g_attr_logical_length; |
790 | size_attr->attr.val_ptr = &size_attr->newsize; | 815 | size_attr->attr.val_ptr = &size_attr->newsize; |
791 | 816 | ||
817 | ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", | ||
818 | _LLU(comps->comps->obj.id), _LLU(obj_size), i); | ||
792 | ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, | 819 | ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, |
793 | &size_attr->attr); | 820 | &size_attr->attr); |
794 | if (unlikely(ret)) | 821 | if (unlikely(ret)) |
795 | goto out; | 822 | goto out; |
796 | } | 823 | } |
797 | ret = exofs_io_execute(ios); | 824 | ret = ore_io_execute(ios); |
798 | 825 | ||
799 | out: | 826 | out: |
800 | kfree(size_attrs); | 827 | kfree(size_attrs); |
801 | exofs_put_io_state(ios); | 828 | ore_put_io_state(ios); |
802 | return ret; | 829 | return ret; |
803 | } | 830 | } |
831 | EXPORT_SYMBOL(ore_truncate); | ||
832 | |||
833 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | ||
834 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | ||
835 | EXPORT_SYMBOL(g_attr_logical_length); | ||
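The three-way obj_size computation above decides how much of each component object survives ore_truncate(). A minimal standalone sketch of the same arithmetic for the simple single-group case (group_count == 1, no mirrors) follows; the helper name and the userspace types are illustrative, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Sketch only: per-device object size after truncating a RAID-0 striped
 * file to 'size' bytes, single-group layout. Mirrors the i < dev,
 * i == dev, i > dev branches of ore_truncate() above. */
static uint64_t trunc_obj_size(uint64_t size, uint32_t stripe_unit,
			       uint32_t group_width, uint32_t i)
{
	uint64_t stripe_no = size / stripe_unit;  /* global stripe-unit index */
	uint32_t unit_off = size % stripe_unit;   /* offset inside that unit */
	uint32_t dev = stripe_no % group_width;   /* device holding the cut */
	uint64_t obj_offset = (stripe_no / group_width) * stripe_unit + unit_off;

	if (i < dev)          /* keeps a full unit on the current stripe row */
		return obj_offset + stripe_unit - unit_off;
	else if (i == dev)    /* the object holding the cut point itself */
		return obj_offset;
	else                  /* i > dev: gets nothing from the current row */
		return obj_offset - unit_off;
}

int main(void)
{
	/* 64K stripe unit over 3 devices, truncate to 200K: the cut lands
	 * 8K into unit 3, which lives on dev 0 of stripe row 1. */
	for (uint32_t i = 0; i < 3; i++)
		printf("dev %u -> 0x%llx\n", i, (unsigned long long)
		       trunc_obj_size(200 << 10, 64 << 10, 3, i));
	return 0;
}

Under those numbers this prints 0x12000 for dev 0 and 0x10000 for devs 1 and 2: dev 0 keeps one full unit plus the 8K partial, the other devices keep one full unit each.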
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
deleted file mode 100644
index c52e9888b8ab..000000000000
--- a/fs/exofs/pnfs.h
+++ /dev/null
@@ -1,45 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, 2009 | ||
3 | * Boaz Harrosh <bharrosh@panasas.com> | ||
4 | * | ||
5 | * This file is part of exofs. | ||
6 | * | ||
7 | * exofs is free software; you can redistribute it and/or modify it under the | ||
8 | * terms of the GNU General Public License version 2 as published by the Free | ||
9 | * Software Foundation. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* FIXME: Remove this file once pnfs hits mainline */ | ||
14 | |||
15 | #ifndef __EXOFS_PNFS_H__ | ||
16 | #define __EXOFS_PNFS_H__ | ||
17 | |||
18 | #if ! defined(__PNFS_OSD_XDR_H__) | ||
19 | |||
20 | enum pnfs_iomode { | ||
21 | IOMODE_READ = 1, | ||
22 | IOMODE_RW = 2, | ||
23 | IOMODE_ANY = 3, | ||
24 | }; | ||
25 | |||
26 | /* Layout Structure */ | ||
27 | enum pnfs_osd_raid_algorithm4 { | ||
28 | PNFS_OSD_RAID_0 = 1, | ||
29 | PNFS_OSD_RAID_4 = 2, | ||
30 | PNFS_OSD_RAID_5 = 3, | ||
31 | PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ | ||
32 | }; | ||
33 | |||
34 | struct pnfs_osd_data_map { | ||
35 | u32 odm_num_comps; | ||
36 | u64 odm_stripe_unit; | ||
37 | u32 odm_group_width; | ||
38 | u32 odm_group_depth; | ||
39 | u32 odm_mirror_cnt; | ||
40 | u32 odm_raid_algorithm; | ||
41 | }; | ||
42 | |||
43 | #endif /* ! defined(__PNFS_OSD_XDR_H__) */ | ||
44 | |||
45 | #endif /* __EXOFS_PNFS_H__ */ | ||
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index c57beddcc217..274894053b02 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -40,6 +40,8 @@ | |||
40 | 40 | ||
41 | #include "exofs.h" | 41 | #include "exofs.h" |
42 | 42 | ||
43 | #define EXOFS_DBGMSG2(M...) do {} while (0) | ||
44 | |||
43 | /****************************************************************************** | 45 | /****************************************************************************** |
44 | * MOUNT OPTIONS | 46 | * MOUNT OPTIONS |
45 | *****************************************************************************/ | 47 | *****************************************************************************/ |
@@ -208,10 +210,48 @@ static void destroy_inodecache(void) | |||
208 | } | 210 | } |
209 | 211 | ||
210 | /****************************************************************************** | 212 | /****************************************************************************** |
211 | * SUPERBLOCK FUNCTIONS | 213 | * Some osd helpers |
212 | *****************************************************************************/ | 214 | *****************************************************************************/ |
213 | static const struct super_operations exofs_sops; | 215 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) |
214 | static const struct export_operations exofs_export_ops; | 216 | { |
217 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | ||
218 | } | ||
219 | |||
220 | static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | ||
221 | u64 offset, void *p, unsigned length) | ||
222 | { | ||
223 | struct osd_request *or = osd_start_request(od, GFP_KERNEL); | ||
224 | /* struct osd_sense_info osi = {.key = 0};*/ | ||
225 | int ret; | ||
226 | |||
227 | if (unlikely(!or)) { | ||
228 | EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); | ||
229 | return -ENOMEM; | ||
230 | } | ||
231 | ret = osd_req_read_kern(or, obj, offset, p, length); | ||
232 | if (unlikely(ret)) { | ||
233 | EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); | ||
234 | goto out; | ||
235 | } | ||
236 | |||
237 | ret = osd_finalize_request(or, 0, cred, NULL); | ||
238 | if (unlikely(ret)) { | ||
239 | EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); | ||
240 | goto out; | ||
241 | } | ||
242 | |||
243 | ret = osd_execute_request(or); | ||
244 | if (unlikely(ret)) | ||
245 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
246 | /* osd_req_decode_sense(or, ret); */ | ||
247 | |||
248 | out: | ||
249 | osd_end_request(or); | ||
250 | EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " | ||
251 | "length=0x%llx dev=%p ret=>%d\n", | ||
252 | _LLU(obj->id), _LLU(offset), _LLU(length), od, ret); | ||
253 | return ret; | ||
254 | } | ||
215 | 255 | ||
216 | static const struct osd_attr g_attr_sb_stats = ATTR_DEF( | 256 | static const struct osd_attr g_attr_sb_stats = ATTR_DEF( |
217 | EXOFS_APAGE_SB_DATA, | 257 | EXOFS_APAGE_SB_DATA, |
@@ -223,21 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) | |||
223 | struct osd_attr attrs[] = { | 263 | struct osd_attr attrs[] = { |
224 | [0] = g_attr_sb_stats, | 264 | [0] = g_attr_sb_stats, |
225 | }; | 265 | }; |
226 | struct exofs_io_state *ios; | 266 | struct ore_io_state *ios; |
227 | int ret; | 267 | int ret; |
228 | 268 | ||
229 | ret = exofs_get_io_state(&sbi->layout, &ios); | 269 | ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); |
230 | if (unlikely(ret)) { | 270 | if (unlikely(ret)) { |
231 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | 271 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); |
232 | return ret; | 272 | return ret; |
233 | } | 273 | } |
234 | 274 | ||
235 | ios->cred = sbi->s_cred; | ||
236 | |||
237 | ios->in_attr = attrs; | 275 | ios->in_attr = attrs; |
238 | ios->in_attr_len = ARRAY_SIZE(attrs); | 276 | ios->in_attr_len = ARRAY_SIZE(attrs); |
239 | 277 | ||
240 | ret = exofs_sbi_read(ios); | 278 | ret = ore_read(ios); |
241 | if (unlikely(ret)) { | 279 | if (unlikely(ret)) { |
242 | EXOFS_ERR("Error reading super_block stats => %d\n", ret); | 280 | EXOFS_ERR("Error reading super_block stats => %d\n", ret); |
243 | goto out; | 281 | goto out; |
@@ -264,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) | |||
264 | } | 302 | } |
265 | 303 | ||
266 | out: | 304 | out: |
267 | exofs_put_io_state(ios); | 305 | ore_put_io_state(ios); |
268 | return ret; | 306 | return ret; |
269 | } | 307 | } |
270 | 308 | ||
271 | static void stats_done(struct exofs_io_state *ios, void *p) | 309 | static void stats_done(struct ore_io_state *ios, void *p) |
272 | { | 310 | { |
273 | exofs_put_io_state(ios); | 311 | ore_put_io_state(ios); |
274 | /* Good thanks nothing to do anymore */ | 312 | /* Good thanks nothing to do anymore */ |
275 | } | 313 | } |
276 | 314 | ||
@@ -280,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) | |||
280 | struct osd_attr attrs[] = { | 318 | struct osd_attr attrs[] = { |
281 | [0] = g_attr_sb_stats, | 319 | [0] = g_attr_sb_stats, |
282 | }; | 320 | }; |
283 | struct exofs_io_state *ios; | 321 | struct ore_io_state *ios; |
284 | int ret; | 322 | int ret; |
285 | 323 | ||
286 | ret = exofs_get_io_state(&sbi->layout, &ios); | 324 | ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); |
287 | if (unlikely(ret)) { | 325 | if (unlikely(ret)) { |
288 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | 326 | EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); |
289 | return ret; | 327 | return ret; |
290 | } | 328 | } |
291 | 329 | ||
@@ -293,21 +331,27 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) | |||
293 | sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); | 331 | sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); |
294 | attrs[0].val_ptr = &sbi->s_ess; | 332 | attrs[0].val_ptr = &sbi->s_ess; |
295 | 333 | ||
296 | ios->cred = sbi->s_cred; | 334 | |
297 | ios->done = stats_done; | 335 | ios->done = stats_done; |
298 | ios->private = sbi; | 336 | ios->private = sbi; |
299 | ios->out_attr = attrs; | 337 | ios->out_attr = attrs; |
300 | ios->out_attr_len = ARRAY_SIZE(attrs); | 338 | ios->out_attr_len = ARRAY_SIZE(attrs); |
301 | 339 | ||
302 | ret = exofs_sbi_write(ios); | 340 | ret = ore_write(ios); |
303 | if (unlikely(ret)) { | 341 | if (unlikely(ret)) { |
304 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); | 342 | EXOFS_ERR("%s: ore_write failed.\n", __func__); |
305 | exofs_put_io_state(ios); | 343 | ore_put_io_state(ios); |
306 | } | 344 | } |
307 | 345 | ||
308 | return ret; | 346 | return ret; |
309 | } | 347 | } |
310 | 348 | ||
349 | /****************************************************************************** | ||
350 | * SUPERBLOCK FUNCTIONS | ||
351 | *****************************************************************************/ | ||
352 | static const struct super_operations exofs_sops; | ||
353 | static const struct export_operations exofs_export_ops; | ||
354 | |||
311 | /* | 355 | /* |
312 | * Write the superblock to the OSD | 356 | * Write the superblock to the OSD |
313 | */ | 357 | */ |
@@ -315,7 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
315 | { | 359 | { |
316 | struct exofs_sb_info *sbi; | 360 | struct exofs_sb_info *sbi; |
317 | struct exofs_fscb *fscb; | 361 | struct exofs_fscb *fscb; |
318 | struct exofs_io_state *ios; | 362 | struct ore_comp one_comp; |
363 | struct ore_components comps; | ||
364 | struct ore_io_state *ios; | ||
319 | int ret = -ENOMEM; | 365 | int ret = -ENOMEM; |
320 | 366 | ||
321 | fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); | 367 | fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); |
@@ -331,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
331 | * version). Otherwise the exofs_fscb is read-only from mkfs time. All | 377 | * version). Otherwise the exofs_fscb is read-only from mkfs time. All |
332 | * the writeable info is set in exofs_sbi_write_stats() above. | 378 | * the writeable info is set in exofs_sbi_write_stats() above. |
333 | */ | 379 | */ |
334 | ret = exofs_get_io_state(&sbi->layout, &ios); | 380 | |
381 | exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID); | ||
382 | |||
383 | ret = ore_get_io_state(&sbi->layout, &comps, &ios); | ||
335 | if (unlikely(ret)) | 384 | if (unlikely(ret)) |
336 | goto out; | 385 | goto out; |
337 | 386 | ||
@@ -345,14 +394,12 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
345 | fscb->s_newfs = 0; | 394 | fscb->s_newfs = 0; |
346 | fscb->s_version = EXOFS_FSCB_VER; | 395 | fscb->s_version = EXOFS_FSCB_VER; |
347 | 396 | ||
348 | ios->obj.id = EXOFS_SUPER_ID; | ||
349 | ios->offset = 0; | 397 | ios->offset = 0; |
350 | ios->kern_buff = fscb; | 398 | ios->kern_buff = fscb; |
351 | ios->cred = sbi->s_cred; | ||
352 | 399 | ||
353 | ret = exofs_sbi_write(ios); | 400 | ret = ore_write(ios); |
354 | if (unlikely(ret)) | 401 | if (unlikely(ret)) |
355 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); | 402 | EXOFS_ERR("%s: ore_write failed.\n", __func__); |
356 | else | 403 | else |
357 | sb->s_dirt = 0; | 404 | sb->s_dirt = 0; |
358 | 405 | ||
@@ -360,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
360 | unlock_super(sb); | 407 | unlock_super(sb); |
361 | out: | 408 | out: |
362 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); | 409 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); |
363 | exofs_put_io_state(ios); | 410 | ore_put_io_state(ios); |
364 | kfree(fscb); | 411 | kfree(fscb); |
365 | return ret; | 412 | return ret; |
366 | } | 413 | } |
@@ -384,15 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path, | |||
384 | 431 | ||
385 | void exofs_free_sbi(struct exofs_sb_info *sbi) | 432 | void exofs_free_sbi(struct exofs_sb_info *sbi) |
386 | { | 433 | { |
387 | while (sbi->layout.s_numdevs) { | 434 | while (sbi->comps.numdevs) { |
388 | int i = --sbi->layout.s_numdevs; | 435 | int i = --sbi->comps.numdevs; |
389 | struct osd_dev *od = sbi->layout.s_ods[i]; | 436 | struct osd_dev *od = sbi->comps.ods[i]; |
390 | 437 | ||
391 | if (od) { | 438 | if (od) { |
392 | sbi->layout.s_ods[i] = NULL; | 439 | sbi->comps.ods[i] = NULL; |
393 | osduld_put_device(od); | 440 | osduld_put_device(od); |
394 | } | 441 | } |
395 | } | 442 | } |
443 | if (sbi->comps.ods != sbi->_min_one_dev) | ||
444 | kfree(sbi->comps.ods); | ||
396 | kfree(sbi); | 445 | kfree(sbi); |
397 | } | 446 | } |
398 | 447 | ||
@@ -419,8 +468,8 @@ static void exofs_put_super(struct super_block *sb) | |||
419 | msecs_to_jiffies(100)); | 468 | msecs_to_jiffies(100)); |
420 | } | 469 | } |
421 | 470 | ||
422 | _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], | 471 | _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0], |
423 | sbi->layout.s_pid); | 472 | sbi->one_comp.obj.partition); |
424 | 473 | ||
425 | bdi_destroy(&sbi->bdi); | 474 | bdi_destroy(&sbi->bdi); |
426 | exofs_free_sbi(sbi); | 475 | exofs_free_sbi(sbi); |
@@ -501,10 +550,19 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | |||
501 | return -EINVAL; | 550 | return -EINVAL; |
502 | } | 551 | } |
503 | 552 | ||
553 | EXOFS_DBGMSG("exofs: layout: " | ||
554 | "num_comps=%u stripe_unit=0x%x group_width=%u " | ||
555 | "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n", | ||
556 | numdevs, | ||
557 | sbi->layout.stripe_unit, | ||
558 | sbi->layout.group_width, | ||
559 | _LLU(sbi->layout.group_depth), | ||
560 | sbi->layout.mirrors_p1, | ||
561 | sbi->data_map.odm_raid_algorithm); | ||
504 | return 0; | 562 | return 0; |
505 | } | 563 | } |
506 | 564 | ||
507 | static unsigned __ra_pages(struct exofs_layout *layout) | 565 | static unsigned __ra_pages(struct ore_layout *layout) |
508 | { | 566 | { |
509 | const unsigned _MIN_RA = 32; /* min 128K read-ahead */ | 567 | const unsigned _MIN_RA = 32; /* min 128K read-ahead */ |
510 | unsigned ra_pages = layout->group_width * layout->stripe_unit / | 568 | unsigned ra_pages = layout->group_width * layout->stripe_unit / |
@@ -547,13 +605,11 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, | |||
547 | return !(odi->systemid_len || odi->osdname_len); | 605 | return !(odi->systemid_len || odi->osdname_len); |
548 | } | 606 | } |
549 | 607 | ||
550 | static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | 608 | static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, |
609 | struct osd_dev *fscb_od, | ||
551 | unsigned table_count) | 610 | unsigned table_count) |
552 | { | 611 | { |
553 | struct exofs_sb_info *sbi = *psbi; | 612 | struct ore_comp comp; |
554 | struct osd_dev *fscb_od; | ||
555 | struct osd_obj_id obj = {.partition = sbi->layout.s_pid, | ||
556 | .id = EXOFS_DEVTABLE_ID}; | ||
557 | struct exofs_device_table *dt; | 613 | struct exofs_device_table *dt; |
558 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + | 614 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + |
559 | sizeof(*dt); | 615 | sizeof(*dt); |
@@ -567,10 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | |||
567 | return -ENOMEM; | 623 | return -ENOMEM; |
568 | } | 624 | } |
569 | 625 | ||
570 | fscb_od = sbi->layout.s_ods[0]; | 626 | sbi->comps.numdevs = 0; |
571 | sbi->layout.s_ods[0] = NULL; | 627 | |
572 | sbi->layout.s_numdevs = 0; | 628 | comp.obj.partition = sbi->one_comp.obj.partition; |
573 | ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); | 629 | comp.obj.id = EXOFS_DEVTABLE_ID; |
630 | exofs_make_credential(comp.cred, &comp.obj); | ||
631 | |||
632 | ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt, | ||
633 | table_bytes); | ||
574 | if (unlikely(ret)) { | 634 | if (unlikely(ret)) { |
575 | EXOFS_ERR("ERROR: reading device table\n"); | 635 | EXOFS_ERR("ERROR: reading device table\n"); |
576 | goto out; | 636 | goto out; |
@@ -588,16 +648,18 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | |||
588 | goto out; | 648 | goto out; |
589 | 649 | ||
590 | if (likely(numdevs > 1)) { | 650 | if (likely(numdevs > 1)) { |
591 | unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); | 651 | unsigned size = numdevs * sizeof(sbi->comps.ods[0]); |
592 | 652 | ||
593 | sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); | 653 | /* Twice bigger table: See exofs_init_comps() and below |
594 | if (unlikely(!sbi)) { | 654 | * comment |
655 | */ | ||
656 | sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL); | ||
657 | if (unlikely(!sbi->comps.ods)) { | ||
658 | EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", | ||
659 | numdevs); | ||
595 | ret = -ENOMEM; | 660 | ret = -ENOMEM; |
596 | goto out; | 661 | goto out; |
597 | } | 662 | } |
598 | memset(&sbi->layout.s_ods[1], 0, | ||
599 | size - sizeof(sbi->layout.s_ods[0])); | ||
600 | *psbi = sbi; | ||
601 | } | 663 | } |
602 | 664 | ||
603 | for (i = 0; i < numdevs; i++) { | 665 | for (i = 0; i < numdevs; i++) { |
@@ -619,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | |||
619 | * line. We always keep them in device-table order. | 681 | * line. We always keep them in device-table order. |
620 | */ | 682 | */ |
621 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { | 683 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { |
622 | sbi->layout.s_ods[i] = fscb_od; | 684 | sbi->comps.ods[i] = fscb_od; |
623 | ++sbi->layout.s_numdevs; | 685 | ++sbi->comps.numdevs; |
624 | fscb_od = NULL; | 686 | fscb_od = NULL; |
625 | continue; | 687 | continue; |
626 | } | 688 | } |
@@ -633,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | |||
633 | goto out; | 695 | goto out; |
634 | } | 696 | } |
635 | 697 | ||
636 | sbi->layout.s_ods[i] = od; | 698 | sbi->comps.ods[i] = od; |
637 | ++sbi->layout.s_numdevs; | 699 | ++sbi->comps.numdevs; |
638 | 700 | ||
639 | /* Read the fscb of the other devices to make sure the FS | 701 | /* Read the fscb of the other devices to make sure the FS |
640 | * partition is there. | 702 | * partition is there. |
641 | */ | 703 | */ |
642 | ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, | 704 | ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, |
643 | sizeof(fscb)); | 705 | sizeof(fscb)); |
644 | if (unlikely(ret)) { | 706 | if (unlikely(ret)) { |
645 | EXOFS_ERR("ERROR: Malformed participating device " | 707 | EXOFS_ERR("ERROR: Malformed participating device " |
@@ -656,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | |||
656 | 718 | ||
657 | out: | 719 | out: |
658 | kfree(dt); | 720 | kfree(dt); |
659 | if (unlikely(!ret && fscb_od)) { | 721 | if (likely(!ret)) { |
660 | EXOFS_ERR( | 722 | unsigned numdevs = sbi->comps.numdevs; |
661 | "ERROR: Bad device-table container device not present\n"); | ||
662 | osduld_put_device(fscb_od); | ||
663 | ret = -EINVAL; | ||
664 | } | ||
665 | 723 | ||
724 | if (unlikely(fscb_od)) { | ||
725 | EXOFS_ERR("ERROR: Bad device-table container device not present\n"); | ||
726 | osduld_put_device(fscb_od); | ||
727 | return -EINVAL; | ||
728 | } | ||
729 | /* exofs round-robins the device table view according to inode | ||
730 | * number. We hold a twice-bigger table, hence inodes can point | ||
731 | * to any device and have a sequential view of the table | ||
732 | * starting at this device. See exofs_init_comps() | ||
733 | */ | ||
734 | for (i = 0; i < numdevs - 1; ++i) | ||
735 | sbi->comps.ods[i + numdevs] = sbi->comps.ods[i]; | ||
736 | } | ||
666 | return ret; | 737 | return ret; |
667 | } | 738 | } |
668 | 739 | ||
@@ -676,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
676 | struct exofs_sb_info *sbi; /*extended info */ | 747 | struct exofs_sb_info *sbi; /*extended info */ |
677 | struct osd_dev *od; /* Master device */ | 748 | struct osd_dev *od; /* Master device */ |
678 | struct exofs_fscb fscb; /*on-disk superblock info */ | 749 | struct exofs_fscb fscb; /*on-disk superblock info */ |
679 | struct osd_obj_id obj; | 750 | struct ore_comp comp; |
680 | unsigned table_count; | 751 | unsigned table_count; |
681 | int ret; | 752 | int ret; |
682 | 753 | ||
@@ -684,10 +755,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
684 | if (!sbi) | 755 | if (!sbi) |
685 | return -ENOMEM; | 756 | return -ENOMEM; |
686 | 757 | ||
687 | ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); | ||
688 | if (ret) | ||
689 | goto free_bdi; | ||
690 | |||
691 | /* use mount options to fill superblock */ | 758 | /* use mount options to fill superblock */ |
692 | if (opts->is_osdname) { | 759 | if (opts->is_osdname) { |
693 | struct osd_dev_info odi = {.systemid_len = 0}; | 760 | struct osd_dev_info odi = {.systemid_len = 0}; |
@@ -695,6 +762,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
695 | odi.osdname_len = strlen(opts->dev_name); | 762 | odi.osdname_len = strlen(opts->dev_name); |
696 | odi.osdname = (u8 *)opts->dev_name; | 763 | odi.osdname = (u8 *)opts->dev_name; |
697 | od = osduld_info_lookup(&odi); | 764 | od = osduld_info_lookup(&odi); |
765 | kfree(opts->dev_name); | ||
766 | opts->dev_name = NULL; | ||
698 | } else { | 767 | } else { |
699 | od = osduld_path_lookup(opts->dev_name); | 768 | od = osduld_path_lookup(opts->dev_name); |
700 | } | 769 | } |
@@ -709,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
709 | sbi->layout.group_width = 1; | 778 | sbi->layout.group_width = 1; |
710 | sbi->layout.group_depth = -1; | 779 | sbi->layout.group_depth = -1; |
711 | sbi->layout.group_count = 1; | 780 | sbi->layout.group_count = 1; |
712 | sbi->layout.s_ods[0] = od; | ||
713 | sbi->layout.s_numdevs = 1; | ||
714 | sbi->layout.s_pid = opts->pid; | ||
715 | sbi->s_timeout = opts->timeout; | 781 | sbi->s_timeout = opts->timeout; |
716 | 782 | ||
783 | sbi->one_comp.obj.partition = opts->pid; | ||
784 | sbi->one_comp.obj.id = 0; | ||
785 | exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); | ||
786 | sbi->comps.numdevs = 1; | ||
787 | sbi->comps.single_comp = EC_SINGLE_COMP; | ||
788 | sbi->comps.comps = &sbi->one_comp; | ||
789 | sbi->comps.ods = sbi->_min_one_dev; | ||
790 | |||
717 | /* fill in some other data by hand */ | 791 | /* fill in some other data by hand */ |
718 | memset(sb->s_id, 0, sizeof(sb->s_id)); | 792 | memset(sb->s_id, 0, sizeof(sb->s_id)); |
719 | strcpy(sb->s_id, "exofs"); | 793 | strcpy(sb->s_id, "exofs"); |
@@ -724,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
724 | sb->s_bdev = NULL; | 798 | sb->s_bdev = NULL; |
725 | sb->s_dev = 0; | 799 | sb->s_dev = 0; |
726 | 800 | ||
727 | obj.partition = sbi->layout.s_pid; | 801 | comp.obj.partition = sbi->one_comp.obj.partition; |
728 | obj.id = EXOFS_SUPER_ID; | 802 | comp.obj.id = EXOFS_SUPER_ID; |
729 | exofs_make_credential(sbi->s_cred, &obj); | 803 | exofs_make_credential(comp.cred, &comp.obj); |
730 | 804 | ||
731 | ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); | 805 | ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb)); |
732 | if (unlikely(ret)) | 806 | if (unlikely(ret)) |
733 | goto free_sbi; | 807 | goto free_sbi; |
734 | 808 | ||
@@ -757,9 +831,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
757 | 831 | ||
758 | table_count = le64_to_cpu(fscb.s_dev_table_count); | 832 | table_count = le64_to_cpu(fscb.s_dev_table_count); |
759 | if (table_count) { | 833 | if (table_count) { |
760 | ret = exofs_read_lookup_dev_table(&sbi, table_count); | 834 | ret = exofs_read_lookup_dev_table(sbi, od, table_count); |
761 | if (unlikely(ret)) | 835 | if (unlikely(ret)) |
762 | goto free_sbi; | 836 | goto free_sbi; |
837 | } else { | ||
838 | sbi->comps.ods[0] = od; | ||
763 | } | 839 | } |
764 | 840 | ||
765 | __sbi_read_stats(sbi); | 841 | __sbi_read_stats(sbi); |
@@ -793,20 +869,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
793 | goto free_sbi; | 869 | goto free_sbi; |
794 | } | 870 | } |
795 | 871 | ||
796 | _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], | 872 | ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); |
797 | sbi->layout.s_pid); | 873 | if (ret) { |
798 | if (opts->is_osdname) | 874 | EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); |
799 | kfree(opts->dev_name); | 875 | goto free_sbi; |
876 | } | ||
877 | |||
878 | _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0], | ||
879 | sbi->one_comp.obj.partition); | ||
800 | return 0; | 880 | return 0; |
801 | 881 | ||
802 | free_sbi: | 882 | free_sbi: |
803 | bdi_destroy(&sbi->bdi); | ||
804 | free_bdi: | ||
805 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", | 883 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", |
806 | opts->dev_name, sbi->layout.s_pid, ret); | 884 | opts->dev_name, sbi->one_comp.obj.partition, ret); |
807 | exofs_free_sbi(sbi); | 885 | exofs_free_sbi(sbi); |
808 | if (opts->is_osdname) | ||
809 | kfree(opts->dev_name); | ||
810 | return ret; | 886 | return ret; |
811 | } | 887 | } |
812 | 888 | ||
@@ -837,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
837 | { | 913 | { |
838 | struct super_block *sb = dentry->d_sb; | 914 | struct super_block *sb = dentry->d_sb; |
839 | struct exofs_sb_info *sbi = sb->s_fs_info; | 915 | struct exofs_sb_info *sbi = sb->s_fs_info; |
840 | struct exofs_io_state *ios; | 916 | struct ore_io_state *ios; |
841 | struct osd_attr attrs[] = { | 917 | struct osd_attr attrs[] = { |
842 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, | 918 | ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, |
843 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), | 919 | OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), |
@@ -846,21 +922,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
846 | }; | 922 | }; |
847 | uint64_t capacity = ULLONG_MAX; | 923 | uint64_t capacity = ULLONG_MAX; |
848 | uint64_t used = ULLONG_MAX; | 924 | uint64_t used = ULLONG_MAX; |
849 | uint8_t cred_a[OSD_CAP_LEN]; | ||
850 | int ret; | 925 | int ret; |
851 | 926 | ||
852 | ret = exofs_get_io_state(&sbi->layout, &ios); | 927 | ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); |
853 | if (ret) { | 928 | if (ret) { |
854 | EXOFS_DBGMSG("exofs_get_io_state failed.\n"); | 929 | EXOFS_DBGMSG("ore_get_io_state failed.\n"); |
855 | return ret; | 930 | return ret; |
856 | } | 931 | } |
857 | 932 | ||
858 | exofs_make_credential(cred_a, &ios->obj); | ||
859 | ios->cred = sbi->s_cred; | ||
860 | ios->in_attr = attrs; | 933 | ios->in_attr = attrs; |
861 | ios->in_attr_len = ARRAY_SIZE(attrs); | 934 | ios->in_attr_len = ARRAY_SIZE(attrs); |
862 | 935 | ||
863 | ret = exofs_sbi_read(ios); | 936 | ret = ore_read(ios); |
864 | if (unlikely(ret)) | 937 | if (unlikely(ret)) |
865 | goto out; | 938 | goto out; |
866 | 939 | ||
@@ -889,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
889 | buf->f_namelen = EXOFS_NAME_LEN; | 962 | buf->f_namelen = EXOFS_NAME_LEN; |
890 | 963 | ||
891 | out: | 964 | out: |
892 | exofs_put_io_state(ios); | 965 | ore_put_io_state(ios); |
893 | return ret; | 966 | return ret; |
894 | } | 967 | } |
895 | 968 | ||
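The doubled ods[] table built in exofs_read_lookup_dev_table() above exists so that per-inode code can take a rotated yet contiguous view of the device list, as the round-robin comment explains. A hedged userspace sketch of the idea follows; the first_dev derivation is an assumption modeled on the exofs_init_comps() reference, whose body is not part of this excerpt:

#include <stdlib.h>
#include <string.h>

struct osd_dev;   /* opaque stand-in for the real device handle */

/* Sketch: build a (2 * numdevs - 1)-entry table whose tail repeats the
 * head, using the same copy loop as the patch. Any rotation then spans
 * numdevs consecutive slots with no wraparound test in the I/O path. */
static struct osd_dev **make_doubled_table(struct osd_dev **ods,
					   unsigned numdevs)
{
	struct osd_dev **t = calloc(2 * numdevs - 1, sizeof(*t));
	unsigned i;

	if (!t)
		return NULL;
	memcpy(t, ods, numdevs * sizeof(*t));
	for (i = 0; i < numdevs - 1; i++)
		t[i + numdevs] = t[i];
	return t;
}

/* Assumed per-inode view: start the sequential window at a device
 * chosen round-robin from the inode number. */
static struct osd_dev **inode_dev_view(struct osd_dev **t, unsigned numdevs,
				       unsigned long ino)
{
	return &t[ino % numdevs];  /* numdevs contiguous, pre-rotated slots */
}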
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 52c053763942..35d6a3cfd9ff 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -194,12 +194,10 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
194 | case ACL_TYPE_ACCESS: | 194 | case ACL_TYPE_ACCESS: |
195 | name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; | 195 | name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; |
196 | if (acl) { | 196 | if (acl) { |
197 | mode_t mode = inode->i_mode; | 197 | error = posix_acl_equiv_mode(acl, &inode->i_mode); |
198 | error = posix_acl_equiv_mode(acl, &mode); | ||
199 | if (error < 0) | 198 | if (error < 0) |
200 | return error; | 199 | return error; |
201 | else { | 200 | else { |
202 | inode->i_mode = mode; | ||
203 | inode->i_ctime = CURRENT_TIME_SEC; | 201 | inode->i_ctime = CURRENT_TIME_SEC; |
204 | mark_inode_dirty(inode); | 202 | mark_inode_dirty(inode); |
205 | if (error == 0) | 203 | if (error == 0) |
@@ -253,16 +251,14 @@ ext2_init_acl(struct inode *inode, struct inode *dir) | |||
253 | inode->i_mode &= ~current_umask(); | 251 | inode->i_mode &= ~current_umask(); |
254 | } | 252 | } |
255 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { | 253 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { |
256 | mode_t mode = inode->i_mode; | ||
257 | if (S_ISDIR(inode->i_mode)) { | 254 | if (S_ISDIR(inode->i_mode)) { |
258 | error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); | 255 | error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); |
259 | if (error) | 256 | if (error) |
260 | goto cleanup; | 257 | goto cleanup; |
261 | } | 258 | } |
262 | error = posix_acl_create(&acl, GFP_KERNEL, &mode); | 259 | error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); |
263 | if (error < 0) | 260 | if (error < 0) |
264 | return error; | 261 | return error; |
265 | inode->i_mode = mode; | ||
266 | if (error > 0) { | 262 | if (error > 0) { |
267 | /* This is an extended ACL */ | 263 | /* This is an extended ACL */ |
268 | error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); | 264 | error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); |
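Both this hunk and the matching ext3 one below drop the local mode_t temporary: posix_acl_equiv_mode() and posix_acl_create() now take a pointer to the inode's mode and update it in place. A condensed sketch of the resulting create-time flow, written against the post-patch signatures (kernel fragment, not standalone):

	/* Sketch of create-time ACL inheritance after this patch. */
	error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
	if (error < 0)
		return error;   /* allocation or conversion failure */
	if (error > 0)          /* ACL not fully expressible as mode bits */
		error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl);
	/* error == 0: the ACL folded entirely into inode->i_mode */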
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 5c0a6a4fb052..503bfb0ed79b 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -61,7 +61,6 @@ extern int ext2_init_acl (struct inode *, struct inode *); | |||
61 | #else | 61 | #else |
62 | #include <linux/sched.h> | 62 | #include <linux/sched.h> |
63 | #define ext2_get_acl NULL | 63 | #define ext2_get_acl NULL |
64 | #define ext2_get_acl NULL | ||
65 | #define ext2_set_acl NULL | 64 | #define ext2_set_acl NULL |
66 | 65 | ||
67 | static inline int | 66 | static inline int |
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 529970617a21..d27b71f1d183 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, | |||
161 | 161 | ||
162 | if (name == NULL) | 162 | if (name == NULL) |
163 | return -EINVAL; | 163 | return -EINVAL; |
164 | name_len = strlen(name); | ||
165 | if (name_len > 255) | ||
166 | return -ERANGE; | ||
167 | |||
164 | down_read(&EXT2_I(inode)->xattr_sem); | 168 | down_read(&EXT2_I(inode)->xattr_sem); |
165 | error = -ENODATA; | 169 | error = -ENODATA; |
166 | if (!EXT2_I(inode)->i_file_acl) | 170 | if (!EXT2_I(inode)->i_file_acl) |
@@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", | |||
181 | error = -EIO; | 185 | error = -EIO; |
182 | goto cleanup; | 186 | goto cleanup; |
183 | } | 187 | } |
184 | /* find named attribute */ | ||
185 | name_len = strlen(name); | ||
186 | 188 | ||
187 | error = -ERANGE; | 189 | /* find named attribute */ |
188 | if (name_len > 255) | ||
189 | goto cleanup; | ||
190 | entry = FIRST_ENTRY(bh); | 190 | entry = FIRST_ENTRY(bh); |
191 | while (!IS_LAST_ENTRY(entry)) { | 191 | while (!IS_LAST_ENTRY(entry)) { |
192 | struct ext2_xattr_entry *next = | 192 | struct ext2_xattr_entry *next = |
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 6c29bf0df04a..3091f62e55b6 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, | |||
199 | case ACL_TYPE_ACCESS: | 199 | case ACL_TYPE_ACCESS: |
200 | name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; | 200 | name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; |
201 | if (acl) { | 201 | if (acl) { |
202 | mode_t mode = inode->i_mode; | 202 | error = posix_acl_equiv_mode(acl, &inode->i_mode); |
203 | error = posix_acl_equiv_mode(acl, &mode); | ||
204 | if (error < 0) | 203 | if (error < 0) |
205 | return error; | 204 | return error; |
206 | else { | 205 | else { |
207 | inode->i_mode = mode; | ||
208 | inode->i_ctime = CURRENT_TIME_SEC; | 206 | inode->i_ctime = CURRENT_TIME_SEC; |
209 | ext3_mark_inode_dirty(handle, inode); | 207 | ext3_mark_inode_dirty(handle, inode); |
210 | if (error == 0) | 208 | if (error == 0) |
@@ -261,19 +259,16 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
261 | inode->i_mode &= ~current_umask(); | 259 | inode->i_mode &= ~current_umask(); |
262 | } | 260 | } |
263 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { | 261 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { |
264 | mode_t mode = inode->i_mode; | ||
265 | |||
266 | if (S_ISDIR(inode->i_mode)) { | 262 | if (S_ISDIR(inode->i_mode)) { |
267 | error = ext3_set_acl(handle, inode, | 263 | error = ext3_set_acl(handle, inode, |
268 | ACL_TYPE_DEFAULT, acl); | 264 | ACL_TYPE_DEFAULT, acl); |
269 | if (error) | 265 | if (error) |
270 | goto cleanup; | 266 | goto cleanup; |
271 | } | 267 | } |
272 | error = posix_acl_create(&acl, GFP_NOFS, &mode); | 268 | error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); |
273 | if (error < 0) | 269 | if (error < 0) |
274 | return error; | 270 | return error; |
275 | 271 | ||
276 | inode->i_mode = mode; | ||
277 | if (error > 0) { | 272 | if (error > 0) { |
278 | /* This is an extended ACL */ | 273 | /* This is an extended ACL */ |
279 | error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); | 274 | error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index fe52297e31ad..6386d76f44a7 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/quotaops.h> | 21 | #include <linux/quotaops.h> |
22 | #include <linux/buffer_head.h> | 22 | #include <linux/buffer_head.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <trace/events/ext3.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * balloc.c contains the blocks allocation and deallocation routines | 27 | * balloc.c contains the blocks allocation and deallocation routines |
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) | |||
161 | desc = ext3_get_group_desc(sb, block_group, NULL); | 162 | desc = ext3_get_group_desc(sb, block_group, NULL); |
162 | if (!desc) | 163 | if (!desc) |
163 | return NULL; | 164 | return NULL; |
165 | trace_ext3_read_block_bitmap(sb, block_group); | ||
164 | bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); | 166 | bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); |
165 | bh = sb_getblk(sb, bitmap_blk); | 167 | bh = sb_getblk(sb, bitmap_blk); |
166 | if (unlikely(!bh)) { | 168 | if (unlikely(!bh)) { |
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb, | |||
351 | struct rb_node * parent = NULL; | 353 | struct rb_node * parent = NULL; |
352 | struct ext3_reserve_window_node *this; | 354 | struct ext3_reserve_window_node *this; |
353 | 355 | ||
356 | trace_ext3_rsv_window_add(sb, rsv); | ||
354 | while (*p) | 357 | while (*p) |
355 | { | 358 | { |
356 | parent = *p; | 359 | parent = *p; |
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode) | |||
476 | rsv = &block_i->rsv_window_node; | 479 | rsv = &block_i->rsv_window_node; |
477 | if (!rsv_is_empty(&rsv->rsv_window)) { | 480 | if (!rsv_is_empty(&rsv->rsv_window)) { |
478 | spin_lock(rsv_lock); | 481 | spin_lock(rsv_lock); |
479 | if (!rsv_is_empty(&rsv->rsv_window)) | 482 | if (!rsv_is_empty(&rsv->rsv_window)) { |
483 | trace_ext3_discard_reservation(inode, rsv); | ||
480 | rsv_window_remove(inode->i_sb, rsv); | 484 | rsv_window_remove(inode->i_sb, rsv); |
485 | } | ||
481 | spin_unlock(rsv_lock); | 486 | spin_unlock(rsv_lock); |
482 | } | 487 | } |
483 | } | 488 | } |
@@ -683,14 +688,10 @@ error_return: | |||
683 | void ext3_free_blocks(handle_t *handle, struct inode *inode, | 688 | void ext3_free_blocks(handle_t *handle, struct inode *inode, |
684 | ext3_fsblk_t block, unsigned long count) | 689 | ext3_fsblk_t block, unsigned long count) |
685 | { | 690 | { |
686 | struct super_block * sb; | 691 | struct super_block *sb = inode->i_sb; |
687 | unsigned long dquot_freed_blocks; | 692 | unsigned long dquot_freed_blocks; |
688 | 693 | ||
689 | sb = inode->i_sb; | 694 | trace_ext3_free_blocks(inode, block, count); |
690 | if (!sb) { | ||
691 | printk ("ext3_free_blocks: nonexistent device"); | ||
692 | return; | ||
693 | } | ||
694 | ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); | 695 | ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); |
695 | if (dquot_freed_blocks) | 696 | if (dquot_freed_blocks) |
696 | dquot_free_block(inode, dquot_freed_blocks); | 697 | dquot_free_block(inode, dquot_freed_blocks); |
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
1136 | else | 1137 | else |
1137 | start_block = grp_goal + group_first_block; | 1138 | start_block = grp_goal + group_first_block; |
1138 | 1139 | ||
1140 | trace_ext3_alloc_new_reservation(sb, start_block); | ||
1139 | size = my_rsv->rsv_goal_size; | 1141 | size = my_rsv->rsv_goal_size; |
1140 | 1142 | ||
1141 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | 1143 | if (!rsv_is_empty(&my_rsv->rsv_window)) { |
@@ -1230,8 +1232,11 @@ retry: | |||
1230 | * check if the first free block is within the | 1232 | * check if the first free block is within the |
1231 | * free space we just reserved | 1233 | * free space we just reserved |
1232 | */ | 1234 | */ |
1233 | if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) | 1235 | if (start_block >= my_rsv->rsv_start && |
1236 | start_block <= my_rsv->rsv_end) { | ||
1237 | trace_ext3_reserved(sb, start_block, my_rsv); | ||
1234 | return 0; /* success */ | 1238 | return 0; /* success */ |
1239 | } | ||
1235 | /* | 1240 | /* |
1236 | * if the first free bit we found is out of the reservable space | 1241 | * if the first free bit we found is out of the reservable space |
1237 | * continue search for next reservable space, | 1242 | * continue search for next reservable space, |
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, | |||
1514 | 1519 | ||
1515 | *errp = -ENOSPC; | 1520 | *errp = -ENOSPC; |
1516 | sb = inode->i_sb; | 1521 | sb = inode->i_sb; |
1517 | if (!sb) { | ||
1518 | printk("ext3_new_block: nonexistent device"); | ||
1519 | return 0; | ||
1520 | } | ||
1521 | 1522 | ||
1522 | /* | 1523 | /* |
1523 | * Check quota for allocation of this block. | 1524 | * Check quota for allocation of this block. |
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, | |||
1528 | return 0; | 1529 | return 0; |
1529 | } | 1530 | } |
1530 | 1531 | ||
1532 | trace_ext3_request_blocks(inode, goal, num); | ||
1533 | |||
1531 | sbi = EXT3_SB(sb); | 1534 | sbi = EXT3_SB(sb); |
1532 | es = EXT3_SB(sb)->s_es; | 1535 | es = sbi->s_es; |
1533 | ext3_debug("goal=%lu.\n", goal); | 1536 | ext3_debug("goal=%lu.\n", goal); |
1534 | /* | 1537 | /* |
1535 | * Allocate a block from reservation only when | 1538 | * Allocate a block from reservation only when |
@@ -1742,6 +1745,10 @@ allocated: | |||
1742 | brelse(bitmap_bh); | 1745 | brelse(bitmap_bh); |
1743 | dquot_free_block(inode, *count-num); | 1746 | dquot_free_block(inode, *count-num); |
1744 | *count = num; | 1747 | *count = num; |
1748 | |||
1749 | trace_ext3_allocate_blocks(inode, goal, num, | ||
1750 | (unsigned long long)ret_block); | ||
1751 | |||
1745 | return ret_block; | 1752 | return ret_block; |
1746 | 1753 | ||
1747 | io_error: | 1754 | io_error: |
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group, | |||
1996 | if ((next - start) < minblocks) | 2003 | if ((next - start) < minblocks) |
1997 | goto free_extent; | 2004 | goto free_extent; |
1998 | 2005 | ||
2006 | trace_ext3_discard_blocks(sb, discard_block, next - start); | ||
1999 | /* Send the TRIM command down to the device */ | 2007 | /* Send the TRIM command down to the device */ |
2000 | err = sb_issue_discard(sb, discard_block, next - start, | 2008 | err = sb_issue_discard(sb, discard_block, next - start, |
2001 | GFP_NOFS, 0); | 2009 | GFP_NOFS, 0); |
@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
2100 | if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) | 2108 | if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) |
2101 | return -EINVAL; | 2109 | return -EINVAL; |
2102 | if (start >= max_blks) | 2110 | if (start >= max_blks) |
2103 | goto out; | 2111 | return -EINVAL; |
2104 | if (start + len > max_blks) | 2112 | if (start + len > max_blks) |
2105 | len = max_blks - start; | 2113 | len = max_blks - start; |
2106 | 2114 | ||
@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
2148 | 2156 | ||
2149 | if (ret >= 0) | 2157 | if (ret >= 0) |
2150 | ret = 0; | 2158 | ret = 0; |
2151 | |||
2152 | out: | ||
2153 | range->len = trimmed * sb->s_blocksize; | 2159 | range->len = trimmed * sb->s_blocksize; |
2154 | 2160 | ||
2155 | return ret; | 2161 | return ret; |
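The trace_ext3_* hooks added throughout balloc.c (and in the files that follow) are generated from TRACE_EVENT() definitions in the new include/trace/events/ext3.h header, which this excerpt does not include. As a hypothetical illustration only, a definition matching the trace_ext3_free_blocks(inode, block, count) call above could look like the sketch below; the real header may record different fields:

/* Hypothetical sketch of one of the new tracepoints; field choices
 * are plausible but not verified against the actual header. */
TRACE_EVENT(ext3_free_blocks,
	TP_PROTO(struct inode *inode, unsigned long block,
		 unsigned long count),

	TP_ARGS(inode, block, count),

	TP_STRUCT__entry(
		__field(dev_t,		dev)
		__field(ino_t,		ino)
		__field(unsigned long,	block)
		__field(unsigned long,	count)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->block	= block;
		__entry->count	= count;
	),

	TP_printk("dev %d,%d ino %lu block %lu count %lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino, __entry->block,
		  __entry->count)
);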
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 2be5b99097f1..724df69847dc 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = { | |||
71 | }; | 71 | }; |
72 | 72 | ||
73 | const struct inode_operations ext3_file_inode_operations = { | 73 | const struct inode_operations ext3_file_inode_operations = { |
74 | .truncate = ext3_truncate, | ||
75 | .setattr = ext3_setattr, | 74 | .setattr = ext3_setattr, |
76 | #ifdef CONFIG_EXT3_FS_XATTR | 75 | #ifdef CONFIG_EXT3_FS_XATTR |
77 | .setxattr = generic_setxattr, | 76 | .setxattr = generic_setxattr, |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 0bcf63adb80a..d494c554c6e6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/jbd.h> | 30 | #include <linux/jbd.h> |
31 | #include <linux/ext3_fs.h> | 31 | #include <linux/ext3_fs.h> |
32 | #include <linux/ext3_jbd.h> | 32 | #include <linux/ext3_jbd.h> |
33 | #include <trace/events/ext3.h> | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * akpm: A new design for ext3_sync_file(). | 36 | * akpm: A new design for ext3_sync_file(). |
@@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
51 | int ret, needs_barrier = 0; | 52 | int ret, needs_barrier = 0; |
52 | tid_t commit_tid; | 53 | tid_t commit_tid; |
53 | 54 | ||
55 | trace_ext3_sync_file_enter(file, datasync); | ||
56 | |||
54 | if (inode->i_sb->s_flags & MS_RDONLY) | 57 | if (inode->i_sb->s_flags & MS_RDONLY) |
55 | return 0; | 58 | return 0; |
56 | 59 | ||
57 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 60 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
58 | if (ret) | 61 | if (ret) |
59 | return ret; | 62 | goto out; |
60 | 63 | ||
61 | /* | 64 | /* |
62 | * Taking the mutex here just to keep consistent with how fsync was | 65 | * Taking the mutex here just to keep consistent with how fsync was |
@@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
83 | */ | 86 | */ |
84 | if (ext3_should_journal_data(inode)) { | 87 | if (ext3_should_journal_data(inode)) { |
85 | mutex_unlock(&inode->i_mutex); | 88 | mutex_unlock(&inode->i_mutex); |
86 | return ext3_force_commit(inode->i_sb); | 89 | ret = ext3_force_commit(inode->i_sb); |
90 | goto out; | ||
87 | } | 91 | } |
88 | 92 | ||
89 | if (datasync) | 93 | if (datasync) |
@@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
104 | */ | 108 | */ |
105 | if (needs_barrier) | 109 | if (needs_barrier) |
106 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 110 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
111 | |||
107 | mutex_unlock(&inode->i_mutex); | 112 | mutex_unlock(&inode->i_mutex); |
113 | out: | ||
114 | trace_ext3_sync_file_exit(inode, ret); | ||
108 | return ret; | 115 | return ret; |
109 | } | 116 | } |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bfc2dc43681d..bf09cbf938cc 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/buffer_head.h> | 23 | #include <linux/buffer_head.h> |
24 | #include <linux/random.h> | 24 | #include <linux/random.h> |
25 | #include <linux/bitops.h> | 25 | #include <linux/bitops.h> |
26 | #include <trace/events/ext3.h> | ||
26 | 27 | ||
27 | #include <asm/byteorder.h> | 28 | #include <asm/byteorder.h> |
28 | 29 | ||
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) | |||
118 | 119 | ||
119 | ino = inode->i_ino; | 120 | ino = inode->i_ino; |
120 | ext3_debug ("freeing inode %lu\n", ino); | 121 | ext3_debug ("freeing inode %lu\n", ino); |
122 | trace_ext3_free_inode(inode); | ||
121 | 123 | ||
122 | is_directory = S_ISDIR(inode->i_mode); | 124 | is_directory = S_ISDIR(inode->i_mode); |
123 | 125 | ||
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, | |||
426 | return ERR_PTR(-EPERM); | 428 | return ERR_PTR(-EPERM); |
427 | 429 | ||
428 | sb = dir->i_sb; | 430 | sb = dir->i_sb; |
431 | trace_ext3_request_inode(dir, mode); | ||
429 | inode = new_inode(sb); | 432 | inode = new_inode(sb); |
430 | if (!inode) | 433 | if (!inode) |
431 | return ERR_PTR(-ENOMEM); | 434 | return ERR_PTR(-ENOMEM); |
@@ -601,6 +604,7 @@ got: | |||
601 | } | 604 | } |
602 | 605 | ||
603 | ext3_debug("allocating inode %lu\n", inode->i_ino); | 606 | ext3_debug("allocating inode %lu\n", inode->i_ino); |
607 | trace_ext3_allocate_inode(inode, dir, mode); | ||
604 | goto really_out; | 608 | goto really_out; |
605 | fail: | 609 | fail: |
606 | ext3_std_error(sb, err); | 610 | ext3_std_error(sb, err); |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2978a2a17a59..04da6acde85d 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@ | |||
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | 39 | #include <linux/fiemap.h> |
40 | #include <linux/namei.h> | 40 | #include <linux/namei.h> |
41 | #include <trace/events/ext3.h> | ||
41 | #include "xattr.h" | 42 | #include "xattr.h" |
42 | #include "acl.h" | 43 | #include "acl.h" |
43 | 44 | ||
44 | static int ext3_writepage_trans_blocks(struct inode *inode); | 45 | static int ext3_writepage_trans_blocks(struct inode *inode); |
46 | static int ext3_block_truncate_page(struct inode *inode, loff_t from); | ||
45 | 47 | ||
46 | /* | 48 | /* |
47 | * Test whether an inode is a fast symlink. | 49 | * Test whether an inode is a fast symlink. |
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, | |||
70 | 72 | ||
71 | might_sleep(); | 73 | might_sleep(); |
72 | 74 | ||
75 | trace_ext3_forget(inode, is_metadata, blocknr); | ||
73 | BUFFER_TRACE(bh, "enter"); | 76 | BUFFER_TRACE(bh, "enter"); |
74 | 77 | ||
75 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " | 78 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " |
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode) | |||
194 | */ | 197 | */ |
195 | void ext3_evict_inode (struct inode *inode) | 198 | void ext3_evict_inode (struct inode *inode) |
196 | { | 199 | { |
200 | struct ext3_inode_info *ei = EXT3_I(inode); | ||
197 | struct ext3_block_alloc_info *rsv; | 201 | struct ext3_block_alloc_info *rsv; |
198 | handle_t *handle; | 202 | handle_t *handle; |
199 | int want_delete = 0; | 203 | int want_delete = 0; |
200 | 204 | ||
205 | trace_ext3_evict_inode(inode); | ||
201 | if (!inode->i_nlink && !is_bad_inode(inode)) { | 206 | if (!inode->i_nlink && !is_bad_inode(inode)) { |
202 | dquot_initialize(inode); | 207 | dquot_initialize(inode); |
203 | want_delete = 1; | 208 | want_delete = 1; |
204 | } | 209 | } |
205 | 210 | ||
211 | /* | ||
212 | * When journalling data, dirty buffers are tracked only in the journal. | ||
213 | * So although mm thinks everything is clean and ready for reaping, the | ||
214 | * inode might still have some pages to write in the running | ||
215 | * transaction or waiting to be checkpointed. Thus calling | ||
216 | * journal_invalidatepage() (via truncate_inode_pages()) to discard | ||
217 | * these buffers can cause data loss. Also even if we did not discard | ||
218 | * these buffers, we would have no way to find them after the inode | ||
219 | * is reaped, and thus the user could see stale data when trying to read | ||
220 | * them before the transaction is checkpointed. So be careful and | ||
221 | * force everything to disk here... We use ei->i_datasync_tid to | ||
222 | * store the newest transaction containing inode's data. | ||
223 | * | ||
224 | * Note that directories do not have this problem because they don't | ||
225 | * use page cache. | ||
226 | */ | ||
227 | if (inode->i_nlink && ext3_should_journal_data(inode) && | ||
228 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | ||
229 | tid_t commit_tid = atomic_read(&ei->i_datasync_tid); | ||
230 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; | ||
231 | |||
232 | log_start_commit(journal, commit_tid); | ||
233 | log_wait_commit(journal, commit_tid); | ||
234 | filemap_write_and_wait(&inode->i_data); | ||
235 | } | ||
206 | truncate_inode_pages(&inode->i_data, 0); | 236 | truncate_inode_pages(&inode->i_data, 0); |
207 | 237 | ||
208 | ext3_discard_reservation(inode); | 238 | ext3_discard_reservation(inode); |
209 | rsv = EXT3_I(inode)->i_block_alloc_info; | 239 | rsv = ei->i_block_alloc_info; |
210 | EXT3_I(inode)->i_block_alloc_info = NULL; | 240 | ei->i_block_alloc_info = NULL; |
211 | if (unlikely(rsv)) | 241 | if (unlikely(rsv)) |
212 | kfree(rsv); | 242 | kfree(rsv); |
213 | 243 | ||
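Note: the commit-and-wait block added above is one half of a pairing built around i_datasync_tid. A consolidated sketch, using only names that appear in this patch, of how the tid is produced by the journalled write paths and consumed at eviction:

	/* producer: journalled write paths record the running transaction */
	atomic_set(&EXT3_I(inode)->i_datasync_tid, handle->h_transaction->t_tid);

	/* consumer: eviction forces that transaction to disk before
	 * truncate_inode_pages() may discard the journalled buffers */
	tid_t commit_tid = atomic_read(&EXT3_I(inode)->i_datasync_tid);
	journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;

	log_start_commit(journal, commit_tid);
	log_wait_commit(journal, commit_tid);
	filemap_write_and_wait(&inode->i_data);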
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode) | |||
231 | if (inode->i_blocks) | 261 | if (inode->i_blocks) |
232 | ext3_truncate(inode); | 262 | ext3_truncate(inode); |
233 | /* | 263 | /* |
234 | * Kill off the orphan record which ext3_truncate created. | 264 | * Kill off the orphan record created when the inode lost the last |
235 | * AKPM: I think this can be inside the above `if'. | 265 | * link. Note that ext3_orphan_del() has to be able to cope with the |
236 | * Note that ext3_orphan_del() has to be able to cope with the | 266 | * deletion of a non-existent orphan - ext3_truncate() could |
237 | * deletion of a non-existent orphan - this is because we don't | 267 | * have removed the record. |
238 | * know if ext3_truncate() actually created an orphan record. | ||
239 | * (Well, we could do this if we need to, but heck - it works) | ||
240 | */ | 268 | */ |
241 | ext3_orphan_del(handle, inode); | 269 | ext3_orphan_del(handle, inode); |
242 | EXT3_I(inode)->i_dtime = get_seconds(); | 270 | ei->i_dtime = get_seconds(); |
243 | 271 | ||
244 | /* | 272 | /* |
245 | * One subtle ordering requirement: if anything has gone wrong | 273 | * One subtle ordering requirement: if anything has gone wrong |
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
842 | ext3_fsblk_t first_block = 0; | 870 | ext3_fsblk_t first_block = 0; |
843 | 871 | ||
844 | 872 | ||
873 | trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create); | ||
845 | J_ASSERT(handle != NULL || create == 0); | 874 | J_ASSERT(handle != NULL || create == 0); |
846 | depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); | 875 | depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); |
847 | 876 | ||
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
886 | if (!create || err == -EIO) | 915 | if (!create || err == -EIO) |
887 | goto cleanup; | 916 | goto cleanup; |
888 | 917 | ||
918 | /* | ||
919 | * Block out ext3_truncate while we alter the tree | ||
920 | */ | ||
889 | mutex_lock(&ei->truncate_mutex); | 921 | mutex_lock(&ei->truncate_mutex); |
890 | 922 | ||
891 | /* | 923 | /* |
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
934 | */ | 966 | */ |
935 | count = ext3_blks_to_allocate(partial, indirect_blks, | 967 | count = ext3_blks_to_allocate(partial, indirect_blks, |
936 | maxblocks, blocks_to_boundary); | 968 | maxblocks, blocks_to_boundary); |
937 | /* | ||
938 | * Block out ext3_truncate while we alter the tree | ||
939 | */ | ||
940 | err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, | 969 | err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, |
941 | offsets + (partial - chain), partial); | 970 | offsets + (partial - chain), partial); |
942 | 971 | ||
@@ -970,6 +999,9 @@ cleanup: | |||
970 | } | 999 | } |
971 | BUFFER_TRACE(bh_result, "returned"); | 1000 | BUFFER_TRACE(bh_result, "returned"); |
972 | out: | 1001 | out: |
1002 | trace_ext3_get_blocks_exit(inode, iblock, | ||
1003 | depth ? le32_to_cpu(chain[depth-1].key) : 0, | ||
1004 | count, err); | ||
973 | return err; | 1005 | return err; |
974 | } | 1006 | } |
975 | 1007 | ||
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode) | |||
1202 | ext3_truncate(inode); | 1234 | ext3_truncate(inode); |
1203 | } | 1235 | } |
1204 | 1236 | ||
1237 | /* | ||
1238 | * Truncate blocks that were not used by a direct IO write. We have to zero out | ||
1239 | * the last file block as well because direct IO might have written to it. | ||
1240 | */ | ||
1241 | static void ext3_truncate_failed_direct_write(struct inode *inode) | ||
1242 | { | ||
1243 | ext3_block_truncate_page(inode, inode->i_size); | ||
1244 | ext3_truncate(inode); | ||
1245 | } | ||
1246 | |||
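Note: a worked example of the window this helper closes, assuming a 4096-byte block size:

	/*
	 * i_size = 6144, blocksize = 4096: EOF sits 2048 bytes into block 1.
	 * A direct IO write touches block 1 and beyond, then fails to extend
	 * i_size. Block 1 now carries fresh data past offset 2048 that plain
	 * ext3_truncate() would leave in place, so
	 * ext3_block_truncate_page(inode, 6144) re-zeroes bytes 2048..4095
	 * and a later extension of the file cannot expose the stale tail.
	 */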
1205 | static int ext3_write_begin(struct file *file, struct address_space *mapping, | 1247 | static int ext3_write_begin(struct file *file, struct address_space *mapping, |
1206 | loff_t pos, unsigned len, unsigned flags, | 1248 | loff_t pos, unsigned len, unsigned flags, |
1207 | struct page **pagep, void **fsdata) | 1249 | struct page **pagep, void **fsdata) |
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, | |||
1217 | * we allocate blocks but write fails for some reason */ | 1259 | * we allocate blocks but write fails for some reason */ |
1218 | int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; | 1260 | int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; |
1219 | 1261 | ||
1262 | trace_ext3_write_begin(inode, pos, len, flags); | ||
1263 | |||
1220 | index = pos >> PAGE_CACHE_SHIFT; | 1264 | index = pos >> PAGE_CACHE_SHIFT; |
1221 | from = pos & (PAGE_CACHE_SIZE - 1); | 1265 | from = pos & (PAGE_CACHE_SIZE - 1); |
1222 | to = from + len; | 1266 | to = from + len; |
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file, | |||
1332 | unsigned from, to; | 1376 | unsigned from, to; |
1333 | int ret = 0, ret2; | 1377 | int ret = 0, ret2; |
1334 | 1378 | ||
1379 | trace_ext3_ordered_write_end(inode, pos, len, copied); | ||
1335 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 1380 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1336 | 1381 | ||
1337 | from = pos & (PAGE_CACHE_SIZE - 1); | 1382 | from = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file, | |||
1367 | struct inode *inode = file->f_mapping->host; | 1412 | struct inode *inode = file->f_mapping->host; |
1368 | int ret; | 1413 | int ret; |
1369 | 1414 | ||
1415 | trace_ext3_writeback_write_end(inode, pos, len, copied); | ||
1370 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 1416 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1371 | update_file_sizes(inode, pos, copied); | 1417 | update_file_sizes(inode, pos, copied); |
1372 | /* | 1418 | /* |
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file, | |||
1391 | { | 1437 | { |
1392 | handle_t *handle = ext3_journal_current_handle(); | 1438 | handle_t *handle = ext3_journal_current_handle(); |
1393 | struct inode *inode = mapping->host; | 1439 | struct inode *inode = mapping->host; |
1440 | struct ext3_inode_info *ei = EXT3_I(inode); | ||
1394 | int ret = 0, ret2; | 1441 | int ret = 0, ret2; |
1395 | int partial = 0; | 1442 | int partial = 0; |
1396 | unsigned from, to; | 1443 | unsigned from, to; |
1397 | 1444 | ||
1445 | trace_ext3_journalled_write_end(inode, pos, len, copied); | ||
1398 | from = pos & (PAGE_CACHE_SIZE - 1); | 1446 | from = pos & (PAGE_CACHE_SIZE - 1); |
1399 | to = from + len; | 1447 | to = from + len; |
1400 | 1448 | ||
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file, | |||
1419 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) | 1467 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) |
1420 | ext3_orphan_add(handle, inode); | 1468 | ext3_orphan_add(handle, inode); |
1421 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); | 1469 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); |
1422 | if (inode->i_size > EXT3_I(inode)->i_disksize) { | 1470 | atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); |
1423 | EXT3_I(inode)->i_disksize = inode->i_size; | 1471 | if (inode->i_size > ei->i_disksize) { |
1472 | ei->i_disksize = inode->i_size; | ||
1424 | ret2 = ext3_mark_inode_dirty(handle, inode); | 1473 | ret2 = ext3_mark_inode_dirty(handle, inode); |
1425 | if (!ret) | 1474 | if (!ret) |
1426 | ret = ret2; | 1475 | ret = ret2; |
@@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page, | |||
1577 | if (ext3_journal_current_handle()) | 1626 | if (ext3_journal_current_handle()) |
1578 | goto out_fail; | 1627 | goto out_fail; |
1579 | 1628 | ||
1629 | trace_ext3_ordered_writepage(page); | ||
1580 | if (!page_has_buffers(page)) { | 1630 | if (!page_has_buffers(page)) { |
1581 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 1631 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
1582 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 1632 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
@@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page, | |||
1647 | if (ext3_journal_current_handle()) | 1697 | if (ext3_journal_current_handle()) |
1648 | goto out_fail; | 1698 | goto out_fail; |
1649 | 1699 | ||
1700 | trace_ext3_writeback_writepage(page); | ||
1650 | if (page_has_buffers(page)) { | 1701 | if (page_has_buffers(page)) { |
1651 | if (!walk_page_buffers(NULL, page_buffers(page), 0, | 1702 | if (!walk_page_buffers(NULL, page_buffers(page), 0, |
1652 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { | 1703 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { |
@@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page, | |||
1689 | if (ext3_journal_current_handle()) | 1740 | if (ext3_journal_current_handle()) |
1690 | goto no_write; | 1741 | goto no_write; |
1691 | 1742 | ||
1743 | trace_ext3_journalled_writepage(page); | ||
1692 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); | 1744 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); |
1693 | if (IS_ERR(handle)) { | 1745 | if (IS_ERR(handle)) { |
1694 | ret = PTR_ERR(handle); | 1746 | ret = PTR_ERR(handle); |
@@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page, | |||
1715 | if (ret == 0) | 1767 | if (ret == 0) |
1716 | ret = err; | 1768 | ret = err; |
1717 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); | 1769 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); |
1770 | atomic_set(&EXT3_I(inode)->i_datasync_tid, | ||
1771 | handle->h_transaction->t_tid); | ||
1718 | unlock_page(page); | 1772 | unlock_page(page); |
1719 | } else { | 1773 | } else { |
1720 | /* | 1774 | /* |
@@ -1739,6 +1793,7 @@ out_unlock: | |||
1739 | 1793 | ||
1740 | static int ext3_readpage(struct file *file, struct page *page) | 1794 | static int ext3_readpage(struct file *file, struct page *page) |
1741 | { | 1795 | { |
1796 | trace_ext3_readpage(page); | ||
1742 | return mpage_readpage(page, ext3_get_block); | 1797 | return mpage_readpage(page, ext3_get_block); |
1743 | } | 1798 | } |
1744 | 1799 | ||
@@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset) | |||
1753 | { | 1808 | { |
1754 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1809 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1755 | 1810 | ||
1811 | trace_ext3_invalidatepage(page, offset); | ||
1812 | |||
1756 | /* | 1813 | /* |
1757 | * If it's a full truncate we just forget about the pending dirtying | 1814 | * If it's a full truncate we just forget about the pending dirtying |
1758 | */ | 1815 | */ |
@@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait) | |||
1766 | { | 1823 | { |
1767 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1824 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1768 | 1825 | ||
1826 | trace_ext3_releasepage(page); | ||
1769 | WARN_ON(PageChecked(page)); | 1827 | WARN_ON(PageChecked(page)); |
1770 | if (!page_has_buffers(page)) | 1828 | if (!page_has_buffers(page)) |
1771 | return 0; | 1829 | return 0; |
@@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, | |||
1794 | size_t count = iov_length(iov, nr_segs); | 1852 | size_t count = iov_length(iov, nr_segs); |
1795 | int retries = 0; | 1853 | int retries = 0; |
1796 | 1854 | ||
1855 | trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
1856 | |||
1797 | if (rw == WRITE) { | 1857 | if (rw == WRITE) { |
1798 | loff_t final_size = offset + count; | 1858 | loff_t final_size = offset + count; |
1799 | 1859 | ||
@@ -1827,7 +1887,7 @@ retry: | |||
1827 | loff_t end = offset + iov_length(iov, nr_segs); | 1887 | loff_t end = offset + iov_length(iov, nr_segs); |
1828 | 1888 | ||
1829 | if (end > isize) | 1889 | if (end > isize) |
1830 | vmtruncate(inode, isize); | 1890 | ext3_truncate_failed_direct_write(inode); |
1831 | } | 1891 | } |
1832 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1892 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
1833 | goto retry; | 1893 | goto retry; |
@@ -1841,7 +1901,7 @@ retry: | |||
1841 | /* This is really bad luck. We've written the data | 1901 | /* This is really bad luck. We've written the data |
1842 | * but cannot extend i_size. Truncate allocated blocks | 1902 | * but cannot extend i_size. Truncate allocated blocks |
1843 | * and pretend the write failed... */ | 1903 | * and pretend the write failed... */ |
1844 | ext3_truncate(inode); | 1904 | ext3_truncate_failed_direct_write(inode); |
1845 | ret = PTR_ERR(handle); | 1905 | ret = PTR_ERR(handle); |
1846 | goto out; | 1906 | goto out; |
1847 | } | 1907 | } |
@@ -1867,6 +1927,8 @@ retry: | |||
1867 | ret = err; | 1927 | ret = err; |
1868 | } | 1928 | } |
1869 | out: | 1929 | out: |
1930 | trace_ext3_direct_IO_exit(inode, offset, | ||
1931 | iov_length(iov, nr_segs), rw, ret); | ||
1870 | return ret; | 1932 | return ret; |
1871 | } | 1933 | } |
1872 | 1934 | ||
@@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode) | |||
1949 | * This is required during truncate. We need to physically zero the tail end | 2011 | * This is required during truncate. We need to physically zero the tail end |
1950 | * of that block so it doesn't yield old data if the file is later grown. | 2012 | * of that block so it doesn't yield old data if the file is later grown. |
1951 | */ | 2013 | */ |
1952 | static int ext3_block_truncate_page(handle_t *handle, struct page *page, | 2014 | static int ext3_block_truncate_page(struct inode *inode, loff_t from) |
1953 | struct address_space *mapping, loff_t from) | ||
1954 | { | 2015 | { |
1955 | ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 2016 | ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
1956 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 2017 | unsigned offset = from & (PAGE_CACHE_SIZE - 1); |
1957 | unsigned blocksize, iblock, length, pos; | 2018 | unsigned blocksize, iblock, length, pos; |
1958 | struct inode *inode = mapping->host; | 2019 | struct page *page; |
2020 | handle_t *handle = NULL; | ||
1959 | struct buffer_head *bh; | 2021 | struct buffer_head *bh; |
1960 | int err = 0; | 2022 | int err = 0; |
1961 | 2023 | ||
2024 | /* Truncated on block boundary - nothing to do */ | ||
1962 | blocksize = inode->i_sb->s_blocksize; | 2025 | blocksize = inode->i_sb->s_blocksize; |
2026 | if ((from & (blocksize - 1)) == 0) | ||
2027 | return 0; | ||
2028 | |||
2029 | page = grab_cache_page(inode->i_mapping, index); | ||
2030 | if (!page) | ||
2031 | return -ENOMEM; | ||
1963 | length = blocksize - (offset & (blocksize - 1)); | 2032 | length = blocksize - (offset & (blocksize - 1)); |
1964 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 2033 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
1965 | 2034 | ||
@@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
2004 | goto unlock; | 2073 | goto unlock; |
2005 | } | 2074 | } |
2006 | 2075 | ||
2076 | /* data=writeback mode doesn't need a transaction to zero out data */ | ||
2077 | if (!ext3_should_writeback_data(inode)) { | ||
2078 | /* We journal at most one block */ | ||
2079 | handle = ext3_journal_start(inode, 1); | ||
2080 | if (IS_ERR(handle)) { | ||
2081 | clear_highpage(page); | ||
2082 | flush_dcache_page(page); | ||
2083 | err = PTR_ERR(handle); | ||
2084 | goto unlock; | ||
2085 | } | ||
2086 | } | ||
2087 | |||
2007 | if (ext3_should_journal_data(inode)) { | 2088 | if (ext3_should_journal_data(inode)) { |
2008 | BUFFER_TRACE(bh, "get write access"); | 2089 | BUFFER_TRACE(bh, "get write access"); |
2009 | err = ext3_journal_get_write_access(handle, bh); | 2090 | err = ext3_journal_get_write_access(handle, bh); |
2010 | if (err) | 2091 | if (err) |
2011 | goto unlock; | 2092 | goto stop; |
2012 | } | 2093 | } |
2013 | 2094 | ||
2014 | zero_user(page, offset, length); | 2095 | zero_user(page, offset, length); |
@@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
2022 | err = ext3_journal_dirty_data(handle, bh); | 2103 | err = ext3_journal_dirty_data(handle, bh); |
2023 | mark_buffer_dirty(bh); | 2104 | mark_buffer_dirty(bh); |
2024 | } | 2105 | } |
2106 | stop: | ||
2107 | if (handle) | ||
2108 | ext3_journal_stop(handle); | ||
2025 | 2109 | ||
2026 | unlock: | 2110 | unlock: |
2027 | unlock_page(page); | 2111 | unlock_page(page); |
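Note: the reworked helper now starts its own transaction only when the data mode needs one. A minimal sketch of that conditional-journalling shape; zero_partial_block() is a hypothetical wrapper, while the ext3 calls are the ones used in the hunk:

	static int zero_partial_block(struct inode *inode, struct page *page,
				      unsigned offset, unsigned length)
	{
		handle_t *handle = NULL;

		/* data=writeback can zero the page without a handle */
		if (!ext3_should_writeback_data(inode)) {
			handle = ext3_journal_start(inode, 1); /* one block */
			if (IS_ERR(handle))
				return PTR_ERR(handle);
		}
		zero_user(page, offset, length);
		if (handle)
			ext3_journal_stop(handle);
		return 0;
	}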
@@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
2390 | 2474 | ||
2391 | int ext3_can_truncate(struct inode *inode) | 2475 | int ext3_can_truncate(struct inode *inode) |
2392 | { | 2476 | { |
2393 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
2394 | return 0; | ||
2395 | if (S_ISREG(inode->i_mode)) | 2477 | if (S_ISREG(inode->i_mode)) |
2396 | return 1; | 2478 | return 1; |
2397 | if (S_ISDIR(inode->i_mode)) | 2479 | if (S_ISDIR(inode->i_mode)) |
@@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode) | |||
2435 | struct ext3_inode_info *ei = EXT3_I(inode); | 2517 | struct ext3_inode_info *ei = EXT3_I(inode); |
2436 | __le32 *i_data = ei->i_data; | 2518 | __le32 *i_data = ei->i_data; |
2437 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); | 2519 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); |
2438 | struct address_space *mapping = inode->i_mapping; | ||
2439 | int offsets[4]; | 2520 | int offsets[4]; |
2440 | Indirect chain[4]; | 2521 | Indirect chain[4]; |
2441 | Indirect *partial; | 2522 | Indirect *partial; |
@@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode) | |||
2443 | int n; | 2524 | int n; |
2444 | long last_block; | 2525 | long last_block; |
2445 | unsigned blocksize = inode->i_sb->s_blocksize; | 2526 | unsigned blocksize = inode->i_sb->s_blocksize; |
2446 | struct page *page; | 2527 | |
2528 | trace_ext3_truncate_enter(inode); | ||
2447 | 2529 | ||
2448 | if (!ext3_can_truncate(inode)) | 2530 | if (!ext3_can_truncate(inode)) |
2449 | goto out_notrans; | 2531 | goto out_notrans; |
@@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode) | |||
2451 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) | 2533 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) |
2452 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); | 2534 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); |
2453 | 2535 | ||
2454 | /* | ||
2455 | * We have to lock the EOF page here, because lock_page() nests | ||
2456 | * outside journal_start(). | ||
2457 | */ | ||
2458 | if ((inode->i_size & (blocksize - 1)) == 0) { | ||
2459 | /* Block boundary? Nothing to do */ | ||
2460 | page = NULL; | ||
2461 | } else { | ||
2462 | page = grab_cache_page(mapping, | ||
2463 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
2464 | if (!page) | ||
2465 | goto out_notrans; | ||
2466 | } | ||
2467 | |||
2468 | handle = start_transaction(inode); | 2536 | handle = start_transaction(inode); |
2469 | if (IS_ERR(handle)) { | 2537 | if (IS_ERR(handle)) |
2470 | if (page) { | ||
2471 | clear_highpage(page); | ||
2472 | flush_dcache_page(page); | ||
2473 | unlock_page(page); | ||
2474 | page_cache_release(page); | ||
2475 | } | ||
2476 | goto out_notrans; | 2538 | goto out_notrans; |
2477 | } | ||
2478 | 2539 | ||
2479 | last_block = (inode->i_size + blocksize-1) | 2540 | last_block = (inode->i_size + blocksize-1) |
2480 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); | 2541 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); |
2481 | |||
2482 | if (page) | ||
2483 | ext3_block_truncate_page(handle, page, mapping, inode->i_size); | ||
2484 | |||
2485 | n = ext3_block_to_path(inode, last_block, offsets, NULL); | 2542 | n = ext3_block_to_path(inode, last_block, offsets, NULL); |
2486 | if (n == 0) | 2543 | if (n == 0) |
2487 | goto out_stop; /* error */ | 2544 | goto out_stop; /* error */ |
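Note on the deletion above: the EOF page handling left ext3_truncate() because lock_page() must nest outside journal_start(). A sketch of the ordering before and after:

	/*
	 * old: ext3_truncate() grabbed and locked the EOF page up front,
	 *      then called start_transaction(), forcing the page-before-
	 *      handle dance visible in the deleted lines.
	 * new: callers zero the tail first; ext3_block_truncate_page()
	 *      grabs the page itself and only then starts its own
	 *      one-block handle, keeping lock_page -> journal_start.
	 */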
@@ -2596,6 +2653,7 @@ out_stop: | |||
2596 | ext3_orphan_del(handle, inode); | 2653 | ext3_orphan_del(handle, inode); |
2597 | 2654 | ||
2598 | ext3_journal_stop(handle); | 2655 | ext3_journal_stop(handle); |
2656 | trace_ext3_truncate_exit(inode); | ||
2599 | return; | 2657 | return; |
2600 | out_notrans: | 2658 | out_notrans: |
2601 | /* | 2659 | /* |
@@ -2604,6 +2662,7 @@ out_notrans: | |||
2604 | */ | 2662 | */ |
2605 | if (inode->i_nlink) | 2663 | if (inode->i_nlink) |
2606 | ext3_orphan_del(NULL, inode); | 2664 | ext3_orphan_del(NULL, inode); |
2665 | trace_ext3_truncate_exit(inode); | ||
2607 | } | 2666 | } |
2608 | 2667 | ||
2609 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, | 2668 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, |
@@ -2745,6 +2804,7 @@ make_io: | |||
2745 | * has in-inode xattrs, or we don't have this inode in memory. | 2804 | * has in-inode xattrs, or we don't have this inode in memory. |
2746 | * Read the block from disk. | 2805 | * Read the block from disk. |
2747 | */ | 2806 | */ |
2807 | trace_ext3_load_inode(inode); | ||
2748 | get_bh(bh); | 2808 | get_bh(bh); |
2749 | bh->b_end_io = end_buffer_read_sync; | 2809 | bh->b_end_io = end_buffer_read_sync; |
2750 | submit_bh(READ_META, bh); | 2810 | submit_bh(READ_META, bh); |
@@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) | |||
3229 | } | 3289 | } |
3230 | 3290 | ||
3231 | error = ext3_orphan_add(handle, inode); | 3291 | error = ext3_orphan_add(handle, inode); |
3292 | if (error) { | ||
3293 | ext3_journal_stop(handle); | ||
3294 | goto err_out; | ||
3295 | } | ||
3232 | EXT3_I(inode)->i_disksize = attr->ia_size; | 3296 | EXT3_I(inode)->i_disksize = attr->ia_size; |
3233 | rc = ext3_mark_inode_dirty(handle, inode); | 3297 | error = ext3_mark_inode_dirty(handle, inode); |
3234 | if (!error) | ||
3235 | error = rc; | ||
3236 | ext3_journal_stop(handle); | 3298 | ext3_journal_stop(handle); |
3299 | if (error) { | ||
3300 | /* Some hard fs error must have happened. Bail out. */ | ||
3301 | ext3_orphan_del(NULL, inode); | ||
3302 | goto err_out; | ||
3303 | } | ||
3304 | rc = ext3_block_truncate_page(inode, attr->ia_size); | ||
3305 | if (rc) { | ||
3306 | /* Cleanup orphan list and exit */ | ||
3307 | handle = ext3_journal_start(inode, 3); | ||
3308 | if (IS_ERR(handle)) { | ||
3309 | ext3_orphan_del(NULL, inode); | ||
3310 | goto err_out; | ||
3311 | } | ||
3312 | ext3_orphan_del(handle, inode); | ||
3313 | ext3_journal_stop(handle); | ||
3314 | goto err_out; | ||
3315 | } | ||
3237 | } | 3316 | } |
3238 | 3317 | ||
3239 | if ((attr->ia_valid & ATTR_SIZE) && | 3318 | if ((attr->ia_valid & ATTR_SIZE) && |
3240 | attr->ia_size != i_size_read(inode)) { | 3319 | attr->ia_size != i_size_read(inode)) { |
3241 | rc = vmtruncate(inode, attr->ia_size); | 3320 | truncate_setsize(inode, attr->ia_size); |
3242 | if (rc) | 3321 | ext3_truncate(inode); |
3243 | goto err_out; | ||
3244 | } | 3322 | } |
3245 | 3323 | ||
3246 | setattr_copy(inode, attr); | 3324 | setattr_copy(inode, attr); |
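Note: the rework above tightens the orphan-list protocol for a shrinking setattr; every failure after ext3_orphan_add() now unwinds the on-disk record. A sketch of the sequence, with the names used in the hunk:

	/*
	 * 1. ext3_orphan_add(handle, inode)  - crash-safe from here on
	 * 2. shrink i_disksize, ext3_mark_inode_dirty(), stop the handle
	 *    - on error: ext3_orphan_del(NULL, inode) and bail out
	 * 3. ext3_block_truncate_page() outside any handle
	 *    - on error: start a fresh 3-credit handle, ext3_orphan_del(),
	 *      stop it and bail out
	 * 4. truncate_setsize() + ext3_truncate() finish the shrink
	 */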
@@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
3374 | int err; | 3452 | int err; |
3375 | 3453 | ||
3376 | might_sleep(); | 3454 | might_sleep(); |
3455 | trace_ext3_mark_inode_dirty(inode, _RET_IP_); | ||
3377 | err = ext3_reserve_inode_write(handle, inode, &iloc); | 3456 | err = ext3_reserve_inode_write(handle, inode, &iloc); |
3378 | if (!err) | 3457 | if (!err) |
3379 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); | 3458 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); |
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index f4090bd2f345..c7f43944f160 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -285,7 +285,7 @@ group_add_out: | |||
285 | if (!capable(CAP_SYS_ADMIN)) | 285 | if (!capable(CAP_SYS_ADMIN)) |
286 | return -EPERM; | 286 | return -EPERM; |
287 | 287 | ||
288 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 288 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, |
289 | sizeof(range))) | 289 | sizeof(range))) |
290 | return -EFAULT; | 290 | return -EFAULT; |
291 | 291 | ||
@@ -293,7 +293,7 @@ group_add_out: | |||
293 | if (ret < 0) | 293 | if (ret < 0) |
294 | return ret; | 294 | return ret; |
295 | 295 | ||
296 | if (copy_to_user((struct fstrim_range *)arg, &range, | 296 | if (copy_to_user((struct fstrim_range __user *)arg, &range, |
297 | sizeof(range))) | 297 | sizeof(range))) |
298 | return -EFAULT; | 298 | return -EFAULT; |
299 | 299 | ||
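Note: the __user annotation changes nothing at runtime; it lets sparse flag any direct dereference of a user pointer. A minimal sketch of the annotated round trip, assuming the surrounding FITRIM handler and the ext3_trim_fs() helper it calls:

	struct fstrim_range range;
	struct fstrim_range __user *urange = (struct fstrim_range __user *)arg;
	int ret;

	if (copy_from_user(&range, urange, sizeof(range)))
		return -EFAULT;
	ret = ext3_trim_fs(sb, &range);	/* updates range.len on success */
	if (ret < 0)
		return ret;
	if (copy_to_user(urange, &range, sizeof(range)))
		return -EFAULT;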
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 3b57230a17bb..5571708b6a58 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/quotaops.h> | 36 | #include <linux/quotaops.h> |
37 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <trace/events/ext3.h> | ||
39 | 40 | ||
40 | #include "namei.h" | 41 | #include "namei.h" |
41 | #include "xattr.h" | 42 | #include "xattr.h" |
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent | |||
287 | while (len--) printk("%c", *name++); | 288 | while (len--) printk("%c", *name++); |
288 | ext3fs_dirhash(de->name, de->name_len, &h); | 289 | ext3fs_dirhash(de->name, de->name_len, &h); |
289 | printk(":%x.%u ", h.hash, | 290 | printk(":%x.%u ", h.hash, |
290 | ((char *) de - base)); | 291 | (unsigned) ((char *) de - base)); |
291 | } | 292 | } |
292 | space += EXT3_DIR_REC_LEN(de->name_len); | 293 | space += EXT3_DIR_REC_LEN(de->name_len); |
293 | names++; | 294 | names++; |
@@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, | |||
1013 | 1014 | ||
1014 | *err = -ENOENT; | 1015 | *err = -ENOENT; |
1015 | errout: | 1016 | errout: |
1016 | dxtrace(printk("%s not found\n", name)); | 1017 | dxtrace(printk("%s not found\n", entry->name)); |
1017 | dx_release (frames); | 1018 | dx_release (frames); |
1018 | return NULL; | 1019 | return NULL; |
1019 | } | 1020 | } |
@@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) | |||
2140 | struct ext3_dir_entry_2 * de; | 2141 | struct ext3_dir_entry_2 * de; |
2141 | handle_t *handle; | 2142 | handle_t *handle; |
2142 | 2143 | ||
2144 | trace_ext3_unlink_enter(dir, dentry); | ||
2143 | /* Initialize quotas before so that eventual writes go | 2145 | /* Initialize quotas before so that eventual writes go |
2144 | * in separate transaction */ | 2146 | * in separate transaction */ |
2145 | dquot_initialize(dir); | 2147 | dquot_initialize(dir); |
@@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) | |||
2185 | end_unlink: | 2187 | end_unlink: |
2186 | ext3_journal_stop(handle); | 2188 | ext3_journal_stop(handle); |
2187 | brelse (bh); | 2189 | brelse (bh); |
2190 | trace_ext3_unlink_exit(dentry, retval); | ||
2188 | return retval; | 2191 | return retval; |
2189 | } | 2192 | } |
2190 | 2193 | ||
@@ -2206,9 +2209,11 @@ static int ext3_symlink (struct inode * dir, | |||
2206 | /* | 2209 | /* |
2207 | * For non-fast symlinks, we just allocate inode and put it on | 2210 | * For non-fast symlinks, we just allocate inode and put it on |
2208 | * orphan list in the first transaction => we need bitmap, | 2211 | * orphan list in the first transaction => we need bitmap, |
2209 | * group descriptor, sb, inode block, quota blocks. | 2212 | * group descriptor, sb, inode block, quota blocks, and |
2213 | * possibly selinux xattr blocks. | ||
2210 | */ | 2214 | */ |
2211 | credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | 2215 | credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + |
2216 | EXT3_XATTR_TRANS_BLOCKS; | ||
2212 | } else { | 2217 | } else { |
2213 | /* | 2218 | /* |
2214 | * Fast symlink. We have to add entry to directory | 2219 | * Fast symlink. We have to add entry to directory |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b57ea2f91269..7beb69ae0015 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@ | |||
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "namei.h" | 45 | #include "namei.h" |
46 | 46 | ||
47 | #define CREATE_TRACE_POINTS | ||
48 | #include <trace/events/ext3.h> | ||
49 | |||
47 | #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED | 50 | #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED |
48 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA | 51 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA |
49 | #else | 52 | #else |
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) | |||
497 | return &ei->vfs_inode; | 500 | return &ei->vfs_inode; |
498 | } | 501 | } |
499 | 502 | ||
503 | static int ext3_drop_inode(struct inode *inode) | ||
504 | { | ||
505 | int drop = generic_drop_inode(inode); | ||
506 | |||
507 | trace_ext3_drop_inode(inode, drop); | ||
508 | return drop; | ||
509 | } | ||
510 | |||
500 | static void ext3_i_callback(struct rcu_head *head) | 511 | static void ext3_i_callback(struct rcu_head *head) |
501 | { | 512 | { |
502 | struct inode *inode = container_of(head, struct inode, i_rcu); | 513 | struct inode *inode = container_of(head, struct inode, i_rcu); |
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = { | |||
788 | .destroy_inode = ext3_destroy_inode, | 799 | .destroy_inode = ext3_destroy_inode, |
789 | .write_inode = ext3_write_inode, | 800 | .write_inode = ext3_write_inode, |
790 | .dirty_inode = ext3_dirty_inode, | 801 | .dirty_inode = ext3_dirty_inode, |
802 | .drop_inode = ext3_drop_inode, | ||
791 | .evict_inode = ext3_evict_inode, | 803 | .evict_inode = ext3_evict_inode, |
792 | .put_super = ext3_put_super, | 804 | .put_super = ext3_put_super, |
793 | .sync_fs = ext3_sync_fs, | 805 | .sync_fs = ext3_sync_fs, |
@@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait) | |||
2509 | { | 2521 | { |
2510 | tid_t target; | 2522 | tid_t target; |
2511 | 2523 | ||
2524 | trace_ext3_sync_fs(sb, wait); | ||
2512 | if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { | 2525 | if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { |
2513 | if (wait) | 2526 | if (wait) |
2514 | log_wait_commit(EXT3_SB(sb)->s_journal, target); | 2527 | log_wait_commit(EXT3_SB(sb)->s_journal, target); |
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 32e6cc23bd9a..d565759d82ee 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -803,8 +803,16 @@ inserted: | |||
803 | /* We need to allocate a new block */ | 803 | /* We need to allocate a new block */ |
804 | ext3_fsblk_t goal = ext3_group_first_block_no(sb, | 804 | ext3_fsblk_t goal = ext3_group_first_block_no(sb, |
805 | EXT3_I(inode)->i_block_group); | 805 | EXT3_I(inode)->i_block_group); |
806 | ext3_fsblk_t block = ext3_new_block(handle, inode, | 806 | ext3_fsblk_t block; |
807 | goal, &error); | 807 | |
808 | /* | ||
809 | * Protect us agaist concurrent allocations to the | ||
810 | * same inode from ext3_..._writepage(). Reservation | ||
811 | * code does not expect racing allocations. | ||
812 | */ | ||
813 | mutex_lock(&EXT3_I(inode)->truncate_mutex); | ||
814 | block = ext3_new_block(handle, inode, goal, &error); | ||
815 | mutex_unlock(&EXT3_I(inode)->truncate_mutex); | ||
808 | if (error) | 816 | if (error) |
809 | goto cleanup; | 817 | goto cleanup; |
810 | ea_idebug(inode, "creating block %d", block); | 818 | ea_idebug(inode, "creating block %d", block); |
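Note: a sketch of the race the new locking closes, assuming the per-inode reservation-window machinery behaves as the comment in the hunk implies:

	/*
	 * without truncate_mutex:
	 *   CPU0: ext3_..._writepage() -> ext3_new_block()   window state A
	 *   CPU1: ext3_xattr_set()     -> ext3_new_block()   same inode,
	 *         concurrent update of window state A -> corruption
	 * with truncate_mutex the xattr allocation queues behind the
	 * writepage allocation paths, which already hold the mutex.
	 */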
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 04109460ba9e..56fd8f865930 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o | |||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ |
10 | mmp.o | 10 | mmp.o indirect.o |
11 | 11 | ||
12 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 12 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
13 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o | 13 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index dca2d1ded931..a5c29bb3b835 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -198,12 +198,10 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, | |||
198 | case ACL_TYPE_ACCESS: | 198 | case ACL_TYPE_ACCESS: |
199 | name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; | 199 | name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; |
200 | if (acl) { | 200 | if (acl) { |
201 | mode_t mode = inode->i_mode; | 201 | error = posix_acl_equiv_mode(acl, &inode->i_mode); |
202 | error = posix_acl_equiv_mode(acl, &mode); | ||
203 | if (error < 0) | 202 | if (error < 0) |
204 | return error; | 203 | return error; |
205 | else { | 204 | else { |
206 | inode->i_mode = mode; | ||
207 | inode->i_ctime = ext4_current_time(inode); | 205 | inode->i_ctime = ext4_current_time(inode); |
208 | ext4_mark_inode_dirty(handle, inode); | 206 | ext4_mark_inode_dirty(handle, inode); |
209 | if (error == 0) | 207 | if (error == 0) |
@@ -259,19 +257,16 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
259 | inode->i_mode &= ~current_umask(); | 257 | inode->i_mode &= ~current_umask(); |
260 | } | 258 | } |
261 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { | 259 | if (test_opt(inode->i_sb, POSIX_ACL) && acl) { |
262 | mode_t mode = inode->i_mode; | ||
263 | |||
264 | if (S_ISDIR(inode->i_mode)) { | 260 | if (S_ISDIR(inode->i_mode)) { |
265 | error = ext4_set_acl(handle, inode, | 261 | error = ext4_set_acl(handle, inode, |
266 | ACL_TYPE_DEFAULT, acl); | 262 | ACL_TYPE_DEFAULT, acl); |
267 | if (error) | 263 | if (error) |
268 | goto cleanup; | 264 | goto cleanup; |
269 | } | 265 | } |
270 | error = posix_acl_create(&acl, GFP_NOFS, &mode); | 266 | error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); |
271 | if (error < 0) | 267 | if (error < 0) |
272 | return error; | 268 | return error; |
273 | 269 | ||
274 | inode->i_mode = mode; | ||
275 | if (error > 0) { | 270 | if (error > 0) { |
276 | /* This is an extended ACL */ | 271 | /* This is an extended ACL */ |
277 | error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); | 272 | error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 264f6949511e..f8224adf496e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -620,3 +620,51 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
620 | 620 | ||
621 | } | 621 | } |
622 | 622 | ||
623 | /** | ||
624 | * ext4_inode_to_goal_block - return a hint for block allocation | ||
625 | * @inode: inode for block allocation | ||
626 | * | ||
627 | * Return the ideal location to start allocating blocks for a | ||
628 | * newly created inode. | ||
629 | */ | ||
630 | ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) | ||
631 | { | ||
632 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
633 | ext4_group_t block_group; | ||
634 | ext4_grpblk_t colour; | ||
635 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | ||
636 | ext4_fsblk_t bg_start; | ||
637 | ext4_fsblk_t last_block; | ||
638 | |||
639 | block_group = ei->i_block_group; | ||
640 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | ||
641 | /* | ||
642 | * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME | ||
643 | * block groups per flexgroup, reserve the first block | ||
644 | * group for directories and special files. Regular | ||
645 | * files will start at the second block group. This | ||
646 | * tends to speed up directory access and improves | ||
647 | * fsck times. | ||
648 | */ | ||
649 | block_group &= ~(flex_size-1); | ||
650 | if (S_ISREG(inode->i_mode)) | ||
651 | block_group++; | ||
652 | } | ||
653 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); | ||
654 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | ||
655 | |||
656 | /* | ||
657 | * If we are doing delayed allocation, we don't need to take | ||
658 | * colour into account. | ||
659 | */ | ||
660 | if (test_opt(inode->i_sb, DELALLOC)) | ||
661 | return bg_start; | ||
662 | |||
663 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | ||
664 | colour = (current->pid % 16) * | ||
665 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | ||
666 | else | ||
667 | colour = (current->pid % 16) * ((last_block - bg_start) / 16); | ||
668 | return bg_start + colour; | ||
669 | } | ||
670 | |||
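Note: a worked example of the colour heuristic, assuming 32768 blocks per group, a group starting at block 1048576, delalloc off, and a writer with pid 4711:

	/*
	 * colour = (4711 % 16) * (32768 / 16) = 7 * 2048 = 14336
	 * goal   = 1048576 + 14336            = 1062912
	 *
	 * so up to sixteen concurrent writers are spread over sixteen
	 * disjoint 2048-block regions of the group instead of all
	 * contending for its first free blocks.
	 */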
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index fac90f3fba80..8efb2f0a3447 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -246,3 +246,24 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, | |||
246 | return 1; | 246 | return 1; |
247 | } | 247 | } |
248 | 248 | ||
249 | int ext4_check_blockref(const char *function, unsigned int line, | ||
250 | struct inode *inode, __le32 *p, unsigned int max) | ||
251 | { | ||
252 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
253 | __le32 *bref = p; | ||
254 | unsigned int blk; | ||
255 | |||
256 | while (bref < p+max) { | ||
257 | blk = le32_to_cpu(*bref++); | ||
258 | if (blk && | ||
259 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
260 | blk, 1))) { | ||
261 | es->s_last_error_block = cpu_to_le64(blk); | ||
262 | ext4_error_inode(inode, function, line, blk, | ||
263 | "invalid block"); | ||
264 | return -EIO; | ||
265 | } | ||
266 | } | ||
267 | return 0; | ||
268 | } | ||
269 | |||
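Note: callers reach this helper through the two wrapper macros added to ext4.h below. A minimal sketch of a call site, assuming bh holds a freshly read indirect block:

	if (ext4_check_indirect_blockref(inode, bh)) {
		brelse(bh);	/* at least one pointer was out of range */
		return -EIO;
	}
	/* every block number in bh->b_data is now known to be valid */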
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fa44df879711..b7d7bd0f066e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -175,6 +175,7 @@ struct mpage_da_data { | |||
175 | */ | 175 | */ |
176 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 176 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
177 | #define EXT4_IO_END_ERROR 0x0002 | 177 | #define EXT4_IO_END_ERROR 0x0002 |
178 | #define EXT4_IO_END_QUEUED 0x0004 | ||
178 | 179 | ||
179 | struct ext4_io_page { | 180 | struct ext4_io_page { |
180 | struct page *p_page; | 181 | struct page *p_page; |
@@ -526,6 +527,7 @@ struct ext4_new_group_data { | |||
526 | #define EXT4_FREE_BLOCKS_METADATA 0x0001 | 527 | #define EXT4_FREE_BLOCKS_METADATA 0x0001 |
527 | #define EXT4_FREE_BLOCKS_FORGET 0x0002 | 528 | #define EXT4_FREE_BLOCKS_FORGET 0x0002 |
528 | #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 | 529 | #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 |
530 | #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 | ||
529 | 531 | ||
530 | /* | 532 | /* |
531 | * ioctl commands | 533 | * ioctl commands |
@@ -939,6 +941,8 @@ struct ext4_inode_info { | |||
939 | #define ext4_find_next_zero_bit find_next_zero_bit_le | 941 | #define ext4_find_next_zero_bit find_next_zero_bit_le |
940 | #define ext4_find_next_bit find_next_bit_le | 942 | #define ext4_find_next_bit find_next_bit_le |
941 | 943 | ||
944 | extern void ext4_set_bits(void *bm, int cur, int len); | ||
945 | |||
942 | /* | 946 | /* |
943 | * Maximal mount counts between two filesystem checks | 947 | * Maximal mount counts between two filesystem checks |
944 | */ | 948 | */ |
@@ -1126,7 +1130,8 @@ struct ext4_sb_info { | |||
1126 | struct journal_s *s_journal; | 1130 | struct journal_s *s_journal; |
1127 | struct list_head s_orphan; | 1131 | struct list_head s_orphan; |
1128 | struct mutex s_orphan_lock; | 1132 | struct mutex s_orphan_lock; |
1129 | struct mutex s_resize_lock; | 1133 | unsigned long s_resize_flags; /* Flags indicating if there |
1134 | is a resizer */ | ||
1130 | unsigned long s_commit_interval; | 1135 | unsigned long s_commit_interval; |
1131 | u32 s_max_batch_time; | 1136 | u32 s_max_batch_time; |
1132 | u32 s_min_batch_time; | 1137 | u32 s_min_batch_time; |
@@ -1214,6 +1219,9 @@ struct ext4_sb_info { | |||
1214 | 1219 | ||
1215 | /* Kernel thread for multiple mount protection */ | 1220 | /* Kernel thread for multiple mount protection */ |
1216 | struct task_struct *s_mmp_tsk; | 1221 | struct task_struct *s_mmp_tsk; |
1222 | |||
1223 | /* record the last minlen when FITRIM is called. */ | ||
1224 | atomic_t s_last_trim_minblks; | ||
1217 | }; | 1225 | }; |
1218 | 1226 | ||
1219 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1227 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1743,6 +1751,7 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb, | |||
1743 | struct ext4_group_desc *desc); | 1751 | struct ext4_group_desc *desc); |
1744 | #define ext4_free_blocks_after_init(sb, group, desc) \ | 1752 | #define ext4_free_blocks_after_init(sb, group, desc) \ |
1745 | ext4_init_block_bitmap(sb, NULL, group, desc) | 1753 | ext4_init_block_bitmap(sb, NULL, group, desc) |
1754 | ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); | ||
1746 | 1755 | ||
1747 | /* dir.c */ | 1756 | /* dir.c */ |
1748 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, | 1757 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, |
@@ -1793,7 +1802,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
1793 | unsigned long count, int flags); | 1802 | unsigned long count, int flags); |
1794 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1803 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1795 | ext4_group_t i, struct ext4_group_desc *desc); | 1804 | ext4_group_t i, struct ext4_group_desc *desc); |
1796 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | 1805 | extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, |
1797 | ext4_fsblk_t block, unsigned long count); | 1806 | ext4_fsblk_t block, unsigned long count); |
1798 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); | 1807 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
1799 | 1808 | ||
@@ -1834,6 +1843,17 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | |||
1834 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1843 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1835 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1844 | extern void ext4_da_update_reserve_space(struct inode *inode, |
1836 | int used, int quota_claim); | 1845 | int used, int quota_claim); |
1846 | |||
1847 | /* indirect.c */ | ||
1848 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | ||
1849 | struct ext4_map_blocks *map, int flags); | ||
1850 | extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | ||
1851 | const struct iovec *iov, loff_t offset, | ||
1852 | unsigned long nr_segs); | ||
1853 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); | ||
1854 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); | ||
1855 | extern void ext4_ind_truncate(struct inode *inode); | ||
1856 | |||
1837 | /* ioctl.c */ | 1857 | /* ioctl.c */ |
1838 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1858 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
1839 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); | 1859 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
@@ -1855,6 +1875,9 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1855 | ext4_fsblk_t n_blocks_count); | 1875 | ext4_fsblk_t n_blocks_count); |
1856 | 1876 | ||
1857 | /* super.c */ | 1877 | /* super.c */ |
1878 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | ||
1879 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); | ||
1880 | extern void ext4_kvfree(void *ptr); | ||
1858 | extern void __ext4_error(struct super_block *, const char *, unsigned int, | 1881 | extern void __ext4_error(struct super_block *, const char *, unsigned int, |
1859 | const char *, ...) | 1882 | const char *, ...) |
1860 | __attribute__ ((format (printf, 4, 5))); | 1883 | __attribute__ ((format (printf, 4, 5))); |
@@ -2067,11 +2090,19 @@ struct ext4_group_info { | |||
2067 | * 5 free 8-block regions. */ | 2090 | * 5 free 8-block regions. */ |
2068 | }; | 2091 | }; |
2069 | 2092 | ||
2070 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 2093 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
2094 | #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 | ||
2071 | 2095 | ||
2072 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 2096 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
2073 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 2097 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
2074 | 2098 | ||
2099 | #define EXT4_MB_GRP_WAS_TRIMMED(grp) \ | ||
2100 | (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | ||
2101 | #define EXT4_MB_GRP_SET_TRIMMED(grp) \ | ||
2102 | (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | ||
2103 | #define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ | ||
2104 | (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | ||
2105 | |||
2075 | #define EXT4_MAX_CONTENTION 8 | 2106 | #define EXT4_MAX_CONTENTION 8 |
2076 | #define EXT4_CONTENTION_THRESHOLD 2 | 2107 | #define EXT4_CONTENTION_THRESHOLD 2 |
2077 | 2108 | ||
@@ -2123,6 +2154,19 @@ static inline void ext4_mark_super_dirty(struct super_block *sb) | |||
2123 | } | 2154 | } |
2124 | 2155 | ||
2125 | /* | 2156 | /* |
2157 | * Block validity checking | ||
2158 | */ | ||
2159 | #define ext4_check_indirect_blockref(inode, bh) \ | ||
2160 | ext4_check_blockref(__func__, __LINE__, inode, \ | ||
2161 | (__le32 *)(bh)->b_data, \ | ||
2162 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | ||
2163 | |||
2164 | #define ext4_ind_check_inode(inode) \ | ||
2165 | ext4_check_blockref(__func__, __LINE__, inode, \ | ||
2166 | EXT4_I(inode)->i_data, \ | ||
2167 | EXT4_NDIR_BLOCKS) | ||
2168 | |||
2169 | /* | ||
2126 | * Inodes and files operations | 2170 | * Inodes and files operations |
2127 | */ | 2171 | */ |
2128 | 2172 | ||
@@ -2151,6 +2195,8 @@ extern void ext4_exit_system_zone(void); | |||
2151 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | 2195 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, |
2152 | ext4_fsblk_t start_blk, | 2196 | ext4_fsblk_t start_blk, |
2153 | unsigned int count); | 2197 | unsigned int count); |
2198 | extern int ext4_check_blockref(const char *, unsigned int, | ||
2199 | struct inode *, __le32 *, unsigned int); | ||
2154 | 2200 | ||
2155 | /* extents.c */ | 2201 | /* extents.c */ |
2156 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 2202 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
@@ -2230,6 +2276,10 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
2230 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | 2276 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; |
2231 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | 2277 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; |
2232 | 2278 | ||
2279 | #define EXT4_RESIZING 0 | ||
2280 | extern int ext4_resize_begin(struct super_block *sb); | ||
2281 | extern void ext4_resize_end(struct super_block *sb); | ||
2282 | |||
2233 | #endif /* __KERNEL__ */ | 2283 | #endif /* __KERNEL__ */ |
2234 | 2284 | ||
2235 | #endif /* _EXT4_H */ | 2285 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index bb85757689b6..5802fa1dab18 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode) | |||
289 | 289 | ||
290 | static inline int ext4_should_writeback_data(struct inode *inode) | 290 | static inline int ext4_should_writeback_data(struct inode *inode) |
291 | { | 291 | { |
292 | if (!S_ISREG(inode->i_mode)) | ||
293 | return 0; | ||
294 | if (EXT4_JOURNAL(inode) == NULL) | 292 | if (EXT4_JOURNAL(inode) == NULL) |
295 | return 1; | 293 | return 1; |
294 | if (!S_ISREG(inode->i_mode)) | ||
295 | return 0; | ||
296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) | 296 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
297 | return 0; | 297 | return 0; |
298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 298 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
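Note: the reorder above only changes behaviour for filesystems running without a journal, where the old code answered 0 for non-regular inodes. The decision order is now:

	/*
	 * EXT4_JOURNAL(inode) == NULL -> 1  (no journal: writeback semantics;
	 *                                    previously lost for !S_ISREG)
	 * !S_ISREG(inode->i_mode)     -> 0
	 * inode flagged JOURNAL_DATA  -> 0
	 * data=writeback mount        -> 1, otherwise 0
	 */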
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f815cc81e7a2..57cf568a98ab 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -114,12 +114,6 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
114 | struct ext4_ext_path *path, | 114 | struct ext4_ext_path *path, |
115 | ext4_lblk_t block) | 115 | ext4_lblk_t block) |
116 | { | 116 | { |
117 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
118 | ext4_fsblk_t bg_start; | ||
119 | ext4_fsblk_t last_block; | ||
120 | ext4_grpblk_t colour; | ||
121 | ext4_group_t block_group; | ||
122 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | ||
123 | int depth; | 117 | int depth; |
124 | 118 | ||
125 | if (path) { | 119 | if (path) { |
@@ -161,36 +155,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
161 | } | 155 | } |
162 | 156 | ||
163 | /* OK. use inode's group */ | 157 | /* OK. use inode's group */ |
164 | block_group = ei->i_block_group; | 158 | return ext4_inode_to_goal_block(inode); |
165 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | ||
166 | /* | ||
167 | * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME | ||
168 | * block groups per flexgroup, reserve the first block | ||
169 | * group for directories and special files. Regular | ||
170 | * files will start at the second block group. This | ||
171 | * tends to speed up directory access and improves | ||
172 | * fsck times. | ||
173 | */ | ||
174 | block_group &= ~(flex_size-1); | ||
175 | if (S_ISREG(inode->i_mode)) | ||
176 | block_group++; | ||
177 | } | ||
178 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); | ||
179 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | ||
180 | |||
181 | /* | ||
182 | * If we are doing delayed allocation, we don't need to take | ||
183 | * colour into account. | ||
184 | */ | ||
185 | if (test_opt(inode->i_sb, DELALLOC)) | ||
186 | return bg_start; | ||
187 | |||
188 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | ||
189 | colour = (current->pid % 16) * | ||
190 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | ||
191 | else | ||
192 | colour = (current->pid % 16) * ((last_block - bg_start) / 16); | ||
193 | return bg_start + colour + block; | ||
194 | } | 159 | } |
195 | 160 | ||
196 | /* | 161 | /* |
@@ -776,6 +741,16 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
776 | logical, le32_to_cpu(curp->p_idx->ei_block)); | 741 | logical, le32_to_cpu(curp->p_idx->ei_block)); |
777 | return -EIO; | 742 | return -EIO; |
778 | } | 743 | } |
744 | |||
745 | if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) | ||
746 | >= le16_to_cpu(curp->p_hdr->eh_max))) { | ||
747 | EXT4_ERROR_INODE(inode, | ||
748 | "eh_entries %d >= eh_max %d!", | ||
749 | le16_to_cpu(curp->p_hdr->eh_entries), | ||
750 | le16_to_cpu(curp->p_hdr->eh_max)); | ||
751 | return -EIO; | ||
752 | } | ||
753 | |||
779 | len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; | 754 | len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; |
780 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { | 755 | if (logical > le32_to_cpu(curp->p_idx->ei_block)) { |
781 | /* insert after */ | 756 | /* insert after */ |
@@ -805,13 +780,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
805 | ext4_idx_store_pblock(ix, ptr); | 780 | ext4_idx_store_pblock(ix, ptr); |
806 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); | 781 | le16_add_cpu(&curp->p_hdr->eh_entries, 1); |
807 | 782 | ||
808 | if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) | ||
809 | > le16_to_cpu(curp->p_hdr->eh_max))) { | ||
810 | EXT4_ERROR_INODE(inode, | ||
811 | "logical %d == ei_block %d!", | ||
812 | logical, le32_to_cpu(curp->p_idx->ei_block)); | ||
813 | return -EIO; | ||
814 | } | ||
815 | if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { | 783 | if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { |
816 | EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); | 784 | EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); |
817 | return -EIO; | 785 | return -EIO; |
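Note: the overflow check moves from after the insertion (old lines 808-814, which also reused the error text of the unrelated logical-block check) to before it, so a full index node is rejected before any entries are shifted. Sketch of the guard-then-mutate order:

	/* refuse to grow a full index node before shifting entries */
	if (le16_to_cpu(curp->p_hdr->eh_entries) >=
	    le16_to_cpu(curp->p_hdr->eh_max))
		return -EIO;
	/* ... memmove() opens a slot and the new index is written ... */
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);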
@@ -1446,8 +1414,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1446 | * ext4_ext_next_leaf_block: | 1414 | * ext4_ext_next_leaf_block: |
1447 | * returns first allocated block from next leaf or EXT_MAX_BLOCKS | 1415 | * returns first allocated block from next leaf or EXT_MAX_BLOCKS |
1448 | */ | 1416 | */ |
1449 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | 1417 | static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) |
1450 | struct ext4_ext_path *path) | ||
1451 | { | 1418 | { |
1452 | int depth; | 1419 | int depth; |
1453 | 1420 | ||
@@ -1757,7 +1724,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1757 | goto merge; | 1724 | goto merge; |
1758 | } | 1725 | } |
1759 | 1726 | ||
1760 | repeat: | ||
1761 | depth = ext_depth(inode); | 1727 | depth = ext_depth(inode); |
1762 | eh = path[depth].p_hdr; | 1728 | eh = path[depth].p_hdr; |
1763 | if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) | 1729 | if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) |
@@ -1765,9 +1731,10 @@ repeat: | |||
1765 | 1731 | ||
1766 | /* probably next leaf has space for us? */ | 1732 | /* probably next leaf has space for us? */ |
1767 | fex = EXT_LAST_EXTENT(eh); | 1733 | fex = EXT_LAST_EXTENT(eh); |
1768 | next = ext4_ext_next_leaf_block(inode, path); | 1734 | next = EXT_MAX_BLOCKS; |
1769 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) | 1735 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) |
1770 | && next != EXT_MAX_BLOCKS) { | 1736 | next = ext4_ext_next_leaf_block(path); |
1737 | if (next != EXT_MAX_BLOCKS) { | ||
1771 | ext_debug("next leaf block - %d\n", next); | 1738 | ext_debug("next leaf block - %d\n", next); |
1772 | BUG_ON(npath != NULL); | 1739 | BUG_ON(npath != NULL); |
1773 | npath = ext4_ext_find_extent(inode, next, NULL); | 1740 | npath = ext4_ext_find_extent(inode, next, NULL); |
@@ -1779,7 +1746,7 @@ repeat: | |||
1779 | ext_debug("next leaf isn't full(%d)\n", | 1746 | ext_debug("next leaf isn't full(%d)\n", |
1780 | le16_to_cpu(eh->eh_entries)); | 1747 | le16_to_cpu(eh->eh_entries)); |
1781 | path = npath; | 1748 | path = npath; |
1782 | goto repeat; | 1749 | goto has_space; |
1783 | } | 1750 | } |
1784 | ext_debug("next leaf has no free space(%d,%d)\n", | 1751 | ext_debug("next leaf has no free space(%d,%d)\n", |
1785 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); | 1752 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); |
@@ -1839,7 +1806,7 @@ has_space: | |||
1839 | ext4_ext_pblock(newext), | 1806 | ext4_ext_pblock(newext), |
1840 | ext4_ext_is_uninitialized(newext), | 1807 | ext4_ext_is_uninitialized(newext), |
1841 | ext4_ext_get_actual_len(newext), | 1808 | ext4_ext_get_actual_len(newext), |
1842 | nearex, len, nearex + 1, nearex + 2); | 1809 | nearex, len, nearex, nearex + 1); |
1843 | memmove(nearex + 1, nearex, len); | 1810 | memmove(nearex + 1, nearex, len); |
1844 | path[depth].p_ext = nearex; | 1811 | path[depth].p_ext = nearex; |
1845 | } | 1812 | } |
@@ -2052,7 +2019,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2052 | } | 2019 | } |
2053 | 2020 | ||
2054 | /* | 2021 | /* |
2055 | * ext4_ext_in_cache() | 2022 | * ext4_ext_check_cache() |
2056 | * Checks to see if the given block is in the cache. | 2023 | * Checks to see if the given block is in the cache. |
2057 | * If it is, the cached extent is stored in the given | 2024 | * If it is, the cached extent is stored in the given |
2058 | * cache extent pointer. If the cached extent is a hole, | 2025 | * cache extent pointer. If the cached extent is a hole, |
@@ -2134,8 +2101,6 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |||
2134 | /* | 2101 | /* |
2135 | * ext4_ext_rm_idx: | 2102 | * ext4_ext_rm_idx: |
2136 | * removes index from the index block. | 2103 | * removes index from the index block. |
2137 | * It's used in truncate case only, thus all requests are for | ||
2138 | * last index in the block only. | ||
2139 | */ | 2104 | */ |
2140 | static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | 2105 | static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, |
2141 | struct ext4_ext_path *path) | 2106 | struct ext4_ext_path *path) |
@@ -2153,6 +2118,13 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2153 | err = ext4_ext_get_access(handle, inode, path); | 2118 | err = ext4_ext_get_access(handle, inode, path); |
2154 | if (err) | 2119 | if (err) |
2155 | return err; | 2120 | return err; |
2121 | |||
2122 | if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) { | ||
2123 | int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx; | ||
2124 | len *= sizeof(struct ext4_extent_idx); | ||
2125 | memmove(path->p_idx, path->p_idx + 1, len); | ||
2126 | } | ||
2127 | |||
2156 | le16_add_cpu(&path->p_hdr->eh_entries, -1); | 2128 | le16_add_cpu(&path->p_hdr->eh_entries, -1); |
2157 | err = ext4_ext_dirty(handle, inode, path); | 2129 | err = ext4_ext_dirty(handle, inode, path); |
2158 | if (err) | 2130 | if (err) |
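ext4_ext_rm_idx() used to be called only for the last index in a block (the truncate case the deleted comment described); punch hole can now hand it an interior index, so the new memmove slides the tail of the array left before the entry count is decremented. The same compaction in plain C, with a hypothetical index type:

```c
#include <string.h>

/* Hypothetical stand-in for struct ext4_extent_idx. */
struct idx {
	unsigned int block, pblk;
};

/* Remove arr[pos] from an array of *nr entries, closing the gap. */
static void remove_idx(struct idx *arr, unsigned int *nr, unsigned int pos)
{
	if (pos + 1 < *nr)
		memmove(&arr[pos], &arr[pos + 1],
			(*nr - pos - 1) * sizeof(arr[0]));
	(*nr)--;
}
```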
@@ -2534,8 +2506,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2534 | return 1; | 2506 | return 1; |
2535 | } | 2507 | } |
2536 | 2508 | ||
2537 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | 2509 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) |
2538 | ext4_lblk_t end) | ||
2539 | { | 2510 | { |
2540 | struct super_block *sb = inode->i_sb; | 2511 | struct super_block *sb = inode->i_sb; |
2541 | int depth = ext_depth(inode); | 2512 | int depth = ext_depth(inode); |
@@ -2575,7 +2546,7 @@ again: | |||
2575 | if (i == depth) { | 2546 | if (i == depth) { |
2576 | /* this is leaf block */ | 2547 | /* this is leaf block */ |
2577 | err = ext4_ext_rm_leaf(handle, inode, path, | 2548 | err = ext4_ext_rm_leaf(handle, inode, path, |
2578 | start, end); | 2549 | start, EXT_MAX_BLOCKS - 1); |
2579 | /* root level has p_bh == NULL, brelse() eats this */ | 2550 | /* root level has p_bh == NULL, brelse() eats this */ |
2580 | brelse(path[i].p_bh); | 2551 | brelse(path[i].p_bh); |
2581 | path[i].p_bh = NULL; | 2552 | path[i].p_bh = NULL; |
@@ -3107,12 +3078,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3107 | struct ext4_ext_path *path) | 3078 | struct ext4_ext_path *path) |
3108 | { | 3079 | { |
3109 | struct ext4_extent *ex; | 3080 | struct ext4_extent *ex; |
3110 | struct ext4_extent_header *eh; | ||
3111 | int depth; | 3081 | int depth; |
3112 | int err = 0; | 3082 | int err = 0; |
3113 | 3083 | ||
3114 | depth = ext_depth(inode); | 3084 | depth = ext_depth(inode); |
3115 | eh = path[depth].p_hdr; | ||
3116 | ex = path[depth].p_ext; | 3085 | ex = path[depth].p_ext; |
3117 | 3086 | ||
3118 | ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" | 3087 | ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" |
@@ -3357,8 +3326,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3357 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 3326 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
3358 | 3327 | ||
3359 | /* check in cache */ | 3328 | /* check in cache */ |
3360 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && | 3329 | if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && |
3361 | ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { | 3330 | ext4_ext_in_cache(inode, map->m_lblk, &newex)) { |
3362 | if (!newex.ee_start_lo && !newex.ee_start_hi) { | 3331 | if (!newex.ee_start_lo && !newex.ee_start_hi) { |
3363 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { | 3332 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
3364 | /* | 3333 | /* |
@@ -3497,8 +3466,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3497 | 3466 | ||
3498 | ext4_ext_mark_uninitialized(ex); | 3467 | ext4_ext_mark_uninitialized(ex); |
3499 | 3468 | ||
3500 | err = ext4_ext_remove_space(inode, map->m_lblk, | 3469 | ext4_ext_invalidate_cache(inode); |
3501 | map->m_lblk + punched_out); | 3470 | |
3471 | err = ext4_ext_rm_leaf(handle, inode, path, | ||
3472 | map->m_lblk, map->m_lblk + punched_out); | ||
3473 | |||
3474 | if (!err && path->p_hdr->eh_entries == 0) { | ||
3475 | /* | ||
3476 | * Punch hole freed all of this sub tree, | ||
3477 | * so we need to correct eh_depth | ||
3478 | */ | ||
3479 | err = ext4_ext_get_access(handle, inode, path); | ||
3480 | if (err == 0) { | ||
3481 | ext_inode_hdr(inode)->eh_depth = 0; | ||
3482 | ext_inode_hdr(inode)->eh_max = | ||
3483 | cpu_to_le16(ext4_ext_space_root( | ||
3484 | inode, 0)); | ||
3485 | |||
3486 | err = ext4_ext_dirty( | ||
3487 | handle, inode, path); | ||
3488 | } | ||
3489 | } | ||
3502 | 3490 | ||
3503 | goto out2; | 3491 | goto out2; |
3504 | } | 3492 | } |
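When the punched-out range frees everything under the root, the new code resets the inode's extent tree to depth 0 and restores eh_max to the root's inline capacity (the ext4_ext_space_root() value). A sketch of that collapse step, assuming a plain-int header rather than ext4's little-endian on-disk fields:

```c
/* Hypothetical header; the real one uses __le16 fields. */
struct ext_header {
	int entries, max, depth;
};

static void collapse_root(struct ext_header *root, int inline_capacity)
{
	if (root->entries == 0) {
		root->depth = 0;		/* tree fits inline again */
		root->max = inline_capacity;	/* ext4_ext_space_root() analogue */
	}
}
```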
@@ -3596,17 +3584,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3596 | } | 3584 | } |
3597 | 3585 | ||
3598 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); | 3586 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); |
3599 | if (err) | 3587 | if (!err) |
3600 | goto out2; | 3588 | err = ext4_ext_insert_extent(handle, inode, path, |
3601 | 3589 | &newex, flags); | |
3602 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | ||
3603 | if (err) { | 3590 | if (err) { |
3591 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? | ||
3592 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; | ||
3604 | /* free data blocks we just allocated */ | 3593 | /* free data blocks we just allocated */ |
3605 | /* not a good idea to call discard here directly, | 3594 | /* not a good idea to call discard here directly, |
3606 | * but otherwise we'd need to call it every free() */ | 3595 | * but otherwise we'd need to call it every free() */ |
3607 | ext4_discard_preallocations(inode); | 3596 | ext4_discard_preallocations(inode); |
3608 | ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), | 3597 | ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), |
3609 | ext4_ext_get_actual_len(&newex), 0); | 3598 | ext4_ext_get_actual_len(&newex), fb_flags); |
3610 | goto out2; | 3599 | goto out2; |
3611 | } | 3600 | } |
3612 | 3601 | ||
@@ -3699,7 +3688,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
3699 | 3688 | ||
3700 | last_block = (inode->i_size + sb->s_blocksize - 1) | 3689 | last_block = (inode->i_size + sb->s_blocksize - 1) |
3701 | >> EXT4_BLOCK_SIZE_BITS(sb); | 3690 | >> EXT4_BLOCK_SIZE_BITS(sb); |
3702 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); | 3691 | err = ext4_ext_remove_space(inode, last_block); |
3703 | 3692 | ||
3704 | /* In a multi-transaction truncate, we only make the final | 3693 | /* In a multi-transaction truncate, we only make the final |
3705 | * transaction synchronous. | 3694 | * transaction synchronous. |
@@ -3835,7 +3824,7 @@ retry: | |||
3835 | blkbits) >> blkbits)) | 3824 | blkbits) >> blkbits)) |
3836 | new_size = offset + len; | 3825 | new_size = offset + len; |
3837 | else | 3826 | else |
3838 | new_size = (map.m_lblk + ret) << blkbits; | 3827 | new_size = ((loff_t) map.m_lblk + ret) << blkbits; |
3839 | 3828 | ||
3840 | ext4_falloc_update_inode(inode, mode, new_size, | 3829 | ext4_falloc_update_inode(inode, mode, new_size, |
3841 | (map.m_flags & EXT4_MAP_NEW)); | 3830 | (map.m_flags & EXT4_MAP_NEW)); |
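The one-line fallocate fix above is a 32-bit overflow repair: map.m_lblk is a 32-bit logical block number, so without widening, the shift is evaluated in 32 bits and new_size wraps for results at or past 4 GiB (with 4K blocks). A small standalone demonstration:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t m_lblk = 0x00400000;	/* block 4Mi = 16 GiB at 4K blocks */
	int ret = 8, blkbits = 12;

	/* Evaluated in 32 bits: wraps modulo 2^32. */
	long long wrong = (m_lblk + ret) << blkbits;
	/* One operand widened first, as in the fix. */
	long long right = ((long long)m_lblk + ret) << blkbits;

	printf("wrong=%lld right=%lld\n", wrong, right);
	return 0;
}
```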
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index da3bed3e0c29..036f78f7a1ef 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -129,15 +129,30 @@ static int ext4_sync_parent(struct inode *inode) | |||
129 | { | 129 | { |
130 | struct writeback_control wbc; | 130 | struct writeback_control wbc; |
131 | struct dentry *dentry = NULL; | 131 | struct dentry *dentry = NULL; |
132 | struct inode *next; | ||
132 | int ret = 0; | 133 | int ret = 0; |
133 | 134 | ||
134 | while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { | 135 | if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) |
136 | return 0; | ||
137 | inode = igrab(inode); | ||
138 | while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { | ||
135 | ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); | 139 | ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); |
136 | dentry = list_entry(inode->i_dentry.next, | 140 | dentry = NULL; |
137 | struct dentry, d_alias); | 141 | spin_lock(&inode->i_lock); |
138 | if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) | 142 | if (!list_empty(&inode->i_dentry)) { |
143 | dentry = list_first_entry(&inode->i_dentry, | ||
144 | struct dentry, d_alias); | ||
145 | dget(dentry); | ||
146 | } | ||
147 | spin_unlock(&inode->i_lock); | ||
148 | if (!dentry) | ||
139 | break; | 149 | break; |
140 | inode = dentry->d_parent->d_inode; | 150 | next = igrab(dentry->d_parent->d_inode); |
151 | dput(dentry); | ||
152 | if (!next) | ||
153 | break; | ||
154 | iput(inode); | ||
155 | inode = next; | ||
141 | ret = sync_mapping_buffers(inode->i_mapping); | 156 | ret = sync_mapping_buffers(inode->i_mapping); |
142 | if (ret) | 157 | if (ret) |
143 | break; | 158 | break; |
@@ -148,6 +163,7 @@ static int ext4_sync_parent(struct inode *inode) | |||
148 | if (ret) | 163 | if (ret) |
149 | break; | 164 | break; |
150 | } | 165 | } |
166 | iput(inode); | ||
151 | return ret; | 167 | return ret; |
152 | } | 168 | } |
153 | 169 | ||
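The ext4_sync_parent() rewrite pins each inode with igrab() and takes i_lock (plus a dget()) around the i_dentry walk, so the loop never follows a dentry or parent inode that could be torn down underneath it; the parent is grabbed before the child is released. The hand-over-hand refcounting skeleton, with a hypothetical node type standing in for the VFS objects:

```c
/* Hypothetical refcounted node; not the real inode/dentry API. */
struct node {
	struct node *parent;
	int refcount;
};

static struct node *grab(struct node *n)
{
	if (n)
		n->refcount++;
	return n;
}

static void put(struct node *n)
{
	if (n)
		n->refcount--;
}

static void walk_to_root(struct node *n)
{
	n = grab(n);
	while (n) {
		struct node *next = grab(n->parent);	/* pin parent first */
		put(n);					/* ...then drop child */
		n = next;
	}
}
```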
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 21bb2f61e502..9c63f273b550 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1287,7 +1287,7 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | |||
1287 | group, used_blks, | 1287 | group, used_blks, |
1288 | ext4_itable_unused_count(sb, gdp)); | 1288 | ext4_itable_unused_count(sb, gdp)); |
1289 | ret = 1; | 1289 | ret = 1; |
1290 | goto out; | 1290 | goto err_out; |
1291 | } | 1291 | } |
1292 | 1292 | ||
1293 | blk = ext4_inode_table(sb, gdp) + used_blks; | 1293 | blk = ext4_inode_table(sb, gdp) + used_blks; |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
new file mode 100644
index 000000000000..0962642119c0
--- /dev/null
+++ b/fs/ext4/indirect.c
@@ -0,0 +1,1487 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/indirect.c | ||
3 | * | ||
4 | * from | ||
5 | * | ||
6 | * linux/fs/ext4/inode.c | ||
7 | * | ||
8 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
9 | * Remy Card (card@masi.ibp.fr) | ||
10 | * Laboratoire MASI - Institut Blaise Pascal | ||
11 | * Universite Pierre et Marie Curie (Paris VI) | ||
12 | * | ||
13 | * from | ||
14 | * | ||
15 | * linux/fs/minix/inode.c | ||
16 | * | ||
17 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
18 | * | ||
19 | * Goal-directed block allocation by Stephen Tweedie | ||
20 | * (sct@redhat.com), 1993, 1998 | ||
21 | */ | ||
22 | |||
23 | #include <linux/module.h> | ||
24 | #include "ext4_jbd2.h" | ||
25 | #include "truncate.h" | ||
26 | |||
27 | #include <trace/events/ext4.h> | ||
28 | |||
29 | typedef struct { | ||
30 | __le32 *p; | ||
31 | __le32 key; | ||
32 | struct buffer_head *bh; | ||
33 | } Indirect; | ||
34 | |||
35 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | ||
36 | { | ||
37 | p->key = *(p->p = v); | ||
38 | p->bh = bh; | ||
39 | } | ||
40 | |||
41 | /** | ||
42 | * ext4_block_to_path - parse the block number into array of offsets | ||
43 | * @inode: inode in question (we are only interested in its superblock) | ||
44 | * @i_block: block number to be parsed | ||
45 | * @offsets: array to store the offsets in | ||
46 | * @boundary: set this non-zero if the referred-to block is likely to be | ||
47 | * followed (on disk) by an indirect block. | ||
48 | * | ||
49 | * To store the locations of a file's data, ext4 uses a data structure common | ||
50 | * for UNIX filesystems - tree of pointers anchored in the inode, with | ||
51 | * data blocks at leaves and indirect blocks in intermediate nodes. | ||
52 | * This function translates the block number into a path in that tree - | ||
53 | * the return value is the path length and @offsets[n] is the offset of | ||
54 | * the pointer to the (n+1)th node in the nth one. If @block is out of | ||
55 | * range (negative or too large), a warning is printed and zero is returned. | ||
56 | * | ||
57 | * Note: function doesn't find node addresses, so no IO is needed. All | ||
58 | * we need to know is the capacity of indirect blocks (taken from the | ||
59 | * inode->i_sb). | ||
60 | */ | ||
61 | |||
62 | /* | ||
63 | * Portability note: the last comparison (check that we fit into triple | ||
64 | * indirect block) is spelled differently, because otherwise on an | ||
65 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | ||
66 | * if our filesystem had 8Kb blocks. We might use long long, but that would | ||
67 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | ||
68 | * i_block would have to be negative in the very beginning, so we would not | ||
69 | * get there at all. | ||
70 | */ | ||
71 | |||
72 | static int ext4_block_to_path(struct inode *inode, | ||
73 | ext4_lblk_t i_block, | ||
74 | ext4_lblk_t offsets[4], int *boundary) | ||
75 | { | ||
76 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
77 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); | ||
78 | const long direct_blocks = EXT4_NDIR_BLOCKS, | ||
79 | indirect_blocks = ptrs, | ||
80 | double_blocks = (1 << (ptrs_bits * 2)); | ||
81 | int n = 0; | ||
82 | int final = 0; | ||
83 | |||
84 | if (i_block < direct_blocks) { | ||
85 | offsets[n++] = i_block; | ||
86 | final = direct_blocks; | ||
87 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | ||
88 | offsets[n++] = EXT4_IND_BLOCK; | ||
89 | offsets[n++] = i_block; | ||
90 | final = ptrs; | ||
91 | } else if ((i_block -= indirect_blocks) < double_blocks) { | ||
92 | offsets[n++] = EXT4_DIND_BLOCK; | ||
93 | offsets[n++] = i_block >> ptrs_bits; | ||
94 | offsets[n++] = i_block & (ptrs - 1); | ||
95 | final = ptrs; | ||
96 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { | ||
97 | offsets[n++] = EXT4_TIND_BLOCK; | ||
98 | offsets[n++] = i_block >> (ptrs_bits * 2); | ||
99 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); | ||
100 | offsets[n++] = i_block & (ptrs - 1); | ||
101 | final = ptrs; | ||
102 | } else { | ||
103 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", | ||
104 | i_block + direct_blocks + | ||
105 | indirect_blocks + double_blocks, inode->i_ino); | ||
106 | } | ||
107 | if (boundary) | ||
108 | *boundary = final - 1 - (i_block & (ptrs - 1)); | ||
109 | return n; | ||
110 | } | ||
111 | |||
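With 4K blocks there are 1024 pointers per indirect block (ptrs_bits = 10), so the decomposition above is easy to check by hand. For example, logical block 20000 lies past the 12 direct slots and the 1024 indirect slots, landing in the double-indirect tree at slot 18, entry 532:

```c
#include <stdio.h>

int main(void)
{
	long i_block = 20000, ptrs = 1024, direct = 12;

	i_block -= direct;	/* 19988: past the direct blocks  */
	i_block -= ptrs;	/* 18964: past the indirect block */
	/* double indirect: offsets = [DIND, i_block >> 10, i_block & 1023] */
	printf("dind slot %ld, entry %ld\n",
	       i_block >> 10, i_block & (ptrs - 1));	/* 18, 532 */
	return 0;
}
```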
112 | /** | ||
113 | * ext4_get_branch - read the chain of indirect blocks leading to data | ||
114 | * @inode: inode in question | ||
115 | * @depth: depth of the chain (1 - direct pointer, etc.) | ||
116 | * @offsets: offsets of pointers in inode/indirect blocks | ||
117 | * @chain: place to store the result | ||
118 | * @err: here we store the error value | ||
119 | * | ||
120 | * Function fills the array of triples <key, p, bh> and returns %NULL | ||
121 | * if everything went OK or the pointer to the last filled triple | ||
122 | * (incomplete one) otherwise. Upon the return chain[i].key contains | ||
123 | * the number of (i+1)-th block in the chain (as it is stored in memory, | ||
124 | * i.e. little-endian 32-bit), chain[i].p contains the address of that | ||
125 | * number (it points into struct inode for i==0 and into the bh->b_data | ||
126 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect | ||
127 | * block for i>0 and NULL for i==0. In other words, it holds the block | ||
128 | * numbers of the chain, addresses they were taken from (and where we can | ||
129 | * verify that chain did not change) and buffer_heads hosting these | ||
130 | * numbers. | ||
131 | * | ||
132 | * Function stops when it stumbles upon zero pointer (absent block) | ||
133 | * (pointer to last triple returned, *@err == 0) | ||
134 | * or when it gets an IO error reading an indirect block | ||
135 | * (ditto, *@err == -EIO) | ||
136 | * or when it reads all @depth-1 indirect blocks successfully and finds | ||
137 | * the whole chain, all the way to the data (returns %NULL, *err == 0). | ||
138 | * | ||
139 | * Needs to be called with | ||
140 | * down_read(&EXT4_I(inode)->i_data_sem) | ||
141 | */ | ||
142 | static Indirect *ext4_get_branch(struct inode *inode, int depth, | ||
143 | ext4_lblk_t *offsets, | ||
144 | Indirect chain[4], int *err) | ||
145 | { | ||
146 | struct super_block *sb = inode->i_sb; | ||
147 | Indirect *p = chain; | ||
148 | struct buffer_head *bh; | ||
149 | |||
150 | *err = 0; | ||
151 | /* i_data is not going away, no lock needed */ | ||
152 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); | ||
153 | if (!p->key) | ||
154 | goto no_block; | ||
155 | while (--depth) { | ||
156 | bh = sb_getblk(sb, le32_to_cpu(p->key)); | ||
157 | if (unlikely(!bh)) | ||
158 | goto failure; | ||
159 | |||
160 | if (!bh_uptodate_or_lock(bh)) { | ||
161 | if (bh_submit_read(bh) < 0) { | ||
162 | put_bh(bh); | ||
163 | goto failure; | ||
164 | } | ||
165 | /* validate block references */ | ||
166 | if (ext4_check_indirect_blockref(inode, bh)) { | ||
167 | put_bh(bh); | ||
168 | goto failure; | ||
169 | } | ||
170 | } | ||
171 | |||
172 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); | ||
173 | /* Reader: end */ | ||
174 | if (!p->key) | ||
175 | goto no_block; | ||
176 | } | ||
177 | return NULL; | ||
178 | |||
179 | failure: | ||
180 | *err = -EIO; | ||
181 | no_block: | ||
182 | return p; | ||
183 | } | ||
184 | |||
185 | /** | ||
186 | * ext4_find_near - find a place for allocation with sufficient locality | ||
187 | * @inode: owner | ||
188 | * @ind: descriptor of indirect block. | ||
189 | * | ||
190 | * This function returns the preferred place for block allocation. | ||
191 | * It is used when heuristic for sequential allocation fails. | ||
192 | * Rules are: | ||
193 | * + if there is a block to the left of our position - allocate near it. | ||
194 | * + if pointer will live in indirect block - allocate near that block. | ||
195 | * + if pointer will live in inode - allocate in the same | ||
196 | * cylinder group. | ||
197 | * | ||
198 | * In the latter case we colour the starting block by the caller's PID to | ||
199 | * prevent it from clashing with concurrent allocations for a different inode | ||
200 | * in the same block group. The PID is used here so that functionally related | ||
201 | * files will be close-by on-disk. | ||
202 | * | ||
203 | * Caller must make sure that @ind is valid and will stay that way. | ||
204 | */ | ||
205 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | ||
206 | { | ||
207 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
208 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; | ||
209 | __le32 *p; | ||
210 | |||
211 | /* Try to find previous block */ | ||
212 | for (p = ind->p - 1; p >= start; p--) { | ||
213 | if (*p) | ||
214 | return le32_to_cpu(*p); | ||
215 | } | ||
216 | |||
217 | /* No such thing, so let's try location of indirect block */ | ||
218 | if (ind->bh) | ||
219 | return ind->bh->b_blocknr; | ||
220 | |||
221 | /* | ||
222 | * It is going to be referred to from the inode itself? OK, just put it | ||
223 | * into the same cylinder group then. | ||
224 | */ | ||
225 | return ext4_inode_to_goal_block(inode); | ||
226 | } | ||
227 | |||
228 | /** | ||
229 | * ext4_find_goal - find a preferred place for allocation. | ||
230 | * @inode: owner | ||
231 | * @block: block we want | ||
232 | * @partial: pointer to the last triple within a chain | ||
233 | * | ||
234 | * Normally this function finds the preferred place for block allocation | ||
235 | * and returns it. | ||
236 | * Because this is only used for non-extent files, we limit the block nr | ||
237 | * to 32 bits. | ||
238 | */ | ||
239 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | ||
240 | Indirect *partial) | ||
241 | { | ||
242 | ext4_fsblk_t goal; | ||
243 | |||
244 | /* | ||
245 | * XXX need to get goal block from mballoc's data structures | ||
246 | */ | ||
247 | |||
248 | goal = ext4_find_near(inode, partial); | ||
249 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
250 | return goal; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * ext4_blks_to_allocate - Look up the block map and count the number | ||
255 | * of direct blocks that need to be allocated for the given branch. | ||
256 | * | ||
257 | * @branch: chain of indirect blocks | ||
258 | * @k: number of blocks needed for indirect blocks | ||
259 | * @blks: number of data blocks to be mapped. | ||
260 | * @blocks_to_boundary: the offset in the indirect block | ||
261 | * | ||
262 | * return the total number of blocks to be allocated, including the | ||
263 | * direct and indirect blocks. | ||
264 | */ | ||
265 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | ||
266 | int blocks_to_boundary) | ||
267 | { | ||
268 | unsigned int count = 0; | ||
269 | |||
270 | /* | ||
271 | * Simple case: the [t,d]indirect block(s) have not been allocated yet, | ||
272 | * so it's clear the blocks on that path have not been allocated either | ||
273 | */ | ||
274 | if (k > 0) { | ||
275 | /* right now we don't handle cross boundary allocation */ | ||
276 | if (blks < blocks_to_boundary + 1) | ||
277 | count += blks; | ||
278 | else | ||
279 | count += blocks_to_boundary + 1; | ||
280 | return count; | ||
281 | } | ||
282 | |||
283 | count++; | ||
284 | while (count < blks && count <= blocks_to_boundary && | ||
285 | le32_to_cpu(*(branch[0].p + count)) == 0) { | ||
286 | count++; | ||
287 | } | ||
288 | return count; | ||
289 | } | ||
290 | |||
291 | /** | ||
292 | * ext4_alloc_blocks: multiple allocate blocks needed for a branch | ||
293 | * @handle: handle for this transaction | ||
294 | * @inode: inode which needs allocated blocks | ||
295 | * @iblock: the logical block to start allocated at | ||
296 | * @goal: preferred physical block of allocation | ||
297 | * @indirect_blks: the number of blocks that need to be allocated for indirect | ||
298 | * blocks | ||
299 | * @blks: number of desired blocks | ||
300 | * @new_blocks: on return it will store the new block numbers for | ||
301 | * the indirect blocks(if needed) and the first direct block, | ||
302 | * @err: on return it will store the error code | ||
303 | * | ||
304 | * This function will return the number of blocks allocated as | ||
305 | * requested by the passed-in parameters. | ||
306 | */ | ||
307 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | ||
308 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
309 | int indirect_blks, int blks, | ||
310 | ext4_fsblk_t new_blocks[4], int *err) | ||
311 | { | ||
312 | struct ext4_allocation_request ar; | ||
313 | int target, i; | ||
314 | unsigned long count = 0, blk_allocated = 0; | ||
315 | int index = 0; | ||
316 | ext4_fsblk_t current_block = 0; | ||
317 | int ret = 0; | ||
318 | |||
319 | /* | ||
320 | * Here we try to allocate the requested multiple blocks at once, | ||
321 | * on a best-effort basis. | ||
322 | * To build a branch, we should allocate blocks for | ||
323 | * the indirect blocks (if not allocated yet), and at least | ||
324 | * the first direct block of this branch. That's the | ||
325 | * minimum number of blocks we need to allocate (required) | ||
326 | */ | ||
327 | /* first we try to allocate the indirect blocks */ | ||
328 | target = indirect_blks; | ||
329 | while (target > 0) { | ||
330 | count = target; | ||
331 | /* allocating blocks for indirect blocks and direct blocks */ | ||
332 | current_block = ext4_new_meta_blocks(handle, inode, goal, | ||
333 | 0, &count, err); | ||
334 | if (*err) | ||
335 | goto failed_out; | ||
336 | |||
337 | if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { | ||
338 | EXT4_ERROR_INODE(inode, | ||
339 | "current_block %llu + count %lu > %d!", | ||
340 | current_block, count, | ||
341 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
342 | *err = -EIO; | ||
343 | goto failed_out; | ||
344 | } | ||
345 | |||
346 | target -= count; | ||
347 | /* allocate blocks for indirect blocks */ | ||
348 | while (index < indirect_blks && count) { | ||
349 | new_blocks[index++] = current_block++; | ||
350 | count--; | ||
351 | } | ||
352 | if (count > 0) { | ||
353 | /* | ||
354 | * save the new block number | ||
355 | * for the first direct block | ||
356 | */ | ||
357 | new_blocks[index] = current_block; | ||
358 | printk(KERN_INFO "%s returned more blocks than " | ||
359 | "requested\n", __func__); | ||
360 | WARN_ON(1); | ||
361 | break; | ||
362 | } | ||
363 | } | ||
364 | |||
365 | target = blks - count ; | ||
366 | blk_allocated = count; | ||
367 | if (!target) | ||
368 | goto allocated; | ||
369 | /* Now allocate data blocks */ | ||
370 | memset(&ar, 0, sizeof(ar)); | ||
371 | ar.inode = inode; | ||
372 | ar.goal = goal; | ||
373 | ar.len = target; | ||
374 | ar.logical = iblock; | ||
375 | if (S_ISREG(inode->i_mode)) | ||
376 | /* enable in-core preallocation only for regular files */ | ||
377 | ar.flags = EXT4_MB_HINT_DATA; | ||
378 | |||
379 | current_block = ext4_mb_new_blocks(handle, &ar, err); | ||
380 | if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { | ||
381 | EXT4_ERROR_INODE(inode, | ||
382 | "current_block %llu + ar.len %d > %d!", | ||
383 | current_block, ar.len, | ||
384 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
385 | *err = -EIO; | ||
386 | goto failed_out; | ||
387 | } | ||
388 | |||
389 | if (*err && (target == blks)) { | ||
390 | /* | ||
391 | * if the allocation failed and we didn't allocate | ||
392 | * any blocks before | ||
393 | */ | ||
394 | goto failed_out; | ||
395 | } | ||
396 | if (!*err) { | ||
397 | if (target == blks) { | ||
398 | /* | ||
399 | * save the new block number | ||
400 | * for the first direct block | ||
401 | */ | ||
402 | new_blocks[index] = current_block; | ||
403 | } | ||
404 | blk_allocated += ar.len; | ||
405 | } | ||
406 | allocated: | ||
407 | /* total number of blocks allocated for direct blocks */ | ||
408 | ret = blk_allocated; | ||
409 | *err = 0; | ||
410 | return ret; | ||
411 | failed_out: | ||
412 | for (i = 0; i < index; i++) | ||
413 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); | ||
414 | return ret; | ||
415 | } | ||
416 | |||
417 | /** | ||
418 | * ext4_alloc_branch - allocate and set up a chain of blocks. | ||
419 | * @handle: handle for this transaction | ||
420 | * @inode: owner | ||
421 | * @indirect_blks: number of allocated indirect blocks | ||
422 | * @blks: number of allocated direct blocks | ||
423 | * @goal: preferred place for allocation | ||
424 | * @offsets: offsets (in the blocks) to store the pointers to next. | ||
425 | * @branch: place to store the chain in. | ||
426 | * | ||
427 | * This function allocates blocks, zeroes out all but the last one, | ||
428 | * links them into chain and (if we are synchronous) writes them to disk. | ||
429 | * In other words, it prepares a branch that can be spliced onto the | ||
430 | * inode. It stores the information about that chain in the branch[], in | ||
431 | * the same format as ext4_get_branch() would do. We are calling it after | ||
432 | * we had read the existing part of chain and partial points to the last | ||
433 | * triple of that (one with zero ->key). Upon the exit we have the same | ||
434 | * picture as after the successful ext4_get_block(), except that in one | ||
435 | * place chain is disconnected - *branch->p is still zero (we did not | ||
436 | * set the last link), but branch->key contains the number that should | ||
437 | * be placed into *branch->p to fill that gap. | ||
438 | * | ||
439 | * If allocation fails we free all blocks we've allocated (and forget | ||
440 | * their buffer_heads) and return the error value from the failed | ||
441 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain | ||
442 | * as described above and return 0. | ||
443 | */ | ||
444 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | ||
445 | ext4_lblk_t iblock, int indirect_blks, | ||
446 | int *blks, ext4_fsblk_t goal, | ||
447 | ext4_lblk_t *offsets, Indirect *branch) | ||
448 | { | ||
449 | int blocksize = inode->i_sb->s_blocksize; | ||
450 | int i, n = 0; | ||
451 | int err = 0; | ||
452 | struct buffer_head *bh; | ||
453 | int num; | ||
454 | ext4_fsblk_t new_blocks[4]; | ||
455 | ext4_fsblk_t current_block; | ||
456 | |||
457 | num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, | ||
458 | *blks, new_blocks, &err); | ||
459 | if (err) | ||
460 | return err; | ||
461 | |||
462 | branch[0].key = cpu_to_le32(new_blocks[0]); | ||
463 | /* | ||
464 | * metadata blocks and data blocks are allocated. | ||
465 | */ | ||
466 | for (n = 1; n <= indirect_blks; n++) { | ||
467 | /* | ||
468 | * Get buffer_head for parent block, zero it out | ||
469 | * and set the pointer to new one, then send | ||
470 | * parent to disk. | ||
471 | */ | ||
472 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | ||
473 | if (unlikely(!bh)) { | ||
474 | err = -EIO; | ||
475 | goto failed; | ||
476 | } | ||
477 | |||
478 | branch[n].bh = bh; | ||
479 | lock_buffer(bh); | ||
480 | BUFFER_TRACE(bh, "call get_create_access"); | ||
481 | err = ext4_journal_get_create_access(handle, bh); | ||
482 | if (err) { | ||
483 | /* Don't brelse(bh) here; it's done in | ||
484 | * ext4_journal_forget() below */ | ||
485 | unlock_buffer(bh); | ||
486 | goto failed; | ||
487 | } | ||
488 | |||
489 | memset(bh->b_data, 0, blocksize); | ||
490 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | ||
491 | branch[n].key = cpu_to_le32(new_blocks[n]); | ||
492 | *branch[n].p = branch[n].key; | ||
493 | if (n == indirect_blks) { | ||
494 | current_block = new_blocks[n]; | ||
495 | /* | ||
496 | * End of chain, update the last new metablock of | ||
497 | * the chain to point to the newly allocated | ||
498 | * data block numbers | ||
499 | */ | ||
500 | for (i = 1; i < num; i++) | ||
501 | *(branch[n].p + i) = cpu_to_le32(++current_block); | ||
502 | } | ||
503 | BUFFER_TRACE(bh, "marking uptodate"); | ||
504 | set_buffer_uptodate(bh); | ||
505 | unlock_buffer(bh); | ||
506 | |||
507 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
508 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
509 | if (err) | ||
510 | goto failed; | ||
511 | } | ||
512 | *blks = num; | ||
513 | return err; | ||
514 | failed: | ||
515 | /* Allocation failed, free what we already allocated */ | ||
516 | ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); | ||
517 | for (i = 1; i <= n ; i++) { | ||
518 | /* | ||
519 | * branch[i].bh is newly allocated, so there is no | ||
520 | * need to revoke the block, which is why we don't | ||
521 | * need to set EXT4_FREE_BLOCKS_METADATA. | ||
522 | */ | ||
523 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, | ||
524 | EXT4_FREE_BLOCKS_FORGET); | ||
525 | } | ||
526 | for (i = n+1; i < indirect_blks; i++) | ||
527 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); | ||
528 | |||
529 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); | ||
530 | |||
531 | return err; | ||
532 | } | ||
533 | |||
534 | /** | ||
535 | * ext4_splice_branch - splice the allocated branch onto inode. | ||
536 | * @handle: handle for this transaction | ||
537 | * @inode: owner | ||
538 | * @block: (logical) number of block we are adding | ||
539 | * @chain: chain of indirect blocks (with a missing link - see | ||
540 | * ext4_alloc_branch) | ||
541 | * @where: location of missing link | ||
542 | * @num: number of indirect blocks we are adding | ||
543 | * @blks: number of direct blocks we are adding | ||
544 | * | ||
545 | * This function fills the missing link and does all housekeeping needed in | ||
546 | * inode (->i_blocks, etc.). In case of success we end up with the full | ||
547 | * chain to new block and return 0. | ||
548 | */ | ||
549 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | ||
550 | ext4_lblk_t block, Indirect *where, int num, | ||
551 | int blks) | ||
552 | { | ||
553 | int i; | ||
554 | int err = 0; | ||
555 | ext4_fsblk_t current_block; | ||
556 | |||
557 | /* | ||
558 | * If we're splicing into a [td]indirect block (as opposed to the | ||
559 | * inode) then we need to get write access to the [td]indirect block | ||
560 | * before the splice. | ||
561 | */ | ||
562 | if (where->bh) { | ||
563 | BUFFER_TRACE(where->bh, "get_write_access"); | ||
564 | err = ext4_journal_get_write_access(handle, where->bh); | ||
565 | if (err) | ||
566 | goto err_out; | ||
567 | } | ||
568 | /* That's it */ | ||
569 | |||
570 | *where->p = where->key; | ||
571 | |||
572 | /* | ||
573 | * Update the host buffer_head or inode to point to the just-allocated | ||
574 | * direct blocks | ||
575 | */ | ||
576 | if (num == 0 && blks > 1) { | ||
577 | current_block = le32_to_cpu(where->key) + 1; | ||
578 | for (i = 1; i < blks; i++) | ||
579 | *(where->p + i) = cpu_to_le32(current_block++); | ||
580 | } | ||
581 | |||
582 | /* We are done with atomic stuff, now do the rest of housekeeping */ | ||
583 | /* had we spliced it onto indirect block? */ | ||
584 | if (where->bh) { | ||
585 | /* | ||
586 | * If we spliced it onto an indirect block, we haven't | ||
587 | * altered the inode. Note however that if it is being spliced | ||
588 | * onto an indirect block at the very end of the file (the | ||
589 | * file is growing) then we *will* alter the inode to reflect | ||
590 | * the new i_size. But that is not done here - it is done in | ||
591 | * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. | ||
592 | */ | ||
593 | jbd_debug(5, "splicing indirect only\n"); | ||
594 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); | ||
595 | err = ext4_handle_dirty_metadata(handle, inode, where->bh); | ||
596 | if (err) | ||
597 | goto err_out; | ||
598 | } else { | ||
599 | /* | ||
600 | * OK, we spliced it into the inode itself on a direct block. | ||
601 | */ | ||
602 | ext4_mark_inode_dirty(handle, inode); | ||
603 | jbd_debug(5, "splicing direct\n"); | ||
604 | } | ||
605 | return err; | ||
606 | |||
607 | err_out: | ||
608 | for (i = 1; i <= num; i++) { | ||
609 | /* | ||
610 | * branch[i].bh is newly allocated, so there is no | ||
611 | * need to revoke the block, which is why we don't | ||
612 | * need to set EXT4_FREE_BLOCKS_METADATA. | ||
613 | */ | ||
614 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | ||
615 | EXT4_FREE_BLOCKS_FORGET); | ||
616 | } | ||
617 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), | ||
618 | blks, 0); | ||
619 | |||
620 | return err; | ||
621 | } | ||
622 | |||
623 | /* | ||
624 | * The ext4_ind_map_blocks() function handles non-extents inodes | ||
625 | * (i.e., using the traditional indirect/double-indirect i_blocks | ||
626 | * scheme) for ext4_map_blocks(). | ||
627 | * | ||
628 | * Allocation strategy is simple: if we have to allocate something, we will | ||
629 | * have to go the whole way to the leaf. So let's do it before attaching anything | ||
630 | * to the tree, set linkage between the newborn blocks, write them if sync is | ||
631 | * required, recheck the path, free and repeat if check fails, otherwise | ||
632 | * set the last missing link (that will protect us from any truncate-generated | ||
633 | * removals - all blocks on the path are immune now) and possibly force the | ||
634 | * write on the parent block. | ||
635 | * That has a nice additional property: no special recovery from the failed | ||
636 | * allocations is needed - we simply release blocks and do not touch anything | ||
637 | * reachable from inode. | ||
638 | * | ||
639 | * `handle' can be NULL if create == 0. | ||
640 | * | ||
641 | * return > 0, # of blocks mapped or allocated. | ||
642 | * return = 0, if plain lookup failed. | ||
643 | * return < 0, error case. | ||
644 | * | ||
645 | * The ext4_ind_get_blocks() function should be called with | ||
646 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem | ||
647 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or | ||
648 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | ||
649 | * blocks. | ||
650 | */ | ||
651 | int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | ||
652 | struct ext4_map_blocks *map, | ||
653 | int flags) | ||
654 | { | ||
655 | int err = -EIO; | ||
656 | ext4_lblk_t offsets[4]; | ||
657 | Indirect chain[4]; | ||
658 | Indirect *partial; | ||
659 | ext4_fsblk_t goal; | ||
660 | int indirect_blks; | ||
661 | int blocks_to_boundary = 0; | ||
662 | int depth; | ||
663 | int count = 0; | ||
664 | ext4_fsblk_t first_block = 0; | ||
665 | |||
666 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
667 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | ||
668 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | ||
669 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | ||
670 | &blocks_to_boundary); | ||
671 | |||
672 | if (depth == 0) | ||
673 | goto out; | ||
674 | |||
675 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); | ||
676 | |||
677 | /* Simplest case - block found, no allocation needed */ | ||
678 | if (!partial) { | ||
679 | first_block = le32_to_cpu(chain[depth - 1].key); | ||
680 | count++; | ||
681 | /*map more blocks*/ | ||
682 | while (count < map->m_len && count <= blocks_to_boundary) { | ||
683 | ext4_fsblk_t blk; | ||
684 | |||
685 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | ||
686 | |||
687 | if (blk == first_block + count) | ||
688 | count++; | ||
689 | else | ||
690 | break; | ||
691 | } | ||
692 | goto got_it; | ||
693 | } | ||
694 | |||
695 | /* Next simple case - plain lookup or failed read of indirect block */ | ||
696 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) | ||
697 | goto cleanup; | ||
698 | |||
699 | /* | ||
700 | * Okay, we need to do block allocation. | ||
701 | */ | ||
702 | goal = ext4_find_goal(inode, map->m_lblk, partial); | ||
703 | |||
704 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | ||
705 | indirect_blks = (chain + depth) - partial - 1; | ||
706 | |||
707 | /* | ||
708 | * Next look up the indirect map to count the total number of | ||
709 | * direct blocks to allocate for this branch. | ||
710 | */ | ||
711 | count = ext4_blks_to_allocate(partial, indirect_blks, | ||
712 | map->m_len, blocks_to_boundary); | ||
713 | /* | ||
714 | * Block out ext4_truncate while we alter the tree | ||
715 | */ | ||
716 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, | ||
717 | &count, goal, | ||
718 | offsets + (partial - chain), partial); | ||
719 | |||
720 | /* | ||
721 | * The ext4_splice_branch call will free and forget any buffers | ||
722 | * on the new chain if there is a failure, but that risks using | ||
723 | * up transaction credits, especially for bitmaps where the | ||
724 | * credits cannot be returned. Can we handle this somehow? We | ||
725 | * may need to return -EAGAIN upwards in the worst case. --sct | ||
726 | */ | ||
727 | if (!err) | ||
728 | err = ext4_splice_branch(handle, inode, map->m_lblk, | ||
729 | partial, indirect_blks, count); | ||
730 | if (err) | ||
731 | goto cleanup; | ||
732 | |||
733 | map->m_flags |= EXT4_MAP_NEW; | ||
734 | |||
735 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
736 | got_it: | ||
737 | map->m_flags |= EXT4_MAP_MAPPED; | ||
738 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | ||
739 | map->m_len = count; | ||
740 | if (count > blocks_to_boundary) | ||
741 | map->m_flags |= EXT4_MAP_BOUNDARY; | ||
742 | err = count; | ||
743 | /* Clean up and exit */ | ||
744 | partial = chain + depth - 1; /* the whole chain */ | ||
745 | cleanup: | ||
746 | while (partial > chain) { | ||
747 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
748 | brelse(partial->bh); | ||
749 | partial--; | ||
750 | } | ||
751 | out: | ||
752 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | ||
753 | map->m_pblk, map->m_len, err); | ||
754 | return err; | ||
755 | } | ||
756 | |||
757 | /* | ||
758 | * O_DIRECT for ext3 (or indirect map) based files | ||
759 | * | ||
760 | * If the O_DIRECT write will extend the file then add this inode to the | ||
761 | * orphan list. So recovery will truncate it back to the original size | ||
762 | * if the machine crashes during the write. | ||
763 | * | ||
764 | * If the O_DIRECT write is instantiating holes inside i_size and the machine | ||
765 | * crashes then stale disk data _may_ be exposed inside the file. But current | ||
766 | * VFS code falls back into buffered path in that case so we are safe. | ||
767 | */ | ||
768 | ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | ||
769 | const struct iovec *iov, loff_t offset, | ||
770 | unsigned long nr_segs) | ||
771 | { | ||
772 | struct file *file = iocb->ki_filp; | ||
773 | struct inode *inode = file->f_mapping->host; | ||
774 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
775 | handle_t *handle; | ||
776 | ssize_t ret; | ||
777 | int orphan = 0; | ||
778 | size_t count = iov_length(iov, nr_segs); | ||
779 | int retries = 0; | ||
780 | |||
781 | if (rw == WRITE) { | ||
782 | loff_t final_size = offset + count; | ||
783 | |||
784 | if (final_size > inode->i_size) { | ||
785 | /* Credits for sb + inode write */ | ||
786 | handle = ext4_journal_start(inode, 2); | ||
787 | if (IS_ERR(handle)) { | ||
788 | ret = PTR_ERR(handle); | ||
789 | goto out; | ||
790 | } | ||
791 | ret = ext4_orphan_add(handle, inode); | ||
792 | if (ret) { | ||
793 | ext4_journal_stop(handle); | ||
794 | goto out; | ||
795 | } | ||
796 | orphan = 1; | ||
797 | ei->i_disksize = inode->i_size; | ||
798 | ext4_journal_stop(handle); | ||
799 | } | ||
800 | } | ||
801 | |||
802 | retry: | ||
803 | if (rw == READ && ext4_should_dioread_nolock(inode)) { | ||
804 | if (unlikely(!list_empty(&ei->i_completed_io_list))) { | ||
805 | mutex_lock(&inode->i_mutex); | ||
806 | ext4_flush_completed_IO(inode); | ||
807 | mutex_unlock(&inode->i_mutex); | ||
808 | } | ||
809 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
810 | inode->i_sb->s_bdev, iov, | ||
811 | offset, nr_segs, | ||
812 | ext4_get_block, NULL, NULL, 0); | ||
813 | } else { | ||
814 | ret = blockdev_direct_IO(rw, iocb, inode, iov, | ||
815 | offset, nr_segs, ext4_get_block); | ||
816 | |||
817 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
818 | loff_t isize = i_size_read(inode); | ||
819 | loff_t end = offset + iov_length(iov, nr_segs); | ||
820 | |||
821 | if (end > isize) | ||
822 | ext4_truncate_failed_write(inode); | ||
823 | } | ||
824 | } | ||
825 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
826 | goto retry; | ||
827 | |||
828 | if (orphan) { | ||
829 | int err; | ||
830 | |||
831 | /* Credits for sb + inode write */ | ||
832 | handle = ext4_journal_start(inode, 2); | ||
833 | if (IS_ERR(handle)) { | ||
834 | /* This is really bad luck. We've written the data | ||
835 | * but cannot extend i_size. Bail out and pretend | ||
836 | * the write failed... */ | ||
837 | ret = PTR_ERR(handle); | ||
838 | if (inode->i_nlink) | ||
839 | ext4_orphan_del(NULL, inode); | ||
840 | |||
841 | goto out; | ||
842 | } | ||
843 | if (inode->i_nlink) | ||
844 | ext4_orphan_del(handle, inode); | ||
845 | if (ret > 0) { | ||
846 | loff_t end = offset + ret; | ||
847 | if (end > inode->i_size) { | ||
848 | ei->i_disksize = end; | ||
849 | i_size_write(inode, end); | ||
850 | /* | ||
851 | * We're going to return a positive `ret' | ||
852 | * here due to non-zero-length I/O, so there's | ||
853 | * no way of reporting error returns from | ||
854 | * ext4_mark_inode_dirty() to userspace. So | ||
855 | * ignore it. | ||
856 | */ | ||
857 | ext4_mark_inode_dirty(handle, inode); | ||
858 | } | ||
859 | } | ||
860 | err = ext4_journal_stop(handle); | ||
861 | if (ret == 0) | ||
862 | ret = err; | ||
863 | } | ||
864 | out: | ||
865 | return ret; | ||
866 | } | ||
867 | |||
868 | /* | ||
869 | * Calculate the number of metadata blocks we need to reserve | ||
870 | * to allocate a new block at @lblock for a non-extent-based file | ||
871 | */ | ||
872 | int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) | ||
873 | { | ||
874 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
875 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); | ||
876 | int blk_bits; | ||
877 | |||
878 | if (lblock < EXT4_NDIR_BLOCKS) | ||
879 | return 0; | ||
880 | |||
881 | lblock -= EXT4_NDIR_BLOCKS; | ||
882 | |||
883 | if (ei->i_da_metadata_calc_len && | ||
884 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { | ||
885 | ei->i_da_metadata_calc_len++; | ||
886 | return 0; | ||
887 | } | ||
888 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | ||
889 | ei->i_da_metadata_calc_len = 1; | ||
890 | blk_bits = order_base_2(lblock); | ||
891 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | ||
892 | } | ||
893 | |||
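For 4K blocks EXT4_ADDR_PER_BLOCK_BITS is 10, so the value returned above is roughly the number of tree levels a pointer at this logical offset can sit under. A worked example, with a stand-in for order_base_2() (assumed here to round up to the next power-of-two exponent):

```c
#include <stdio.h>

/* Round-up log2; stand-in for the kernel's order_base_2(). */
static int ilog2_ceil(unsigned long v)
{
	int bits = 0;

	while ((1UL << bits) < v)
		bits++;
	return bits;
}

int main(void)
{
	unsigned long lblock = 5000;	/* already minus the 12 direct blocks */
	int addr_bits = 10;		/* 4K blocks => 1024 pointers/block   */

	/* order_base_2(5000) = 13; 13 / 10 + 1 = 2 metadata blocks. */
	printf("%d\n", ilog2_ceil(lblock) / addr_bits + 1);
	return 0;
}
```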
894 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
895 | { | ||
896 | int indirects; | ||
897 | |||
898 | /* if nrblocks are contiguous */ | ||
899 | if (chunk) { | ||
900 | /* | ||
901 | * With N contiguous data blocks, we need at most | ||
902 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | ||
903 | * 2 dindirect blocks, and 1 tindirect block | ||
904 | */ | ||
905 | return DIV_ROUND_UP(nrblocks, | ||
906 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | ||
907 | } | ||
908 | /* | ||
909 | * if nrblocks are not contiguous, worst case: each block touches | ||
910 | * an indirect block, and each indirect block touches a double indirect | ||
911 | * block, plus a triple indirect block | ||
912 | */ | ||
913 | indirects = nrblocks * 2 + 1; | ||
914 | return indirects; | ||
915 | } | ||
916 | |||
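The two estimates above are easy to sanity-check with 4K-block numbers (1024 pointers per indirect block): 2048 contiguous blocks need at most 6 indirect-tree blocks, while 2048 scattered blocks could in principle touch 4097:

```c
#include <stdio.h>

int main(void)
{
	int ptrs = 1024, nrblocks = 2048;

	/* contiguous: DIV_ROUND_UP(N, 1024) indirect, 2 dindirect, 1 tindirect, +1 */
	printf("contiguous: %d\n", (nrblocks + ptrs - 1) / ptrs + 4);	/* 6    */
	/* scattered: one indirect + one dindirect per block, one tindirect */
	printf("scattered:  %d\n", nrblocks * 2 + 1);			/* 4097 */
	return 0;
}
```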
917 | /* | ||
918 | * Truncate transactions can be complex and absolutely huge. So we need to | ||
919 | * be able to restart the transaction at a convenient checkpoint to make | ||
920 | * sure we don't overflow the journal. | ||
921 | * | ||
922 | * start_transaction gets us a new handle for a truncate transaction, | ||
923 | * and extend_transaction tries to extend the existing one a bit. If | ||
924 | * extend fails, we need to propagate the failure up and restart the | ||
925 | * transaction in the top-level truncate loop. --sct | ||
926 | */ | ||
927 | static handle_t *start_transaction(struct inode *inode) | ||
928 | { | ||
929 | handle_t *result; | ||
930 | |||
931 | result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); | ||
932 | if (!IS_ERR(result)) | ||
933 | return result; | ||
934 | |||
935 | ext4_std_error(inode->i_sb, PTR_ERR(result)); | ||
936 | return result; | ||
937 | } | ||
938 | |||
939 | /* | ||
940 | * Try to extend this transaction for the purposes of truncation. | ||
941 | * | ||
942 | * Returns 0 if we managed to create more room. If we can't create more | ||
943 | * room, and the transaction must be restarted we return 1. | ||
944 | */ | ||
945 | static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | ||
946 | { | ||
947 | if (!ext4_handle_valid(handle)) | ||
948 | return 0; | ||
949 | if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) | ||
950 | return 0; | ||
951 | if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) | ||
952 | return 0; | ||
953 | return 1; | ||
954 | } | ||
955 | |||
956 | /* | ||
957 | * Probably it should be a library function... search for first non-zero word | ||
958 | * or memcmp with zero_page, whatever is better for particular architecture. | ||
959 | * Linus? | ||
960 | */ | ||
961 | static inline int all_zeroes(__le32 *p, __le32 *q) | ||
962 | { | ||
963 | while (p < q) | ||
964 | if (*p++) | ||
965 | return 0; | ||
966 | return 1; | ||
967 | } | ||
968 | |||
969 | /** | ||
970 | * ext4_find_shared - find the indirect blocks for partial truncation. | ||
971 | * @inode: inode in question | ||
972 | * @depth: depth of the affected branch | ||
973 | * @offsets: offsets of pointers in that branch (see ext4_block_to_path) | ||
974 | * @chain: place to store the pointers to partial indirect blocks | ||
975 | * @top: place to the (detached) top of branch | ||
976 | * | ||
977 | * This is a helper function used by ext4_truncate(). | ||
978 | * | ||
979 | * When we do truncate() we may have to clean the ends of several | ||
980 | * indirect blocks but leave the blocks themselves alive. Block is | ||
981 | * partially truncated if some data below the new i_size is referred | ||
982 | * from it (and it is on the path to the first completely truncated | ||
983 | * data block, indeed). We have to free the top of that path along | ||
984 | * with everything to the right of the path. Since no allocation | ||
985 | * past the truncation point is possible until ext4_truncate() | ||
986 | * finishes, we may safely do the latter, but top of branch may | ||
987 | * require special attention - pageout below the truncation point | ||
988 | * might try to populate it. | ||
989 | * | ||
990 | * We atomically detach the top of branch from the tree, store the | ||
991 | * block number of its root in *@top, pointers to buffer_heads of | ||
992 | * partially truncated blocks - in @chain[].bh and pointers to | ||
993 | * their last elements that should not be removed - in | ||
994 | * @chain[].p. Return value is the pointer to last filled element | ||
995 | * of @chain. | ||
996 | * | ||
997 | * The work of actually freeing the subtrees is left to the caller: | ||
998 | * a) free the subtree starting from *@top | ||
999 | * b) free the subtrees whose roots are stored in | ||
1000 | * (@chain[i].p+1 .. end of @chain[i].bh->b_data) | ||
1001 | * c) free the subtrees growing from the inode past the @chain[0]. | ||
1002 | * (no partially truncated stuff there). */ | ||
1003 | |||
1004 | static Indirect *ext4_find_shared(struct inode *inode, int depth, | ||
1005 | ext4_lblk_t offsets[4], Indirect chain[4], | ||
1006 | __le32 *top) | ||
1007 | { | ||
1008 | Indirect *partial, *p; | ||
1009 | int k, err; | ||
1010 | |||
1011 | *top = 0; | ||
1012 | /* Make k index the deepest non-null offset + 1 */ | ||
1013 | for (k = depth; k > 1 && !offsets[k-1]; k--) | ||
1014 | ; | ||
1015 | partial = ext4_get_branch(inode, k, offsets, chain, &err); | ||
1016 | /* Writer: pointers */ | ||
1017 | if (!partial) | ||
1018 | partial = chain + k-1; | ||
1019 | /* | ||
1020 | * If the branch acquired continuation since we've looked at it - | ||
1021 | * fine, it should all survive and (new) top doesn't belong to us. | ||
1022 | */ | ||
1023 | if (!partial->key && *partial->p) | ||
1024 | /* Writer: end */ | ||
1025 | goto no_top; | ||
1026 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) | ||
1027 | ; | ||
1028 | /* | ||
1029 | * OK, we've found the last block that must survive. The rest of our | ||
1030 | * branch should be detached before unlocking. However, if that rest | ||
1031 | * of branch is all ours and does not grow immediately from the inode | ||
1032 | * it's easier to cheat and just decrement partial->p. | ||
1033 | */ | ||
1034 | if (p == chain + k - 1 && p > chain) { | ||
1035 | p->p--; | ||
1036 | } else { | ||
1037 | *top = *p->p; | ||
1038 | /* Nope, don't do this in ext4. Must leave the tree intact */ | ||
1039 | #if 0 | ||
1040 | *p->p = 0; | ||
1041 | #endif | ||
1042 | } | ||
1043 | /* Writer: end */ | ||
1044 | |||
1045 | while (partial > p) { | ||
1046 | brelse(partial->bh); | ||
1047 | partial--; | ||
1048 | } | ||
1049 | no_top: | ||
1050 | return partial; | ||
1051 | } | ||
1052 | |||
1053 | /* | ||
1054 | * Zero a number of block pointers in either an inode or an indirect block. | ||
1055 | * If we restart the transaction we must again get write access to the | ||
1056 | * indirect block for further modification. | ||
1057 | * | ||
1058 | * We release `count' blocks on disk, but (last - first) may be greater | ||
1059 | * than `count' because there can be holes in there. | ||
1060 | * | ||
1061 | * Return 0 on success, 1 on invalid block range | ||
1062 | * and < 0 on fatal error. | ||
1063 | */ | ||
1064 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | ||
1065 | struct buffer_head *bh, | ||
1066 | ext4_fsblk_t block_to_free, | ||
1067 | unsigned long count, __le32 *first, | ||
1068 | __le32 *last) | ||
1069 | { | ||
1070 | __le32 *p; | ||
1071 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | ||
1072 | int err; | ||
1073 | |||
1074 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
1075 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
1076 | |||
1077 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | ||
1078 | count)) { | ||
1079 | EXT4_ERROR_INODE(inode, "attempt to clear invalid " | ||
1080 | "blocks %llu len %lu", | ||
1081 | (unsigned long long) block_to_free, count); | ||
1082 | return 1; | ||
1083 | } | ||
1084 | |||
1085 | if (try_to_extend_transaction(handle, inode)) { | ||
1086 | if (bh) { | ||
1087 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
1088 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
1089 | if (unlikely(err)) | ||
1090 | goto out_err; | ||
1091 | } | ||
1092 | err = ext4_mark_inode_dirty(handle, inode); | ||
1093 | if (unlikely(err)) | ||
1094 | goto out_err; | ||
1095 | err = ext4_truncate_restart_trans(handle, inode, | ||
1096 | ext4_blocks_for_truncate(inode)); | ||
1097 | if (unlikely(err)) | ||
1098 | goto out_err; | ||
1099 | if (bh) { | ||
1100 | BUFFER_TRACE(bh, "retaking write access"); | ||
1101 | err = ext4_journal_get_write_access(handle, bh); | ||
1102 | if (unlikely(err)) | ||
1103 | goto out_err; | ||
1104 | } | ||
1105 | } | ||
1106 | |||
1107 | for (p = first; p < last; p++) | ||
1108 | *p = 0; | ||
1109 | |||
1110 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); | ||
1111 | return 0; | ||
1112 | out_err: | ||
1113 | ext4_std_error(inode->i_sb, err); | ||
1114 | return err; | ||
1115 | } | ||
1116 | |||
1117 | /** | ||
1118 | * ext4_free_data - free a list of data blocks | ||
1119 | * @handle: handle for this transaction | ||
1120 | * @inode: inode we are dealing with | ||
1121 | * @this_bh: indirect buffer_head which contains *@first and *@last | ||
1122 | * @first: array of block numbers | ||
1123 | * @last: points immediately past the end of array | ||
1124 | * | ||
1125 | * We are freeing all blocks referred from that array (numbers are stored as | ||
1126 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. | ||
1127 | * | ||
1128 | * We accumulate contiguous runs of blocks to free. Conveniently, if these | ||
1129 | * blocks are contiguous then releasing them at one time will only affect one | ||
1130 | * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't | ||
1131 | * actually use a lot of journal space. | ||
1132 | * | ||
1133 | * @this_bh will be %NULL if @first and @last point into the inode's direct | ||
1134 | * block pointers. | ||
1135 | */ | ||
1136 | static void ext4_free_data(handle_t *handle, struct inode *inode, | ||
1137 | struct buffer_head *this_bh, | ||
1138 | __le32 *first, __le32 *last) | ||
1139 | { | ||
1140 | ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ | ||
1141 | unsigned long count = 0; /* Number of blocks in the run */ | ||
1142 | __le32 *block_to_free_p = NULL; /* Pointer into inode/ind | ||
1143 | corresponding to | ||
1144 | block_to_free */ | ||
1145 | ext4_fsblk_t nr; /* Current block # */ | ||
1146 | __le32 *p; /* Pointer into inode/ind | ||
1147 | for current block */ | ||
1148 | int err = 0; | ||
1149 | |||
1150 | if (this_bh) { /* For indirect block */ | ||
1151 | BUFFER_TRACE(this_bh, "get_write_access"); | ||
1152 | err = ext4_journal_get_write_access(handle, this_bh); | ||
1153 | /* Important: if we can't update the indirect pointers | ||
1154 | * to the blocks, we can't free them. */ | ||
1155 | if (err) | ||
1156 | return; | ||
1157 | } | ||
1158 | |||
1159 | for (p = first; p < last; p++) { | ||
1160 | nr = le32_to_cpu(*p); | ||
1161 | if (nr) { | ||
1162 | /* accumulate blocks to free if they're contiguous */ | ||
1163 | if (count == 0) { | ||
1164 | block_to_free = nr; | ||
1165 | block_to_free_p = p; | ||
1166 | count = 1; | ||
1167 | } else if (nr == block_to_free + count) { | ||
1168 | count++; | ||
1169 | } else { | ||
1170 | err = ext4_clear_blocks(handle, inode, this_bh, | ||
1171 | block_to_free, count, | ||
1172 | block_to_free_p, p); | ||
1173 | if (err) | ||
1174 | break; | ||
1175 | block_to_free = nr; | ||
1176 | block_to_free_p = p; | ||
1177 | count = 1; | ||
1178 | } | ||
1179 | } | ||
1180 | } | ||
1181 | |||
1182 | if (!err && count > 0) | ||
1183 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, | ||
1184 | count, block_to_free_p, p); | ||
1185 | if (err < 0) | ||
1186 | /* fatal error */ | ||
1187 | return; | ||
1188 | |||
1189 | if (this_bh) { | ||
1190 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | ||
1191 | |||
1192 | /* | ||
1193 | * The buffer head should have an attached journal head at this | ||
1194 | * point. However, if the data is corrupted and an indirect | ||
1195 | * block pointed to itself, it would have been detached when | ||
1196 | * the block was cleared. Check for this instead of OOPSing. | ||
1197 | */ | ||
1198 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | ||
1199 | ext4_handle_dirty_metadata(handle, inode, this_bh); | ||
1200 | else | ||
1201 | EXT4_ERROR_INODE(inode, | ||
1202 | "circular indirect block detected at " | ||
1203 | "block %llu", | ||
1204 | (unsigned long long) this_bh->b_blocknr); | ||
1205 | } | ||
1206 | } | ||
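The run accumulation in the loop above is the heart of ext4_free_data(). A minimal userspace sketch of the same coalescing logic, with a hypothetical free_run() standing in for ext4_clear_blocks():

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for ext4_clear_blocks(): one call per run. */
static void free_run(uint32_t start, unsigned long count)
{
	printf("free %lu block(s) starting at %u\n", count, start);
}

/* Walk an array of block numbers and emit one "free" per contiguous
 * run instead of one per block; zero slots are holes and are skipped. */
static void coalesce_and_free(const uint32_t *first, const uint32_t *last)
{
	uint32_t block_to_free = 0;
	unsigned long count = 0;

	for (const uint32_t *p = first; p < last; p++) {
		if (!*p)
			continue;			/* a hole */
		if (count == 0) {
			block_to_free = *p;
			count = 1;
		} else if (*p == block_to_free + count) {
			count++;			/* extends the run */
		} else {
			free_run(block_to_free, count);
			block_to_free = *p;
			count = 1;
		}
	}
	if (count)
		free_run(block_to_free, count);
}

int main(void)
{
	uint32_t blocks[] = { 100, 101, 102, 0, 200, 201, 50 };

	coalesce_and_free(blocks, blocks + 7);
	return 0;
}

On this sample the sketch emits three calls (3 blocks at 100, 2 at 200, 1 at 50) instead of six, which is exactly what keeps the bitmap updates, and hence the journal credit usage, low.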
1207 | |||
1208 | /** | ||
1209 | * ext4_free_branches - free an array of branches | ||
1210 | * @handle: JBD handle for this transaction | ||
1211 | * @inode: inode we are dealing with | ||
1212 | * @parent_bh: the buffer_head which contains *@first and *@last | ||
1213 | * @first: array of block numbers | ||
1214 | * @last: pointer immediately past the end of array | ||
1215 | * @depth: depth of the branches to free | ||
1216 | * | ||
1217 | * We are freeing all blocks referred from these branches (numbers are | ||
1218 | * stored as little-endian 32-bit) and updating @inode->i_blocks | ||
1219 | * appropriately. | ||
1220 | */ | ||
1221 | static void ext4_free_branches(handle_t *handle, struct inode *inode, | ||
1222 | struct buffer_head *parent_bh, | ||
1223 | __le32 *first, __le32 *last, int depth) | ||
1224 | { | ||
1225 | ext4_fsblk_t nr; | ||
1226 | __le32 *p; | ||
1227 | |||
1228 | if (ext4_handle_is_aborted(handle)) | ||
1229 | return; | ||
1230 | |||
1231 | if (depth--) { | ||
1232 | struct buffer_head *bh; | ||
1233 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
1234 | p = last; | ||
1235 | while (--p >= first) { | ||
1236 | nr = le32_to_cpu(*p); | ||
1237 | if (!nr) | ||
1238 | continue; /* A hole */ | ||
1239 | |||
1240 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
1241 | nr, 1)) { | ||
1242 | EXT4_ERROR_INODE(inode, | ||
1243 | "invalid indirect mapped " | ||
1244 | "block %lu (level %d)", | ||
1245 | (unsigned long) nr, depth); | ||
1246 | break; | ||
1247 | } | ||
1248 | |||
1249 | /* Go read the buffer for the next level down */ | ||
1250 | bh = sb_bread(inode->i_sb, nr); | ||
1251 | |||
1252 | /* | ||
1253 | * A read failure? Report error and clear slot | ||
1254 | * (should be rare). | ||
1255 | */ | ||
1256 | if (!bh) { | ||
1257 | EXT4_ERROR_INODE_BLOCK(inode, nr, | ||
1258 | "Read failure"); | ||
1259 | continue; | ||
1260 | } | ||
1261 | |||
1262 | /* This zaps the entire block. Bottom up. */ | ||
1263 | BUFFER_TRACE(bh, "free child branches"); | ||
1264 | ext4_free_branches(handle, inode, bh, | ||
1265 | (__le32 *) bh->b_data, | ||
1266 | (__le32 *) bh->b_data + addr_per_block, | ||
1267 | depth); | ||
1268 | brelse(bh); | ||
1269 | |||
1270 | /* | ||
1271 | * Everything below this pointer has been | ||
1272 | * released. Now let this top-of-subtree go. | ||
1273 | * | ||
1274 | * We want the freeing of this indirect block to be | ||
1275 | * atomic in the journal with the updating of the | ||
1276 | * bitmap block which owns it. So make some room in | ||
1277 | * the journal. | ||
1278 | * | ||
1279 | * We zero the parent pointer *after* freeing its | ||
1280 | * pointee in the bitmaps, so if extend_transaction() | ||
1281 | * for some reason fails to put the bitmap changes and | ||
1282 | * the release into the same transaction, recovery | ||
1283 | * will merely complain about releasing a free block, | ||
1284 | * rather than leaking blocks. | ||
1285 | */ | ||
1286 | if (ext4_handle_is_aborted(handle)) | ||
1287 | return; | ||
1288 | if (try_to_extend_transaction(handle, inode)) { | ||
1289 | ext4_mark_inode_dirty(handle, inode); | ||
1290 | ext4_truncate_restart_trans(handle, inode, | ||
1291 | ext4_blocks_for_truncate(inode)); | ||
1292 | } | ||
1293 | |||
1294 | /* | ||
1295 | * The forget flag here is critical because if | ||
1296 | * we are journaling (and not doing data | ||
1297 | * journaling), we have to make sure a revoke | ||
1298 | * record is written to prevent the journal | ||
1299 | * replay from overwriting the (former) | ||
1300 | * indirect block if it gets reallocated as a | ||
1301 | * data block. This must happen in the same | ||
1302 | * transaction where the data blocks are | ||
1303 | * actually freed. | ||
1304 | */ | ||
1305 | ext4_free_blocks(handle, inode, NULL, nr, 1, | ||
1306 | EXT4_FREE_BLOCKS_METADATA| | ||
1307 | EXT4_FREE_BLOCKS_FORGET); | ||
1308 | |||
1309 | if (parent_bh) { | ||
1310 | /* | ||
1311 | * The block which we have just freed is | ||
1312 | * pointed to by an indirect block: journal it | ||
1313 | */ | ||
1314 | BUFFER_TRACE(parent_bh, "get_write_access"); | ||
1315 | if (!ext4_journal_get_write_access(handle, | ||
1316 | parent_bh)){ | ||
1317 | *p = 0; | ||
1318 | BUFFER_TRACE(parent_bh, | ||
1319 | "call ext4_handle_dirty_metadata"); | ||
1320 | ext4_handle_dirty_metadata(handle, | ||
1321 | inode, | ||
1322 | parent_bh); | ||
1323 | } | ||
1324 | } | ||
1325 | } | ||
1326 | } else { | ||
1327 | /* We have reached the bottom of the tree. */ | ||
1328 | BUFFER_TRACE(parent_bh, "free data blocks"); | ||
1329 | ext4_free_data(handle, inode, parent_bh, first, last); | ||
1330 | } | ||
1331 | } | ||
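The recursion above is easiest to see on a toy tree. The sketch below uses a hypothetical four-way node instead of EXT4_ADDR_PER_BLOCK pointers and does no journaling; it only shows the bottom-up order in which child subtrees are released before their parent indirect block:

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *child[4];	/* stand-in for an indirect block */
	int id;
};

static void free_branches(struct node *n, int depth)
{
	if (!n)
		return;				/* a hole */
	if (depth--)
		for (int i = 3; i >= 0; i--)	/* walk pointers backwards */
			free_branches(n->child[i], depth);
	printf("freeing node %d\n", n->id);
	free(n);
}

int main(void)
{
	struct node *root = calloc(1, sizeof(*root));

	root->id = 1;
	root->child[0] = calloc(1, sizeof(*root));
	root->child[0]->id = 2;
	free_branches(root, 1);		/* prints node 2, then node 1 */
	return 0;
}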
1332 | |||
1333 | void ext4_ind_truncate(struct inode *inode) | ||
1334 | { | ||
1335 | handle_t *handle; | ||
1336 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1337 | __le32 *i_data = ei->i_data; | ||
1338 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
1339 | struct address_space *mapping = inode->i_mapping; | ||
1340 | ext4_lblk_t offsets[4]; | ||
1341 | Indirect chain[4]; | ||
1342 | Indirect *partial; | ||
1343 | __le32 nr = 0; | ||
1344 | int n = 0; | ||
1345 | ext4_lblk_t last_block, max_block; | ||
1346 | unsigned blocksize = inode->i_sb->s_blocksize; | ||
1347 | |||
1348 | handle = start_transaction(inode); | ||
1349 | if (IS_ERR(handle)) | ||
1350 | return; /* AKPM: return what? */ | ||
1351 | |||
1352 | last_block = (inode->i_size + blocksize-1) | ||
1353 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
1354 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | ||
1355 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
1356 | |||
1357 | if (inode->i_size & (blocksize - 1)) | ||
1358 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | ||
1359 | goto out_stop; | ||
1360 | |||
1361 | if (last_block != max_block) { | ||
1362 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | ||
1363 | if (n == 0) | ||
1364 | goto out_stop; /* error */ | ||
1365 | } | ||
1366 | |||
1367 | /* | ||
1368 | * OK. This truncate is going to happen. We add the inode to the | ||
1369 | * orphan list, so that if this truncate spans multiple transactions, | ||
1370 | * and we crash, we will resume the truncate when the filesystem | ||
1371 | * recovers. It also marks the inode dirty, to catch the new size. | ||
1372 | * | ||
1373 | * Implication: the file must always be in a sane, consistent | ||
1374 | * truncatable state while each transaction commits. | ||
1375 | */ | ||
1376 | if (ext4_orphan_add(handle, inode)) | ||
1377 | goto out_stop; | ||
1378 | |||
1379 | /* | ||
1380 | * From here we block out all ext4_get_block() callers who want to | ||
1381 | * modify the block allocation tree. | ||
1382 | */ | ||
1383 | down_write(&ei->i_data_sem); | ||
1384 | |||
1385 | ext4_discard_preallocations(inode); | ||
1386 | |||
1387 | /* | ||
1388 | * The orphan list entry will now protect us from any crash which | ||
1389 | * occurs before the truncate completes, so it is now safe to propagate | ||
1390 | * the new, shorter inode size (held for now in i_size) into the | ||
1391 | * on-disk inode. We do this via i_disksize, which is the value which | ||
1392 | * ext4 *really* writes onto the disk inode. | ||
1393 | */ | ||
1394 | ei->i_disksize = inode->i_size; | ||
1395 | |||
1396 | if (last_block == max_block) { | ||
1397 | /* | ||
1398 | * It is unnecessary to free any data blocks if last_block is | ||
1399 | * equal to the indirect block limit. | ||
1400 | */ | ||
1401 | goto out_unlock; | ||
1402 | } else if (n == 1) { /* direct blocks */ | ||
1403 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | ||
1404 | i_data + EXT4_NDIR_BLOCKS); | ||
1405 | goto do_indirects; | ||
1406 | } | ||
1407 | |||
1408 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); | ||
1409 | /* Kill the top of shared branch (not detached) */ | ||
1410 | if (nr) { | ||
1411 | if (partial == chain) { | ||
1412 | /* Shared branch grows from the inode */ | ||
1413 | ext4_free_branches(handle, inode, NULL, | ||
1414 | &nr, &nr+1, (chain+n-1) - partial); | ||
1415 | *partial->p = 0; | ||
1416 | /* | ||
1417 | * We mark the inode dirty prior to restart, | ||
1418 | * and prior to stop. No need for it here. | ||
1419 | */ | ||
1420 | } else { | ||
1421 | /* Shared branch grows from an indirect block */ | ||
1422 | BUFFER_TRACE(partial->bh, "get_write_access"); | ||
1423 | ext4_free_branches(handle, inode, partial->bh, | ||
1424 | partial->p, | ||
1425 | partial->p+1, (chain+n-1) - partial); | ||
1426 | } | ||
1427 | } | ||
1428 | /* Clear the ends of indirect blocks on the shared branch */ | ||
1429 | while (partial > chain) { | ||
1430 | ext4_free_branches(handle, inode, partial->bh, partial->p + 1, | ||
1431 | (__le32*)partial->bh->b_data+addr_per_block, | ||
1432 | (chain+n-1) - partial); | ||
1433 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
1434 | brelse(partial->bh); | ||
1435 | partial--; | ||
1436 | } | ||
1437 | do_indirects: | ||
1438 | /* Kill the remaining (whole) subtrees */ | ||
1439 | switch (offsets[0]) { | ||
1440 | default: | ||
1441 | nr = i_data[EXT4_IND_BLOCK]; | ||
1442 | if (nr) { | ||
1443 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); | ||
1444 | i_data[EXT4_IND_BLOCK] = 0; | ||
1445 | } | ||
1446 | case EXT4_IND_BLOCK: | ||
1447 | nr = i_data[EXT4_DIND_BLOCK]; | ||
1448 | if (nr) { | ||
1449 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); | ||
1450 | i_data[EXT4_DIND_BLOCK] = 0; | ||
1451 | } | ||
1452 | case EXT4_DIND_BLOCK: | ||
1453 | nr = i_data[EXT4_TIND_BLOCK]; | ||
1454 | if (nr) { | ||
1455 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); | ||
1456 | i_data[EXT4_TIND_BLOCK] = 0; | ||
1457 | } | ||
1458 | case EXT4_TIND_BLOCK: | ||
1459 | ; | ||
1460 | } | ||
1461 | |||
1462 | out_unlock: | ||
1463 | up_write(&ei->i_data_sem); | ||
1464 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
1465 | ext4_mark_inode_dirty(handle, inode); | ||
1466 | |||
1467 | /* | ||
1468 | * In a multi-transaction truncate, we only make the final transaction | ||
1469 | * synchronous | ||
1470 | */ | ||
1471 | if (IS_SYNC(inode)) | ||
1472 | ext4_handle_sync(handle); | ||
1473 | out_stop: | ||
1474 | /* | ||
1475 | * If this was a simple ftruncate(), and the file will remain alive | ||
1476 | * then we need to clear up the orphan record which we created above. | ||
1477 | * However, if this was a real unlink then we were called by | ||
1478 | * ext4_delete_inode(), and we allow that function to clean up the | ||
1479 | * orphan info for us. | ||
1480 | */ | ||
1481 | if (inode->i_nlink) | ||
1482 | ext4_orphan_del(handle, inode); | ||
1483 | |||
1484 | ext4_journal_stop(handle); | ||
1485 | trace_ext4_truncate_exit(inode); | ||
1486 | } | ||
1487 | |||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 678cde834f19..18d2558b7624 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -12,10 +12,6 @@ | |||
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | * | 14 | * |
15 | * Goal-directed block allocation by Stephen Tweedie | ||
16 | * (sct@redhat.com), 1993, 1998 | ||
17 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | 15 | * 64-bit file support on 64-bit platforms by Jakub Jelinek |
20 | * (jj@sunsite.ms.mff.cuni.cz) | 16 | * (jj@sunsite.ms.mff.cuni.cz) |
21 | * | 17 | * |
@@ -47,6 +43,7 @@ | |||
47 | #include "xattr.h" | 43 | #include "xattr.h" |
48 | #include "acl.h" | 44 | #include "acl.h" |
49 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
46 | #include "truncate.h" | ||
50 | 47 | ||
51 | #include <trace/events/ext4.h> | 48 | #include <trace/events/ext4.h> |
52 | 49 | ||
@@ -89,72 +86,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) | |||
89 | } | 86 | } |
90 | 87 | ||
91 | /* | 88 | /* |
92 | * Work out how many blocks we need to proceed with the next chunk of a | ||
93 | * truncate transaction. | ||
94 | */ | ||
95 | static unsigned long blocks_for_truncate(struct inode *inode) | ||
96 | { | ||
97 | ext4_lblk_t needed; | ||
98 | |||
99 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); | ||
100 | |||
101 | /* Give ourselves just enough room to cope with inodes in which | ||
102 | * i_blocks is corrupt: we've seen disk corruptions in the past | ||
103 | * which resulted in random data in an inode which looked enough | ||
104 | * like a regular file for ext4 to try to delete it. Things | ||
105 | * will go a bit crazy if that happens, but at least we should | ||
106 | * try not to panic the whole kernel. */ | ||
107 | if (needed < 2) | ||
108 | needed = 2; | ||
109 | |||
110 | /* But we need to bound the transaction so we don't overflow the | ||
111 | * journal. */ | ||
112 | if (needed > EXT4_MAX_TRANS_DATA) | ||
113 | needed = EXT4_MAX_TRANS_DATA; | ||
114 | |||
115 | return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * Truncate transactions can be complex and absolutely huge. So we need to | ||
120 | * be able to restart the transaction at a convenient checkpoint to make | ||
121 | * sure we don't overflow the journal. | ||
122 | * | ||
123 | * start_transaction gets us a new handle for a truncate transaction, | ||
124 | * and extend_transaction tries to extend the existing one a bit. If | ||
125 | * extend fails, we need to propagate the failure up and restart the | ||
126 | * transaction in the top-level truncate loop. --sct | ||
127 | */ | ||
128 | static handle_t *start_transaction(struct inode *inode) | ||
129 | { | ||
130 | handle_t *result; | ||
131 | |||
132 | result = ext4_journal_start(inode, blocks_for_truncate(inode)); | ||
133 | if (!IS_ERR(result)) | ||
134 | return result; | ||
135 | |||
136 | ext4_std_error(inode->i_sb, PTR_ERR(result)); | ||
137 | return result; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Try to extend this transaction for the purposes of truncation. | ||
142 | * | ||
143 | * Returns 0 if we managed to create more room. If we can't create more | ||
144 | * room, the transaction must be restarted, and we return 1. | ||
145 | */ | ||
146 | static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | ||
147 | { | ||
148 | if (!ext4_handle_valid(handle)) | ||
149 | return 0; | ||
150 | if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) | ||
151 | return 0; | ||
152 | if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) | ||
153 | return 0; | ||
154 | return 1; | ||
155 | } | ||
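These two helpers (removed here and re-homed by this patch) implement an extend-or-restart pattern. A toy model of the credit accounting, using a made-up struct rather than the real JBD2 handle:

#include <stdio.h>

/* Toy handle (not JBD2): credits left on this handle, plus how much
 * the journal could still grant if asked to extend. */
struct handle { int credits_left; int journal_free; };

static int try_to_extend(struct handle *h, int needed)
{
	if (h->credits_left >= needed)
		return 0;			/* enough room already */
	if (h->journal_free >= needed) {	/* journal can extend us */
		h->credits_left += needed;
		h->journal_free -= needed;
		return 0;
	}
	return 1;				/* caller must restart */
}

int main(void)
{
	struct handle h = { .credits_left = 1, .journal_free = 0 };

	if (try_to_extend(&h, 8))
		printf("restart transaction\n");
	return 0;
}

A return of 1 tells the caller to dirty its state and restart the handle, which is exactly how ext4_clear_blocks() earlier in this diff reacts to try_to_extend_transaction().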
156 | |||
157 | /* | ||
158 | * Restart the transaction associated with *handle. This does a commit, | 89 | * Restart the transaction associated with *handle. This does a commit, |
159 | * so before we call here everything must be consistently dirtied against | 90 | * so before we call here everything must be consistently dirtied against |
160 | * this transaction. | 91 | * this transaction. |
@@ -189,7 +120,37 @@ void ext4_evict_inode(struct inode *inode) | |||
189 | int err; | 120 | int err; |
190 | 121 | ||
191 | trace_ext4_evict_inode(inode); | 122 | trace_ext4_evict_inode(inode); |
123 | |||
124 | ext4_ioend_wait(inode); | ||
125 | |||
192 | if (inode->i_nlink) { | 126 | if (inode->i_nlink) { |
127 | /* | ||
128 | * When journalling data dirty buffers are tracked only in the | ||
129 | * journal. So although mm thinks everything is clean and | ||
130 | * ready for reaping the inode might still have some pages to | ||
131 | * write in the running transaction or waiting to be | ||
132 | * checkpointed. Thus calling jbd2_journal_invalidatepage() | ||
133 | * (via truncate_inode_pages()) to discard these buffers can | ||
134 | * cause data loss. Also even if we did not discard these | ||
135 | * buffers, we would have no way to find them after the inode | ||
136 | * is reaped and thus user could see stale data if he tries to | ||
137 | * read them before the transaction is checkpointed. So be | ||
138 | * careful and force everything to disk here... We use | ||
139 | * ei->i_datasync_tid to store the newest transaction | ||
140 | * containing inode's data. | ||
141 | * | ||
142 | * Note that directories do not have this problem because they | ||
143 | * don't use page cache. | ||
144 | */ | ||
145 | if (ext4_should_journal_data(inode) && | ||
146 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | ||
147 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
148 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; | ||
149 | |||
150 | jbd2_log_start_commit(journal, commit_tid); | ||
151 | jbd2_log_wait_commit(journal, commit_tid); | ||
152 | filemap_write_and_wait(&inode->i_data); | ||
153 | } | ||
193 | truncate_inode_pages(&inode->i_data, 0); | 154 | truncate_inode_pages(&inode->i_data, 0); |
194 | goto no_delete; | 155 | goto no_delete; |
195 | } | 156 | } |
@@ -204,7 +165,7 @@ void ext4_evict_inode(struct inode *inode) | |||
204 | if (is_bad_inode(inode)) | 165 | if (is_bad_inode(inode)) |
205 | goto no_delete; | 166 | goto no_delete; |
206 | 167 | ||
207 | handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3); | 168 | handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); |
208 | if (IS_ERR(handle)) { | 169 | if (IS_ERR(handle)) { |
209 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); | 170 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); |
210 | /* | 171 | /* |
@@ -277,793 +238,6 @@ no_delete: | |||
277 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ | 238 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
278 | } | 239 | } |
279 | 240 | ||
280 | typedef struct { | ||
281 | __le32 *p; | ||
282 | __le32 key; | ||
283 | struct buffer_head *bh; | ||
284 | } Indirect; | ||
285 | |||
286 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | ||
287 | { | ||
288 | p->key = *(p->p = v); | ||
289 | p->bh = bh; | ||
290 | } | ||
291 | |||
292 | /** | ||
293 | * ext4_block_to_path - parse the block number into array of offsets | ||
294 | * @inode: inode in question (we are only interested in its superblock) | ||
295 | * @i_block: block number to be parsed | ||
296 | * @offsets: array to store the offsets in | ||
297 | * @boundary: set this non-zero if the referred-to block is likely to be | ||
298 | * followed (on disk) by an indirect block. | ||
299 | * | ||
300 | * To store the locations of file's data ext4 uses a data structure common | ||
301 | * for UNIX filesystems - tree of pointers anchored in the inode, with | ||
302 | * data blocks at leaves and indirect blocks in intermediate nodes. | ||
303 | * This function translates the block number into path in that tree - | ||
304 | * return value is the path length and @offsets[n] is the offset of | ||
305 | * the pointer to the (n+1)th node in the nth one. If @i_block is out of range | ||
306 | * (negative or too large), a warning is printed and zero is returned. | ||
307 | * | ||
308 | * Note: function doesn't find node addresses, so no IO is needed. All | ||
309 | * we need to know is the capacity of indirect blocks (taken from the | ||
310 | * inode->i_sb). | ||
311 | */ | ||
312 | |||
313 | /* | ||
314 | * Portability note: the last comparison (check that we fit into triple | ||
315 | * indirect block) is spelled differently, because otherwise on an | ||
316 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | ||
317 | * if our filesystem had 8Kb blocks. We might use long long, but that would | ||
318 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | ||
319 | * i_block would have to be negative in the very beginning, so we would not | ||
320 | * get there at all. | ||
321 | */ | ||
322 | |||
323 | static int ext4_block_to_path(struct inode *inode, | ||
324 | ext4_lblk_t i_block, | ||
325 | ext4_lblk_t offsets[4], int *boundary) | ||
326 | { | ||
327 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
328 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); | ||
329 | const long direct_blocks = EXT4_NDIR_BLOCKS, | ||
330 | indirect_blocks = ptrs, | ||
331 | double_blocks = (1 << (ptrs_bits * 2)); | ||
332 | int n = 0; | ||
333 | int final = 0; | ||
334 | |||
335 | if (i_block < direct_blocks) { | ||
336 | offsets[n++] = i_block; | ||
337 | final = direct_blocks; | ||
338 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | ||
339 | offsets[n++] = EXT4_IND_BLOCK; | ||
340 | offsets[n++] = i_block; | ||
341 | final = ptrs; | ||
342 | } else if ((i_block -= indirect_blocks) < double_blocks) { | ||
343 | offsets[n++] = EXT4_DIND_BLOCK; | ||
344 | offsets[n++] = i_block >> ptrs_bits; | ||
345 | offsets[n++] = i_block & (ptrs - 1); | ||
346 | final = ptrs; | ||
347 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { | ||
348 | offsets[n++] = EXT4_TIND_BLOCK; | ||
349 | offsets[n++] = i_block >> (ptrs_bits * 2); | ||
350 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); | ||
351 | offsets[n++] = i_block & (ptrs - 1); | ||
352 | final = ptrs; | ||
353 | } else { | ||
354 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", | ||
355 | i_block + direct_blocks + | ||
356 | indirect_blocks + double_blocks, inode->i_ino); | ||
357 | } | ||
358 | if (boundary) | ||
359 | *boundary = final - 1 - (i_block & (ptrs - 1)); | ||
360 | return n; | ||
361 | } | ||
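As a worked example, here is a self-contained model of the translation above, assuming a 4KiB block size (12 direct slots, 1024 pointers per indirect block); the constants 12/13/14 mirror EXT4_IND_BLOCK, EXT4_DIND_BLOCK and EXT4_TIND_BLOCK:

#include <stdio.h>

#define NDIR		12	/* direct slots in the inode */
#define PTRS		1024	/* 32-bit pointers per 4KiB block */
#define PTRS_BITS	10

static int block_to_path(long i_block, long offsets[4])
{
	int n = 0;

	if (i_block < NDIR) {
		offsets[n++] = i_block;
	} else if ((i_block -= NDIR) < PTRS) {
		offsets[n++] = 12;		/* EXT4_IND_BLOCK */
		offsets[n++] = i_block;
	} else if ((i_block -= PTRS) < (1L << (PTRS_BITS * 2))) {
		offsets[n++] = 13;		/* EXT4_DIND_BLOCK */
		offsets[n++] = i_block >> PTRS_BITS;
		offsets[n++] = i_block & (PTRS - 1);
	} else if (((i_block -= 1L << (PTRS_BITS * 2)) >> (PTRS_BITS * 2)) < PTRS) {
		offsets[n++] = 14;		/* EXT4_TIND_BLOCK */
		offsets[n++] = i_block >> (PTRS_BITS * 2);
		offsets[n++] = (i_block >> PTRS_BITS) & (PTRS - 1);
		offsets[n++] = i_block & (PTRS - 1);
	}
	return n;		/* path depth; 0 if out of range */
}

int main(void)
{
	long off[4];
	int depth = block_to_path(5000, off);

	printf("depth %d: ", depth);
	for (int i = 0; i < depth; i++)
		printf("%ld ", off[i]);
	printf("\n");		/* prints: depth 3: 13 3 892 */
	return 0;
}

Logical block 5000 lands two levels down the double-indirect tree: slot 13 in the inode, slot 3 in the first-level block, slot 892 in the second.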
362 | |||
363 | static int __ext4_check_blockref(const char *function, unsigned int line, | ||
364 | struct inode *inode, | ||
365 | __le32 *p, unsigned int max) | ||
366 | { | ||
367 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
368 | __le32 *bref = p; | ||
369 | unsigned int blk; | ||
370 | |||
371 | while (bref < p+max) { | ||
372 | blk = le32_to_cpu(*bref++); | ||
373 | if (blk && | ||
374 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
375 | blk, 1))) { | ||
376 | es->s_last_error_block = cpu_to_le64(blk); | ||
377 | ext4_error_inode(inode, function, line, blk, | ||
378 | "invalid block"); | ||
379 | return -EIO; | ||
380 | } | ||
381 | } | ||
382 | return 0; | ||
383 | } | ||
384 | |||
385 | |||
386 | #define ext4_check_indirect_blockref(inode, bh) \ | ||
387 | __ext4_check_blockref(__func__, __LINE__, inode, \ | ||
388 | (__le32 *)(bh)->b_data, \ | ||
389 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | ||
390 | |||
391 | #define ext4_check_inode_blockref(inode) \ | ||
392 | __ext4_check_blockref(__func__, __LINE__, inode, \ | ||
393 | EXT4_I(inode)->i_data, \ | ||
394 | EXT4_NDIR_BLOCKS) | ||
395 | |||
396 | /** | ||
397 | * ext4_get_branch - read the chain of indirect blocks leading to data | ||
398 | * @inode: inode in question | ||
399 | * @depth: depth of the chain (1 - direct pointer, etc.) | ||
400 | * @offsets: offsets of pointers in inode/indirect blocks | ||
401 | * @chain: place to store the result | ||
402 | * @err: here we store the error value | ||
403 | * | ||
404 | * Function fills the array of triples <key, p, bh> and returns %NULL | ||
405 | * if everything went OK or the pointer to the last filled triple | ||
406 | * (incomplete one) otherwise. Upon the return chain[i].key contains | ||
407 | * the number of (i+1)-th block in the chain (as it is stored in memory, | ||
408 | * i.e. little-endian 32-bit), chain[i].p contains the address of that | ||
409 | * number (it points into struct inode for i==0 and into the bh->b_data | ||
410 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect | ||
411 | * block for i>0 and NULL for i==0. In other words, it holds the block | ||
412 | * numbers of the chain, addresses they were taken from (and where we can | ||
413 | * verify that chain did not change) and buffer_heads hosting these | ||
414 | * numbers. | ||
415 | * | ||
416 | * Function stops when it stumbles upon zero pointer (absent block) | ||
417 | * (pointer to last triple returned, *@err == 0) | ||
418 | * or when it gets an IO error reading an indirect block | ||
419 | * (ditto, *@err == -EIO) | ||
420 | * or when it reads all @depth-1 indirect blocks successfully and finds | ||
421 | * the whole chain, all the way to the data (returns %NULL, *err == 0). | ||
422 | * | ||
423 | * Needs to be called with | ||
424 | * down_read(&EXT4_I(inode)->i_data_sem) | ||
425 | */ | ||
426 | static Indirect *ext4_get_branch(struct inode *inode, int depth, | ||
427 | ext4_lblk_t *offsets, | ||
428 | Indirect chain[4], int *err) | ||
429 | { | ||
430 | struct super_block *sb = inode->i_sb; | ||
431 | Indirect *p = chain; | ||
432 | struct buffer_head *bh; | ||
433 | |||
434 | *err = 0; | ||
435 | /* i_data is not going away, no lock needed */ | ||
436 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); | ||
437 | if (!p->key) | ||
438 | goto no_block; | ||
439 | while (--depth) { | ||
440 | bh = sb_getblk(sb, le32_to_cpu(p->key)); | ||
441 | if (unlikely(!bh)) | ||
442 | goto failure; | ||
443 | |||
444 | if (!bh_uptodate_or_lock(bh)) { | ||
445 | if (bh_submit_read(bh) < 0) { | ||
446 | put_bh(bh); | ||
447 | goto failure; | ||
448 | } | ||
449 | /* validate block references */ | ||
450 | if (ext4_check_indirect_blockref(inode, bh)) { | ||
451 | put_bh(bh); | ||
452 | goto failure; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); | ||
457 | /* Reader: end */ | ||
458 | if (!p->key) | ||
459 | goto no_block; | ||
460 | } | ||
461 | return NULL; | ||
462 | |||
463 | failure: | ||
464 | *err = -EIO; | ||
465 | no_block: | ||
466 | return p; | ||
467 | } | ||
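The chain walk can be modelled without buffer_heads at all. In this sketch, arrays fake the disk and there is no locking or validation; a zero slot terminates the walk early, just as a missing block does above:

#include <stdio.h>
#include <stdint.h>

#define PTRS 8

static uint32_t *blocks[16];	/* fake disk: block number -> pointer array */

/* Follow offsets[] through successive pointer blocks, recording each
 * key in chain[]; stop at the first zero slot (a hole). */
static int get_branch(uint32_t *i_data, int depth, const int *offsets,
		      uint32_t chain[4])
{
	uint32_t *cur = i_data;
	int i;

	for (i = 0; i < depth; i++) {
		chain[i] = cur[offsets[i]];
		if (!chain[i])
			return i;		/* partial chain */
		if (i + 1 < depth)
			cur = blocks[chain[i]];	/* read next level */
	}
	return depth;				/* full chain found */
}

int main(void)
{
	static uint32_t ind[PTRS] = { [3] = 7 };	/* "block 5" */
	static uint32_t i_data[PTRS] = { [1] = 5 };
	int offsets[2] = { 1, 3 }, depth = 2;
	uint32_t chain[4];
	int got;

	blocks[5] = ind;
	got = get_branch(i_data, depth, offsets, chain);
	printf("walked %d of %d levels, leaf block %u\n", got, depth,
	       got == depth ? chain[depth - 1] : 0);
	return 0;
}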
468 | |||
469 | /** | ||
470 | * ext4_find_near - find a place for allocation with sufficient locality | ||
471 | * @inode: owner | ||
472 | * @ind: descriptor of indirect block. | ||
473 | * | ||
474 | * This function returns the preferred place for block allocation. | ||
475 | * It is used when heuristic for sequential allocation fails. | ||
476 | * Rules are: | ||
477 | * + if there is a block to the left of our position - allocate near it. | ||
478 | * + if pointer will live in indirect block - allocate near that block. | ||
479 | * + if pointer will live in inode - allocate in the same | ||
480 | * cylinder group. | ||
481 | * | ||
482 | * In the latter case we colour the starting block by the caller's PID to | ||
483 | * prevent it from clashing with concurrent allocations for a different inode | ||
484 | * in the same block group. The PID is used here so that functionally related | ||
485 | * files will be close-by on-disk. | ||
486 | * | ||
487 | * Caller must make sure that @ind is valid and will stay that way. | ||
488 | */ | ||
489 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | ||
490 | { | ||
491 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
492 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; | ||
493 | __le32 *p; | ||
494 | ext4_fsblk_t bg_start; | ||
495 | ext4_fsblk_t last_block; | ||
496 | ext4_grpblk_t colour; | ||
497 | ext4_group_t block_group; | ||
498 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | ||
499 | |||
500 | /* Try to find previous block */ | ||
501 | for (p = ind->p - 1; p >= start; p--) { | ||
502 | if (*p) | ||
503 | return le32_to_cpu(*p); | ||
504 | } | ||
505 | |||
506 | /* No such thing, so let's try location of indirect block */ | ||
507 | if (ind->bh) | ||
508 | return ind->bh->b_blocknr; | ||
509 | |||
510 | /* | ||
511 | * It is going to be referred to from the inode itself? OK, just put it | ||
512 | * into the same cylinder group then. | ||
513 | */ | ||
514 | block_group = ei->i_block_group; | ||
515 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | ||
516 | block_group &= ~(flex_size-1); | ||
517 | if (S_ISREG(inode->i_mode)) | ||
518 | block_group++; | ||
519 | } | ||
520 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); | ||
521 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | ||
522 | |||
523 | /* | ||
524 | * If we are doing delayed allocation, we don't need to take | ||
525 | * colour into account. | ||
526 | */ | ||
527 | if (test_opt(inode->i_sb, DELALLOC)) | ||
528 | return bg_start; | ||
529 | |||
530 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | ||
531 | colour = (current->pid % 16) * | ||
532 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | ||
533 | else | ||
534 | colour = (current->pid % 16) * ((last_block - bg_start) / 16); | ||
535 | return bg_start + colour; | ||
536 | } | ||
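A quick numeric example of the PID colouring described above, with assumed values (32768 blocks per group, group starting at block 163840):

#include <stdio.h>

int main(void)
{
	unsigned long blocks_per_group = 32768;	/* typical for 4KiB blocks */
	unsigned long bg_start = 163840;	/* first block of the group */
	long pid = 4242;			/* stand-in for current->pid */
	unsigned long colour = (pid % 16) * (blocks_per_group / 16);

	/* 4242 % 16 == 2, so this allocator starts 2/16ths into the
	 * group: goal = 163840 + 4096 = 167936. */
	printf("goal = %lu\n", bg_start + colour);
	return 0;
}

Each of up to 16 concurrent allocators thus starts in a different sixteenth of the block group, so functionally unrelated writers are kept apart on disk.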
537 | |||
538 | /** | ||
539 | * ext4_find_goal - find a preferred place for allocation. | ||
540 | * @inode: owner | ||
541 | * @block: block we want | ||
542 | * @partial: pointer to the last triple within a chain | ||
543 | * | ||
544 | * Normally this function finds the preferred place for block allocation | ||
545 | * and returns it. | ||
546 | * Because this is only used for non-extent files, we limit the block nr | ||
547 | * to 32 bits. | ||
548 | */ | ||
549 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | ||
550 | Indirect *partial) | ||
551 | { | ||
552 | ext4_fsblk_t goal; | ||
553 | |||
554 | /* | ||
555 | * XXX need to get goal block from mballoc's data structures | ||
556 | */ | ||
557 | |||
558 | goal = ext4_find_near(inode, partial); | ||
559 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
560 | return goal; | ||
561 | } | ||
562 | |||
563 | /** | ||
564 | * ext4_blks_to_allocate - Look up the block map and count the number | ||
565 | * of direct blocks need to be allocated for the given branch. | ||
566 | * | ||
567 | * @branch: chain of indirect blocks | ||
568 | * @k: number of blocks need for indirect blocks | ||
569 | * @blks: number of data blocks to be mapped. | ||
570 | * @blocks_to_boundary: the offset in the indirect block | ||
571 | * | ||
572 | * return the total number of blocks to be allocated, including the | ||
573 | * direct and indirect blocks. | ||
574 | */ | ||
575 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | ||
576 | int blocks_to_boundary) | ||
577 | { | ||
578 | unsigned int count = 0; | ||
579 | |||
580 | /* | ||
581 | * Simple case: the [t,d]indirect block(s) have not been allocated yet, | ||
582 | * so clearly the blocks on that path have not been allocated either. | ||
583 | */ | ||
584 | if (k > 0) { | ||
585 | /* right now we don't handle cross boundary allocation */ | ||
586 | if (blks < blocks_to_boundary + 1) | ||
587 | count += blks; | ||
588 | else | ||
589 | count += blocks_to_boundary + 1; | ||
590 | return count; | ||
591 | } | ||
592 | |||
593 | count++; | ||
594 | while (count < blks && count <= blocks_to_boundary && | ||
595 | le32_to_cpu(*(branch[0].p + count)) == 0) { | ||
596 | count++; | ||
597 | } | ||
598 | return count; | ||
599 | } | ||
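For the k == 0 case, the counting loop above reduces to the following userspace sketch (the slot array is made up; the cap mirrors blocks_to_boundary):

#include <stdio.h>
#include <stdint.h>

/* Count the block we came to map plus however many of the following
 * slots are still unmapped (zero), capped by the request size and the
 * distance to the end of the indirect block. */
static int blks_to_allocate(const uint32_t *p, unsigned blks,
			    int blocks_to_boundary)
{
	unsigned count = 1;

	while (count < blks && count <= (unsigned)blocks_to_boundary &&
	       p[count] == 0)
		count++;
	return count;
}

int main(void)
{
	uint32_t slots[8] = { 0, 0, 0, 42, 0, 0, 0, 0 };

	/* asked for 6 blocks, boundary 7 away, slot 3 already mapped */
	printf("%d\n", blks_to_allocate(slots, 6, 7));	/* prints 3 */
	return 0;
}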
600 | |||
601 | /** | ||
602 | * ext4_alloc_blocks: multiple allocate blocks needed for a branch | ||
603 | * @handle: handle for this transaction | ||
604 | * @inode: inode which needs allocated blocks | ||
605 | * @iblock: the logical block to start allocated at | ||
606 | * @goal: preferred physical block of allocation | ||
607 | * @indirect_blks: the number of blocks that need to be allocated for indirect | ||
608 | * blocks | ||
609 | * @blks: number of desired blocks | ||
610 | * @new_blocks: on return it will store the new block numbers for | ||
611 | * the indirect blocks(if needed) and the first direct block, | ||
612 | * @err: on return it will store the error code | ||
613 | * | ||
614 | * This function will return the number of blocks allocated as | ||
615 | * requested by the passed-in parameters. | ||
616 | */ | ||
617 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | ||
618 | ext4_lblk_t iblock, ext4_fsblk_t goal, | ||
619 | int indirect_blks, int blks, | ||
620 | ext4_fsblk_t new_blocks[4], int *err) | ||
621 | { | ||
622 | struct ext4_allocation_request ar; | ||
623 | int target, i; | ||
624 | unsigned long count = 0, blk_allocated = 0; | ||
625 | int index = 0; | ||
626 | ext4_fsblk_t current_block = 0; | ||
627 | int ret = 0; | ||
628 | |||
629 | /* | ||
630 | * Here we try to allocate the requested multiple blocks at once, | ||
631 | * on a best-effort basis. | ||
632 | * To build a branch, we must allocate blocks for | ||
633 | * the indirect blocks (if not allocated yet), and at least | ||
634 | * the first direct block of this branch. That's the | ||
635 | * minimum number of blocks that need to be allocated (required). | ||
636 | */ | ||
637 | /* first we try to allocate the indirect blocks */ | ||
638 | target = indirect_blks; | ||
639 | while (target > 0) { | ||
640 | count = target; | ||
641 | /* allocating blocks for indirect blocks and direct blocks */ | ||
642 | current_block = ext4_new_meta_blocks(handle, inode, goal, | ||
643 | 0, &count, err); | ||
644 | if (*err) | ||
645 | goto failed_out; | ||
646 | |||
647 | if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { | ||
648 | EXT4_ERROR_INODE(inode, | ||
649 | "current_block %llu + count %lu > %d!", | ||
650 | current_block, count, | ||
651 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
652 | *err = -EIO; | ||
653 | goto failed_out; | ||
654 | } | ||
655 | |||
656 | target -= count; | ||
657 | /* allocate blocks for indirect blocks */ | ||
658 | while (index < indirect_blks && count) { | ||
659 | new_blocks[index++] = current_block++; | ||
660 | count--; | ||
661 | } | ||
662 | if (count > 0) { | ||
663 | /* | ||
664 | * save the new block number | ||
665 | * for the first direct block | ||
666 | */ | ||
667 | new_blocks[index] = current_block; | ||
668 | printk(KERN_INFO "%s returned more blocks than " | ||
669 | "requested\n", __func__); | ||
670 | WARN_ON(1); | ||
671 | break; | ||
672 | } | ||
673 | } | ||
674 | |||
675 | target = blks - count; | ||
676 | blk_allocated = count; | ||
677 | if (!target) | ||
678 | goto allocated; | ||
679 | /* Now allocate data blocks */ | ||
680 | memset(&ar, 0, sizeof(ar)); | ||
681 | ar.inode = inode; | ||
682 | ar.goal = goal; | ||
683 | ar.len = target; | ||
684 | ar.logical = iblock; | ||
685 | if (S_ISREG(inode->i_mode)) | ||
686 | /* enable in-core preallocation only for regular files */ | ||
687 | ar.flags = EXT4_MB_HINT_DATA; | ||
688 | |||
689 | current_block = ext4_mb_new_blocks(handle, &ar, err); | ||
690 | if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { | ||
691 | EXT4_ERROR_INODE(inode, | ||
692 | "current_block %llu + ar.len %d > %d!", | ||
693 | current_block, ar.len, | ||
694 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
695 | *err = -EIO; | ||
696 | goto failed_out; | ||
697 | } | ||
698 | |||
699 | if (*err && (target == blks)) { | ||
700 | /* | ||
701 | * if the allocation failed and we didn't allocate | ||
702 | * any blocks before | ||
703 | */ | ||
704 | goto failed_out; | ||
705 | } | ||
706 | if (!*err) { | ||
707 | if (target == blks) { | ||
708 | /* | ||
709 | * save the new block number | ||
710 | * for the first direct block | ||
711 | */ | ||
712 | new_blocks[index] = current_block; | ||
713 | } | ||
714 | blk_allocated += ar.len; | ||
715 | } | ||
716 | allocated: | ||
717 | /* total number of blocks allocated for direct blocks */ | ||
718 | ret = blk_allocated; | ||
719 | *err = 0; | ||
720 | return ret; | ||
721 | failed_out: | ||
722 | for (i = 0; i < index; i++) | ||
723 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); | ||
724 | return ret; | ||
725 | } | ||
726 | |||
727 | /** | ||
728 | * ext4_alloc_branch - allocate and set up a chain of blocks. | ||
729 | * @handle: handle for this transaction | ||
730 | * @inode: owner | ||
731 | * @indirect_blks: number of allocated indirect blocks | ||
732 | * @blks: number of allocated direct blocks | ||
733 | * @goal: preferred place for allocation | ||
734 | * @offsets: offsets (in the blocks) to store the pointers to next. | ||
735 | * @branch: place to store the chain in. | ||
736 | * | ||
737 | * This function allocates blocks, zeroes out all but the last one, | ||
738 | * links them into chain and (if we are synchronous) writes them to disk. | ||
739 | * In other words, it prepares a branch that can be spliced onto the | ||
740 | * inode. It stores the information about that chain in the branch[], in | ||
741 | * the same format as ext4_get_branch() would do. We are calling it after | ||
742 | * we had read the existing part of chain and partial points to the last | ||
743 | * triple of that (one with zero ->key). Upon the exit we have the same | ||
744 | * picture as after the successful ext4_get_block(), except that in one | ||
745 | * place chain is disconnected - *branch->p is still zero (we did not | ||
746 | * set the last link), but branch->key contains the number that should | ||
747 | * be placed into *branch->p to fill that gap. | ||
748 | * | ||
749 | * If allocation fails we free all blocks we've allocated (and forget | ||
750 | * their buffer_heads) and return the error value from the failed | ||
751 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain | ||
752 | * as described above and return 0. | ||
753 | */ | ||
754 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | ||
755 | ext4_lblk_t iblock, int indirect_blks, | ||
756 | int *blks, ext4_fsblk_t goal, | ||
757 | ext4_lblk_t *offsets, Indirect *branch) | ||
758 | { | ||
759 | int blocksize = inode->i_sb->s_blocksize; | ||
760 | int i, n = 0; | ||
761 | int err = 0; | ||
762 | struct buffer_head *bh; | ||
763 | int num; | ||
764 | ext4_fsblk_t new_blocks[4]; | ||
765 | ext4_fsblk_t current_block; | ||
766 | |||
767 | num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, | ||
768 | *blks, new_blocks, &err); | ||
769 | if (err) | ||
770 | return err; | ||
771 | |||
772 | branch[0].key = cpu_to_le32(new_blocks[0]); | ||
773 | /* | ||
774 | * metadata blocks and data blocks are allocated. | ||
775 | */ | ||
776 | for (n = 1; n <= indirect_blks; n++) { | ||
777 | /* | ||
778 | * Get buffer_head for parent block, zero it out | ||
779 | * and set the pointer to new one, then send | ||
780 | * parent to disk. | ||
781 | */ | ||
782 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | ||
783 | if (unlikely(!bh)) { | ||
784 | err = -EIO; | ||
785 | goto failed; | ||
786 | } | ||
787 | |||
788 | branch[n].bh = bh; | ||
789 | lock_buffer(bh); | ||
790 | BUFFER_TRACE(bh, "call get_create_access"); | ||
791 | err = ext4_journal_get_create_access(handle, bh); | ||
792 | if (err) { | ||
793 | /* Don't brelse(bh) here; it's done in | ||
794 | * ext4_journal_forget() below */ | ||
795 | unlock_buffer(bh); | ||
796 | goto failed; | ||
797 | } | ||
798 | |||
799 | memset(bh->b_data, 0, blocksize); | ||
800 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | ||
801 | branch[n].key = cpu_to_le32(new_blocks[n]); | ||
802 | *branch[n].p = branch[n].key; | ||
803 | if (n == indirect_blks) { | ||
804 | current_block = new_blocks[n]; | ||
805 | /* | ||
806 | * End of chain, update the last new metablock of | ||
807 | * the chain to point to the new allocated | ||
808 | * data blocks numbers | ||
809 | */ | ||
810 | for (i = 1; i < num; i++) | ||
811 | *(branch[n].p + i) = cpu_to_le32(++current_block); | ||
812 | } | ||
813 | BUFFER_TRACE(bh, "marking uptodate"); | ||
814 | set_buffer_uptodate(bh); | ||
815 | unlock_buffer(bh); | ||
816 | |||
817 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
818 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
819 | if (err) | ||
820 | goto failed; | ||
821 | } | ||
822 | *blks = num; | ||
823 | return err; | ||
824 | failed: | ||
825 | /* Allocation failed, free what we already allocated */ | ||
826 | ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); | ||
827 | for (i = 1; i <= n ; i++) { | ||
828 | /* | ||
829 | * branch[i].bh is newly allocated, so there is no | ||
830 | * need to revoke the block, which is why we don't | ||
831 | * need to set EXT4_FREE_BLOCKS_METADATA. | ||
832 | */ | ||
833 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, | ||
834 | EXT4_FREE_BLOCKS_FORGET); | ||
835 | } | ||
836 | for (i = n+1; i < indirect_blks; i++) | ||
837 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); | ||
838 | |||
839 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); | ||
840 | |||
841 | return err; | ||
842 | } | ||
843 | |||
844 | /** | ||
845 | * ext4_splice_branch - splice the allocated branch onto inode. | ||
846 | * @handle: handle for this transaction | ||
847 | * @inode: owner | ||
848 | * @block: (logical) number of block we are adding | ||
849 | * @chain: chain of indirect blocks (with a missing link - see | ||
850 | * ext4_alloc_branch) | ||
851 | * @where: location of missing link | ||
852 | * @num: number of indirect blocks we are adding | ||
853 | * @blks: number of direct blocks we are adding | ||
854 | * | ||
855 | * This function fills the missing link and does all housekeeping needed in | ||
856 | * inode (->i_blocks, etc.). In case of success we end up with the full | ||
857 | * chain to new block and return 0. | ||
858 | */ | ||
859 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | ||
860 | ext4_lblk_t block, Indirect *where, int num, | ||
861 | int blks) | ||
862 | { | ||
863 | int i; | ||
864 | int err = 0; | ||
865 | ext4_fsblk_t current_block; | ||
866 | |||
867 | /* | ||
868 | * If we're splicing into a [td]indirect block (as opposed to the | ||
869 | * inode) then we need to get write access to the [td]indirect block | ||
870 | * before the splice. | ||
871 | */ | ||
872 | if (where->bh) { | ||
873 | BUFFER_TRACE(where->bh, "get_write_access"); | ||
874 | err = ext4_journal_get_write_access(handle, where->bh); | ||
875 | if (err) | ||
876 | goto err_out; | ||
877 | } | ||
878 | /* That's it */ | ||
879 | |||
880 | *where->p = where->key; | ||
881 | |||
882 | /* | ||
883 | * Update the host buffer_head or inode to point to the just-allocated | ||
884 | * direct blocks | ||
885 | */ | ||
886 | if (num == 0 && blks > 1) { | ||
887 | current_block = le32_to_cpu(where->key) + 1; | ||
888 | for (i = 1; i < blks; i++) | ||
889 | *(where->p + i) = cpu_to_le32(current_block++); | ||
890 | } | ||
891 | |||
892 | /* We are done with atomic stuff, now do the rest of housekeeping */ | ||
893 | /* had we spliced it onto an indirect block? */ | ||
894 | if (where->bh) { | ||
895 | /* | ||
896 | * If we spliced it onto an indirect block, we haven't | ||
897 | * altered the inode. Note however that if it is being spliced | ||
898 | * onto an indirect block at the very end of the file (the | ||
899 | * file is growing) then we *will* alter the inode to reflect | ||
900 | * the new i_size. But that is not done here - it is done in | ||
901 | * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. | ||
902 | */ | ||
903 | jbd_debug(5, "splicing indirect only\n"); | ||
904 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); | ||
905 | err = ext4_handle_dirty_metadata(handle, inode, where->bh); | ||
906 | if (err) | ||
907 | goto err_out; | ||
908 | } else { | ||
909 | /* | ||
910 | * OK, we spliced it into the inode itself on a direct block. | ||
911 | */ | ||
912 | ext4_mark_inode_dirty(handle, inode); | ||
913 | jbd_debug(5, "splicing direct\n"); | ||
914 | } | ||
915 | return err; | ||
916 | |||
917 | err_out: | ||
918 | for (i = 1; i <= num; i++) { | ||
919 | /* | ||
920 | * branch[i].bh is newly allocated, so there is no | ||
921 | * need to revoke the block, which is why we don't | ||
922 | * need to set EXT4_FREE_BLOCKS_METADATA. | ||
923 | */ | ||
924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | ||
925 | EXT4_FREE_BLOCKS_FORGET); | ||
926 | } | ||
927 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), | ||
928 | blks, 0); | ||
929 | |||
930 | return err; | ||
931 | } | ||
932 | |||
933 | /* | ||
934 | * The ext4_ind_map_blocks() function handles non-extents inodes | ||
935 | * (i.e., using the traditional indirect/double-indirect i_blocks | ||
936 | * scheme) for ext4_map_blocks(). | ||
937 | * | ||
938 | * Allocation strategy is simple: if we have to allocate something, we will | ||
939 | * have to go the whole way to the leaf. So let's do it before attaching anything | ||
940 | * to tree, set linkage between the newborn blocks, write them if sync is | ||
941 | * required, recheck the path, free and repeat if check fails, otherwise | ||
942 | * set the last missing link (that will protect us from any truncate-generated | ||
943 | * removals - all blocks on the path are immune now) and possibly force the | ||
944 | * write on the parent block. | ||
945 | * That has a nice additional property: no special recovery from the failed | ||
946 | * allocations is needed - we simply release blocks and do not touch anything | ||
947 | * reachable from inode. | ||
948 | * | ||
949 | * `handle' can be NULL if create == 0. | ||
950 | * | ||
951 | * return > 0, # of blocks mapped or allocated. | ||
952 | * return = 0, if plain lookup failed. | ||
953 | * return < 0, error case. | ||
954 | * | ||
955 | * The ext4_ind_map_blocks() function should be called with | ||
956 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem | ||
957 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or | ||
958 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | ||
959 | * blocks. | ||
960 | */ | ||
961 | static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | ||
962 | struct ext4_map_blocks *map, | ||
963 | int flags) | ||
964 | { | ||
965 | int err = -EIO; | ||
966 | ext4_lblk_t offsets[4]; | ||
967 | Indirect chain[4]; | ||
968 | Indirect *partial; | ||
969 | ext4_fsblk_t goal; | ||
970 | int indirect_blks; | ||
971 | int blocks_to_boundary = 0; | ||
972 | int depth; | ||
973 | int count = 0; | ||
974 | ext4_fsblk_t first_block = 0; | ||
975 | |||
976 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
977 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | ||
978 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | ||
979 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | ||
980 | &blocks_to_boundary); | ||
981 | |||
982 | if (depth == 0) | ||
983 | goto out; | ||
984 | |||
985 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); | ||
986 | |||
987 | /* Simplest case - block found, no allocation needed */ | ||
988 | if (!partial) { | ||
989 | first_block = le32_to_cpu(chain[depth - 1].key); | ||
990 | count++; | ||
991 | /* map more blocks */ | ||
992 | while (count < map->m_len && count <= blocks_to_boundary) { | ||
993 | ext4_fsblk_t blk; | ||
994 | |||
995 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | ||
996 | |||
997 | if (blk == first_block + count) | ||
998 | count++; | ||
999 | else | ||
1000 | break; | ||
1001 | } | ||
1002 | goto got_it; | ||
1003 | } | ||
1004 | |||
1005 | /* Next simple case - plain lookup or failed read of indirect block */ | ||
1006 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) | ||
1007 | goto cleanup; | ||
1008 | |||
1009 | /* | ||
1010 | * Okay, we need to do block allocation. | ||
1011 | */ | ||
1012 | goal = ext4_find_goal(inode, map->m_lblk, partial); | ||
1013 | |||
1014 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | ||
1015 | indirect_blks = (chain + depth) - partial - 1; | ||
1016 | |||
1017 | /* | ||
1018 | * Next look up the indirect map to count the total number of | ||
1019 | * direct blocks to allocate for this branch. | ||
1020 | */ | ||
1021 | count = ext4_blks_to_allocate(partial, indirect_blks, | ||
1022 | map->m_len, blocks_to_boundary); | ||
1023 | /* | ||
1024 | * Block out ext4_truncate while we alter the tree | ||
1025 | */ | ||
1026 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, | ||
1027 | &count, goal, | ||
1028 | offsets + (partial - chain), partial); | ||
1029 | |||
1030 | /* | ||
1031 | * The ext4_splice_branch call will free and forget any buffers | ||
1032 | * on the new chain if there is a failure, but that risks using | ||
1033 | * up transaction credits, especially for bitmaps where the | ||
1034 | * credits cannot be returned. Can we handle this somehow? We | ||
1035 | * may need to return -EAGAIN upwards in the worst case. --sct | ||
1036 | */ | ||
1037 | if (!err) | ||
1038 | err = ext4_splice_branch(handle, inode, map->m_lblk, | ||
1039 | partial, indirect_blks, count); | ||
1040 | if (err) | ||
1041 | goto cleanup; | ||
1042 | |||
1043 | map->m_flags |= EXT4_MAP_NEW; | ||
1044 | |||
1045 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
1046 | got_it: | ||
1047 | map->m_flags |= EXT4_MAP_MAPPED; | ||
1048 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | ||
1049 | map->m_len = count; | ||
1050 | if (count > blocks_to_boundary) | ||
1051 | map->m_flags |= EXT4_MAP_BOUNDARY; | ||
1052 | err = count; | ||
1053 | /* Clean up and exit */ | ||
1054 | partial = chain + depth - 1; /* the whole chain */ | ||
1055 | cleanup: | ||
1056 | while (partial > chain) { | ||
1057 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
1058 | brelse(partial->bh); | ||
1059 | partial--; | ||
1060 | } | ||
1061 | out: | ||
1062 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | ||
1063 | map->m_pblk, map->m_len, err); | ||
1064 | return err; | ||
1065 | } | ||
1066 | |||
1067 | #ifdef CONFIG_QUOTA | 241 | #ifdef CONFIG_QUOTA |
1068 | qsize_t *ext4_get_reserved_space(struct inode *inode) | 242 | qsize_t *ext4_get_reserved_space(struct inode *inode) |
1069 | { | 243 | { |
@@ -1073,33 +247,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) | |||
1073 | 247 | ||
1074 | /* | 248 | /* |
1075 | * Calculate the number of metadata blocks need to reserve | 249 | * Calculate the number of metadata blocks need to reserve |
1076 | * to allocate a new block at @lblock for a non-extent-based file | ||
1077 | */ | ||
1078 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, | ||
1079 | sector_t lblock) | ||
1080 | { | ||
1081 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1082 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); | ||
1083 | int blk_bits; | ||
1084 | |||
1085 | if (lblock < EXT4_NDIR_BLOCKS) | ||
1086 | return 0; | ||
1087 | |||
1088 | lblock -= EXT4_NDIR_BLOCKS; | ||
1089 | |||
1090 | if (ei->i_da_metadata_calc_len && | ||
1091 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { | ||
1092 | ei->i_da_metadata_calc_len++; | ||
1093 | return 0; | ||
1094 | } | ||
1095 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | ||
1096 | ei->i_da_metadata_calc_len = 1; | ||
1097 | blk_bits = order_base_2(lblock); | ||
1098 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | ||
1099 | } | ||
1100 | |||
1101 | /* | ||
1102 | * Calculate the number of metadata blocks need to reserve | ||
1103 | * to allocate a block located at @lblock | 250 | * to allocate a block located at @lblock |
1104 | */ | 251 | */ |
1105 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | 252 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
@@ -1107,7 +254,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | |||
1107 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 254 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1108 | return ext4_ext_calc_metadata_amount(inode, lblock); | 255 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1109 | 256 | ||
1110 | return ext4_indirect_calc_metadata_amount(inode, lblock); | 257 | return ext4_ind_calc_metadata_amount(inode, lblock); |
1111 | } | 258 | } |
1112 | 259 | ||
1113 | /* | 260 | /* |
@@ -1589,16 +736,6 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1589 | return ret; | 736 | return ret; |
1590 | } | 737 | } |
1591 | 738 | ||
1592 | /* | ||
1593 | * Truncate blocks that were not used by write. We have to truncate the | ||
1594 | * pagecache as well so that corresponding buffers get properly unmapped. | ||
1595 | */ | ||
1596 | static void ext4_truncate_failed_write(struct inode *inode) | ||
1597 | { | ||
1598 | truncate_inode_pages(inode->i_mapping, inode->i_size); | ||
1599 | ext4_truncate(inode); | ||
1600 | } | ||
1601 | |||
1602 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 739 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
1603 | struct buffer_head *bh_result, int create); | 740 | struct buffer_head *bh_result, int create); |
1604 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 741 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
@@ -1849,6 +986,8 @@ static int ext4_journalled_write_end(struct file *file, | |||
1849 | from = pos & (PAGE_CACHE_SIZE - 1); | 986 | from = pos & (PAGE_CACHE_SIZE - 1); |
1850 | to = from + len; | 987 | to = from + len; |
1851 | 988 | ||
989 | BUG_ON(!ext4_handle_valid(handle)); | ||
990 | |||
1852 | if (copied < len) { | 991 | if (copied < len) { |
1853 | if (!PageUptodate(page)) | 992 | if (!PageUptodate(page)) |
1854 | copied = 0; | 993 | copied = 0; |
@@ -1863,6 +1002,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1863 | if (new_i_size > inode->i_size) | 1002 | if (new_i_size > inode->i_size) |
1864 | i_size_write(inode, pos+copied); | 1003 | i_size_write(inode, pos+copied); |
1865 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 1004 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1005 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; | ||
1866 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 1006 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1867 | ext4_update_i_disksize(inode, new_i_size); | 1007 | ext4_update_i_disksize(inode, new_i_size); |
1868 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1008 | ret2 = ext4_mark_inode_dirty(handle, inode); |
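Recording the committing transaction's tid in i_datasync_tid lets fdatasync() skip forcing a commit when the data-bearing transaction has already reached disk. A hedged sketch of the consumer side — the real check lives in ext4_sync_file() and may differ in detail:

	tid_t commit_tid = datasync ? EXT4_I(inode)->i_datasync_tid
				    : EXT4_I(inode)->i_sync_tid;

	/* starts a commit only if that tid has not begun committing yet */
	if (jbd2_log_start_commit(journal, commit_tid))
		jbd2_log_wait_commit(journal, commit_tid);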
@@ -2148,7 +1288,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2148 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) | 1288 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) |
2149 | err = ext4_bio_write_page(&io_submit, page, | 1289 | err = ext4_bio_write_page(&io_submit, page, |
2150 | len, mpd->wbc); | 1290 | len, mpd->wbc); |
2151 | else | 1291 | else if (buffer_uninit(page_bufs)) { |
1292 | ext4_set_bh_endio(page_bufs, inode); | ||
1293 | err = block_write_full_page_endio(page, | ||
1294 | noalloc_get_block_write, | ||
1295 | mpd->wbc, ext4_end_io_buffer_write); | ||
1296 | } else | ||
2152 | err = block_write_full_page(page, | 1297 | err = block_write_full_page(page, |
2153 | noalloc_get_block_write, mpd->wbc); | 1298 | noalloc_get_block_write, mpd->wbc); |
2154 | 1299 | ||
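The new branch routes pages whose buffers are marked uninitialized through ext4_end_io_buffer_write(), which will later convert the unwritten extents. buffer_uninit() is presumably the usual BUFFER_FNS-generated accessor over a filesystem-private buffer state bit, along these lines (an assumption, not text from this patch):

	enum ext4_state_bits {
		BH_Uninit		/* allocated on disk but unwritten */
		  = BH_JBDPrivateStart,
	};

	BUFFER_FNS(Uninit, uninit)	/* buffer_uninit(), set_buffer_uninit(), ... */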
@@ -2564,6 +1709,8 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2564 | goto out; | 1709 | goto out; |
2565 | } | 1710 | } |
2566 | 1711 | ||
1712 | BUG_ON(!ext4_handle_valid(handle)); | ||
1713 | |||
2567 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1714 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, |
2568 | do_journal_get_write_access); | 1715 | do_journal_get_write_access); |
2569 | 1716 | ||
@@ -2571,6 +1718,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2571 | write_end_fn); | 1718 | write_end_fn); |
2572 | if (ret == 0) | 1719 | if (ret == 0) |
2573 | ret = err; | 1720 | ret = err; |
1721 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; | ||
2574 | err = ext4_journal_stop(handle); | 1722 | err = ext4_journal_stop(handle); |
2575 | if (!ret) | 1723 | if (!ret) |
2576 | ret = err; | 1724 | ret = err; |
@@ -2741,7 +1889,7 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2741 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 1889 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2742 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 1890 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2743 | 1891 | ||
2744 | if (wbc->sync_mode == WB_SYNC_ALL) | 1892 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2745 | tag = PAGECACHE_TAG_TOWRITE; | 1893 | tag = PAGECACHE_TAG_TOWRITE; |
2746 | else | 1894 | else |
2747 | tag = PAGECACHE_TAG_DIRTY; | 1895 | tag = PAGECACHE_TAG_DIRTY; |
@@ -2973,7 +2121,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2973 | } | 2121 | } |
2974 | 2122 | ||
2975 | retry: | 2123 | retry: |
2976 | if (wbc->sync_mode == WB_SYNC_ALL) | 2124 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2977 | tag_pages_for_writeback(mapping, index, end); | 2125 | tag_pages_for_writeback(mapping, index, end); |
2978 | 2126 | ||
2979 | while (!ret && wbc->nr_to_write > 0) { | 2127 | while (!ret && wbc->nr_to_write > 0) { |
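Both tag hunks extend the same livelock guard from WB_SYNC_ALL to any writeback round with wbc->tagged_writepages set: dirty pages are snapshotted into the TOWRITE tag first, and only tagged pages are serviced, so pages dirtied during the sweep cannot keep the loop alive forever. The two-phase shape, sketched:

	tag_pages_for_writeback(mapping, index, end);	/* DIRTY -> TOWRITE too */
	while (index <= end) {
		nr = pagevec_lookup_tag(&pvec, mapping, &index,
					PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE);
		/* ... write out only the snapshotted pages ... */
	}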
@@ -3450,112 +2598,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3450 | } | 2598 | } |
3451 | 2599 | ||
3452 | /* | 2600 | /* |
3453 | * O_DIRECT for ext3 (or indirect map) based files | ||
3454 | * | ||
3455 | * If the O_DIRECT write will extend the file then add this inode to the | ||
3456 | * orphan list. So recovery will truncate it back to the original size | ||
3457 | * if the machine crashes during the write. | ||
3458 | * | ||
3459 | * If the O_DIRECT write is instantiating holes inside i_size and the machine | ||
3460 | * crashes then stale disk data _may_ be exposed inside the file. But the | ||
3461 | * current VFS code falls back to the buffered path in that case, so we are safe. | ||
3462 | */ | ||
3463 | static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | ||
3464 | const struct iovec *iov, loff_t offset, | ||
3465 | unsigned long nr_segs) | ||
3466 | { | ||
3467 | struct file *file = iocb->ki_filp; | ||
3468 | struct inode *inode = file->f_mapping->host; | ||
3469 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3470 | handle_t *handle; | ||
3471 | ssize_t ret; | ||
3472 | int orphan = 0; | ||
3473 | size_t count = iov_length(iov, nr_segs); | ||
3474 | int retries = 0; | ||
3475 | |||
3476 | if (rw == WRITE) { | ||
3477 | loff_t final_size = offset + count; | ||
3478 | |||
3479 | if (final_size > inode->i_size) { | ||
3480 | /* Credits for sb + inode write */ | ||
3481 | handle = ext4_journal_start(inode, 2); | ||
3482 | if (IS_ERR(handle)) { | ||
3483 | ret = PTR_ERR(handle); | ||
3484 | goto out; | ||
3485 | } | ||
3486 | ret = ext4_orphan_add(handle, inode); | ||
3487 | if (ret) { | ||
3488 | ext4_journal_stop(handle); | ||
3489 | goto out; | ||
3490 | } | ||
3491 | orphan = 1; | ||
3492 | ei->i_disksize = inode->i_size; | ||
3493 | ext4_journal_stop(handle); | ||
3494 | } | ||
3495 | } | ||
3496 | |||
3497 | retry: | ||
3498 | if (rw == READ && ext4_should_dioread_nolock(inode)) | ||
3499 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
3500 | inode->i_sb->s_bdev, iov, | ||
3501 | offset, nr_segs, | ||
3502 | ext4_get_block, NULL, NULL, 0); | ||
3503 | else { | ||
3504 | ret = blockdev_direct_IO(rw, iocb, inode, iov, | ||
3505 | offset, nr_segs, ext4_get_block); | ||
3506 | |||
3507 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
3508 | loff_t isize = i_size_read(inode); | ||
3509 | loff_t end = offset + iov_length(iov, nr_segs); | ||
3510 | |||
3511 | if (end > isize) | ||
3512 | ext4_truncate_failed_write(inode); | ||
3513 | } | ||
3514 | } | ||
3515 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | ||
3516 | goto retry; | ||
3517 | |||
3518 | if (orphan) { | ||
3519 | int err; | ||
3520 | |||
3521 | /* Credits for sb + inode write */ | ||
3522 | handle = ext4_journal_start(inode, 2); | ||
3523 | if (IS_ERR(handle)) { | ||
3524 | /* This is really bad luck. We've written the data | ||
3525 | * but cannot extend i_size. Bail out and pretend | ||
3526 | * the write failed... */ | ||
3527 | ret = PTR_ERR(handle); | ||
3528 | if (inode->i_nlink) | ||
3529 | ext4_orphan_del(NULL, inode); | ||
3530 | |||
3531 | goto out; | ||
3532 | } | ||
3533 | if (inode->i_nlink) | ||
3534 | ext4_orphan_del(handle, inode); | ||
3535 | if (ret > 0) { | ||
3536 | loff_t end = offset + ret; | ||
3537 | if (end > inode->i_size) { | ||
3538 | ei->i_disksize = end; | ||
3539 | i_size_write(inode, end); | ||
3540 | /* | ||
3541 | * We're going to return a positive `ret' | ||
3542 | * here due to non-zero-length I/O, so there's | ||
3543 | * no way of reporting error returns from | ||
3544 | * ext4_mark_inode_dirty() to userspace. So | ||
3545 | * ignore it. | ||
3546 | */ | ||
3547 | ext4_mark_inode_dirty(handle, inode); | ||
3548 | } | ||
3549 | } | ||
3550 | err = ext4_journal_stop(handle); | ||
3551 | if (ret == 0) | ||
3552 | ret = err; | ||
3553 | } | ||
3554 | out: | ||
3555 | return ret; | ||
3556 | } | ||
3557 | |||
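The function removed above (presumably moving to the new indirect-map file this series introduces) is worth reading as a three-phase protocol; a distilled sketch, not the verbatim code:

	/* 1. before a size-extending DIO write: park the inode on the orphan
	 *    list, so a crash mid-write lets recovery truncate back */
	handle = ext4_journal_start(inode, 2);
	ext4_orphan_add(handle, inode);
	ext4_journal_stop(handle);

	/* 2. the direct I/O itself runs outside any journal handle */

	/* 3. afterwards: leave the orphan list and publish the new size */
	handle = ext4_journal_start(inode, 2);
	ext4_orphan_del(handle, inode);
	if (ret > 0 && offset + ret > inode->i_size) {
		i_size_write(inode, offset + ret);
		ext4_mark_inode_dirty(handle, inode);
	}
	ext4_journal_stop(handle);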
3558 | /* | ||
3559 | * ext4_get_block used when preparing for a DIO write or buffer write. | 2601 | * ext4_get_block used when preparing for a DIO write or buffer write. |
3560 | * We allocate an uninitialized extent if blocks haven't been allocated. | 2602 | * We allocate an uninitialized extent if blocks haven't been allocated. |
3561 | * The extent will be converted to initialized after the IO is complete. | 2603 | * The extent will be converted to initialized after the IO is complete. |
@@ -3638,8 +2680,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3638 | goto out; | 2680 | goto out; |
3639 | } | 2681 | } |
3640 | 2682 | ||
3641 | io_end->flag = EXT4_IO_END_UNWRITTEN; | 2683 | /* |
2684 | * It may be over-defensive to check EXT4_IO_END_UNWRITTEN here, | ||
2685 | * but the extra care is cheap and guards against future changes. | ||
2686 | */ | ||
3642 | inode = io_end->inode; | 2687 | inode = io_end->inode; |
2688 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
2689 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
2690 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
2691 | } | ||
3643 | 2692 | ||
3644 | /* Add the io_end to per-inode completed io list*/ | 2693 | /* Add the io_end to per-inode completed io list*/ |
3645 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 2694 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
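Guarding the flag-set keeps i_aiodio_unwritten from being incremented twice if the end_io callback fires more than once for one io_end. The counter presumably drops again once the unwritten extents are converted; a speculative sketch of the matching site (in this tree it should sit near the completed-io worker):

	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
		ext4_convert_unwritten_extents(inode, io_end->offset,
					       io_end->size);
		io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
		atomic_dec(&EXT4_I(inode)->i_aiodio_unwritten);
	}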
@@ -4033,383 +3082,6 @@ unlock: | |||
4033 | return err; | 3082 | return err; |
4034 | } | 3083 | } |
4035 | 3084 | ||
4036 | /* | ||
4037 | * Probably it should be a library function... search for first non-zero word | ||
4038 | * or memcmp with zero_page, whatever is better for particular architecture. | ||
4039 | * Linus? | ||
4040 | */ | ||
4041 | static inline int all_zeroes(__le32 *p, __le32 *q) | ||
4042 | { | ||
4043 | while (p < q) | ||
4044 | if (*p++) | ||
4045 | return 0; | ||
4046 | return 1; | ||
4047 | } | ||
4048 | |||
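The comment's wish for a library function was eventually granted: memchr_inv() returns the first byte differing from a given value, or NULL. If it is available in the target tree, the helper collapses to this sketch:

	static inline int all_zeroes(__le32 *p, __le32 *q)
	{
		/* NULL means every byte in [p, q) was zero */
		return !memchr_inv(p, 0, (q - p) * sizeof(*p));
	}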
4049 | /** | ||
4050 | * ext4_find_shared - find the indirect blocks for partial truncation. | ||
4051 | * @inode: inode in question | ||
4052 | * @depth: depth of the affected branch | ||
4053 | * @offsets: offsets of pointers in that branch (see ext4_block_to_path) | ||
4054 | * @chain: place to store the pointers to partial indirect blocks | ||
4055 | * @top: place to the (detached) top of branch | ||
4056 | * | ||
4057 | * This is a helper function used by ext4_truncate(). | ||
4058 | * | ||
4059 | * When we do truncate() we may have to clean the ends of several | ||
4060 | * indirect blocks but leave the blocks themselves alive. Block is | ||
4061 | * partially truncated if some data below the new i_size is referred | ||
4062 | * from it (and it is on the path to the first completely truncated | ||
4063 | * data block, indeed). We have to free the top of that path along | ||
4064 | * with everything to the right of the path. Since no allocation | ||
4065 | * past the truncation point is possible until ext4_truncate() | ||
4066 | * finishes, we may safely do the latter, but top of branch may | ||
4067 | * require special attention - pageout below the truncation point | ||
4068 | * might try to populate it. | ||
4069 | * | ||
4070 | * We atomically detach the top of branch from the tree, store the | ||
4071 | * block number of its root in *@top, pointers to buffer_heads of | ||
4072 | * partially truncated blocks - in @chain[].bh and pointers to | ||
4073 | * their last elements that should not be removed - in | ||
4074 | * @chain[].p. Return value is the pointer to last filled element | ||
4075 | * of @chain. | ||
4076 | * | ||
4077 | * The work left to caller to do the actual freeing of subtrees: | ||
4078 | * a) free the subtree starting from *@top | ||
4079 | * b) free the subtrees whose roots are stored in | ||
4080 | * (@chain[i].p+1 .. end of @chain[i].bh->b_data) | ||
4081 | * c) free the subtrees growing from the inode past the @chain[0]. | ||
4082 | * (no partially truncated stuff there). */ | ||
4083 | |||
4084 | static Indirect *ext4_find_shared(struct inode *inode, int depth, | ||
4085 | ext4_lblk_t offsets[4], Indirect chain[4], | ||
4086 | __le32 *top) | ||
4087 | { | ||
4088 | Indirect *partial, *p; | ||
4089 | int k, err; | ||
4090 | |||
4091 | *top = 0; | ||
4092 | /* Make k index the deepest non-null offset + 1 */ | ||
4093 | for (k = depth; k > 1 && !offsets[k-1]; k--) | ||
4094 | ; | ||
4095 | partial = ext4_get_branch(inode, k, offsets, chain, &err); | ||
4096 | /* Writer: pointers */ | ||
4097 | if (!partial) | ||
4098 | partial = chain + k-1; | ||
4099 | /* | ||
4100 | * If the branch acquired continuation since we've looked at it - | ||
4101 | * fine, it should all survive and (new) top doesn't belong to us. | ||
4102 | */ | ||
4103 | if (!partial->key && *partial->p) | ||
4104 | /* Writer: end */ | ||
4105 | goto no_top; | ||
4106 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) | ||
4107 | ; | ||
4108 | /* | ||
4109 | * OK, we've found the last block that must survive. The rest of our | ||
4110 | * branch should be detached before unlocking. However, if that rest | ||
4111 | * of branch is all ours and does not grow immediately from the inode | ||
4112 | * it's easier to cheat and just decrement partial->p. | ||
4113 | */ | ||
4114 | if (p == chain + k - 1 && p > chain) { | ||
4115 | p->p--; | ||
4116 | } else { | ||
4117 | *top = *p->p; | ||
4118 | /* Nope, don't do this in ext4. Must leave the tree intact */ | ||
4119 | #if 0 | ||
4120 | *p->p = 0; | ||
4121 | #endif | ||
4122 | } | ||
4123 | /* Writer: end */ | ||
4124 | |||
4125 | while (partial > p) { | ||
4126 | brelse(partial->bh); | ||
4127 | partial--; | ||
4128 | } | ||
4129 | no_top: | ||
4130 | return partial; | ||
4131 | } | ||
4132 | |||
4133 | /* | ||
4134 | * Zero a number of block pointers in either an inode or an indirect block. | ||
4135 | * If we restart the transaction we must again get write access to the | ||
4136 | * indirect block for further modification. | ||
4137 | * | ||
4138 | * We release `count' blocks on disk, but (last - first) may be greater | ||
4139 | * than `count' because there can be holes in there. | ||
4140 | * | ||
4141 | * Return 0 on success, 1 on invalid block range | ||
4142 | * and < 0 on fatal error. | ||
4143 | */ | ||
4144 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | ||
4145 | struct buffer_head *bh, | ||
4146 | ext4_fsblk_t block_to_free, | ||
4147 | unsigned long count, __le32 *first, | ||
4148 | __le32 *last) | ||
4149 | { | ||
4150 | __le32 *p; | ||
4151 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | ||
4152 | int err; | ||
4153 | |||
4154 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
4155 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
4156 | |||
4157 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | ||
4158 | count)) { | ||
4159 | EXT4_ERROR_INODE(inode, "attempt to clear invalid " | ||
4160 | "blocks %llu len %lu", | ||
4161 | (unsigned long long) block_to_free, count); | ||
4162 | return 1; | ||
4163 | } | ||
4164 | |||
4165 | if (try_to_extend_transaction(handle, inode)) { | ||
4166 | if (bh) { | ||
4167 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
4168 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
4169 | if (unlikely(err)) | ||
4170 | goto out_err; | ||
4171 | } | ||
4172 | err = ext4_mark_inode_dirty(handle, inode); | ||
4173 | if (unlikely(err)) | ||
4174 | goto out_err; | ||
4175 | err = ext4_truncate_restart_trans(handle, inode, | ||
4176 | blocks_for_truncate(inode)); | ||
4177 | if (unlikely(err)) | ||
4178 | goto out_err; | ||
4179 | if (bh) { | ||
4180 | BUFFER_TRACE(bh, "retaking write access"); | ||
4181 | err = ext4_journal_get_write_access(handle, bh); | ||
4182 | if (unlikely(err)) | ||
4183 | goto out_err; | ||
4184 | } | ||
4185 | } | ||
4186 | |||
4187 | for (p = first; p < last; p++) | ||
4188 | *p = 0; | ||
4189 | |||
4190 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); | ||
4191 | return 0; | ||
4192 | out_err: | ||
4193 | ext4_std_error(inode->i_sb, err); | ||
4194 | return err; | ||
4195 | } | ||
4196 | |||
4197 | /** | ||
4198 | * ext4_free_data - free a list of data blocks | ||
4199 | * @handle: handle for this transaction | ||
4200 | * @inode: inode we are dealing with | ||
4201 | * @this_bh: indirect buffer_head which contains *@first and *@last | ||
4202 | * @first: array of block numbers | ||
4203 | * @last: points immediately past the end of array | ||
4204 | * | ||
4205 | * We are freeing all blocks referred from that array (numbers are stored as | ||
4206 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. | ||
4207 | * | ||
4208 | * We accumulate contiguous runs of blocks to free. Conveniently, if these | ||
4209 | * blocks are contiguous then releasing them at one time will only affect one | ||
4210 | * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't | ||
4211 | * actually use a lot of journal space. | ||
4212 | * | ||
4213 | * @this_bh will be %NULL if @first and @last point into the inode's direct | ||
4214 | * block pointers. | ||
4215 | */ | ||
4216 | static void ext4_free_data(handle_t *handle, struct inode *inode, | ||
4217 | struct buffer_head *this_bh, | ||
4218 | __le32 *first, __le32 *last) | ||
4219 | { | ||
4220 | ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ | ||
4221 | unsigned long count = 0; /* Number of blocks in the run */ | ||
4222 | __le32 *block_to_free_p = NULL; /* Pointer into inode/ind | ||
4223 | corresponding to | ||
4224 | block_to_free */ | ||
4225 | ext4_fsblk_t nr; /* Current block # */ | ||
4226 | __le32 *p; /* Pointer into inode/ind | ||
4227 | for current block */ | ||
4228 | int err = 0; | ||
4229 | |||
4230 | if (this_bh) { /* For indirect block */ | ||
4231 | BUFFER_TRACE(this_bh, "get_write_access"); | ||
4232 | err = ext4_journal_get_write_access(handle, this_bh); | ||
4233 | /* Important: if we can't update the indirect pointers | ||
4234 | * to the blocks, we can't free them. */ | ||
4235 | if (err) | ||
4236 | return; | ||
4237 | } | ||
4238 | |||
4239 | for (p = first; p < last; p++) { | ||
4240 | nr = le32_to_cpu(*p); | ||
4241 | if (nr) { | ||
4242 | /* accumulate blocks to free if they're contiguous */ | ||
4243 | if (count == 0) { | ||
4244 | block_to_free = nr; | ||
4245 | block_to_free_p = p; | ||
4246 | count = 1; | ||
4247 | } else if (nr == block_to_free + count) { | ||
4248 | count++; | ||
4249 | } else { | ||
4250 | err = ext4_clear_blocks(handle, inode, this_bh, | ||
4251 | block_to_free, count, | ||
4252 | block_to_free_p, p); | ||
4253 | if (err) | ||
4254 | break; | ||
4255 | block_to_free = nr; | ||
4256 | block_to_free_p = p; | ||
4257 | count = 1; | ||
4258 | } | ||
4259 | } | ||
4260 | } | ||
4261 | |||
4262 | if (!err && count > 0) | ||
4263 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, | ||
4264 | count, block_to_free_p, p); | ||
4265 | if (err < 0) | ||
4266 | /* fatal error */ | ||
4267 | return; | ||
4268 | |||
4269 | if (this_bh) { | ||
4270 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | ||
4271 | |||
4272 | /* | ||
4273 | * The buffer head should have an attached journal head at this | ||
4274 | * point. However, if the data is corrupted and an indirect | ||
4275 | * block pointed to itself, it would have been detached when | ||
4276 | * the block was cleared. Check for this instead of OOPSing. | ||
4277 | */ | ||
4278 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | ||
4279 | ext4_handle_dirty_metadata(handle, inode, this_bh); | ||
4280 | else | ||
4281 | EXT4_ERROR_INODE(inode, | ||
4282 | "circular indirect block detected at " | ||
4283 | "block %llu", | ||
4284 | (unsigned long long) this_bh->b_blocknr); | ||
4285 | } | ||
4286 | } | ||
4287 | |||
4288 | /** | ||
4289 | * ext4_free_branches - free an array of branches | ||
4290 | * @handle: JBD handle for this transaction | ||
4291 | * @inode: inode we are dealing with | ||
4292 | * @parent_bh: the buffer_head which contains *@first and *@last | ||
4293 | * @first: array of block numbers | ||
4294 | * @last: pointer immediately past the end of array | ||
4295 | * @depth: depth of the branches to free | ||
4296 | * | ||
4297 | * We are freeing all blocks referred from these branches (numbers are | ||
4298 | * stored as little-endian 32-bit) and updating @inode->i_blocks | ||
4299 | * appropriately. | ||
4300 | */ | ||
4301 | static void ext4_free_branches(handle_t *handle, struct inode *inode, | ||
4302 | struct buffer_head *parent_bh, | ||
4303 | __le32 *first, __le32 *last, int depth) | ||
4304 | { | ||
4305 | ext4_fsblk_t nr; | ||
4306 | __le32 *p; | ||
4307 | |||
4308 | if (ext4_handle_is_aborted(handle)) | ||
4309 | return; | ||
4310 | |||
4311 | if (depth--) { | ||
4312 | struct buffer_head *bh; | ||
4313 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4314 | p = last; | ||
4315 | while (--p >= first) { | ||
4316 | nr = le32_to_cpu(*p); | ||
4317 | if (!nr) | ||
4318 | continue; /* A hole */ | ||
4319 | |||
4320 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
4321 | nr, 1)) { | ||
4322 | EXT4_ERROR_INODE(inode, | ||
4323 | "invalid indirect mapped " | ||
4324 | "block %lu (level %d)", | ||
4325 | (unsigned long) nr, depth); | ||
4326 | break; | ||
4327 | } | ||
4328 | |||
4329 | /* Go read the buffer for the next level down */ | ||
4330 | bh = sb_bread(inode->i_sb, nr); | ||
4331 | |||
4332 | /* | ||
4333 | * A read failure? Report error and clear slot | ||
4334 | * (should be rare). | ||
4335 | */ | ||
4336 | if (!bh) { | ||
4337 | EXT4_ERROR_INODE_BLOCK(inode, nr, | ||
4338 | "Read failure"); | ||
4339 | continue; | ||
4340 | } | ||
4341 | |||
4342 | /* This zaps the entire block. Bottom up. */ | ||
4343 | BUFFER_TRACE(bh, "free child branches"); | ||
4344 | ext4_free_branches(handle, inode, bh, | ||
4345 | (__le32 *) bh->b_data, | ||
4346 | (__le32 *) bh->b_data + addr_per_block, | ||
4347 | depth); | ||
4348 | brelse(bh); | ||
4349 | |||
4350 | /* | ||
4351 | * Everything below this pointer has been | ||
4352 | * released. Now let this top-of-subtree go. | ||
4353 | * | ||
4354 | * We want the freeing of this indirect block to be | ||
4355 | * atomic in the journal with the updating of the | ||
4356 | * bitmap block which owns it. So make some room in | ||
4357 | * the journal. | ||
4358 | * | ||
4359 | * We zero the parent pointer *after* freeing its | ||
4360 | * pointee in the bitmaps, so if extend_transaction() | ||
4361 | * for some reason fails to put the bitmap changes and | ||
4362 | * the release into the same transaction, recovery | ||
4363 | * will merely complain about releasing a free block, | ||
4364 | * rather than leaking blocks. | ||
4365 | */ | ||
4366 | if (ext4_handle_is_aborted(handle)) | ||
4367 | return; | ||
4368 | if (try_to_extend_transaction(handle, inode)) { | ||
4369 | ext4_mark_inode_dirty(handle, inode); | ||
4370 | ext4_truncate_restart_trans(handle, inode, | ||
4371 | blocks_for_truncate(inode)); | ||
4372 | } | ||
4373 | |||
4374 | /* | ||
4375 | * The forget flag here is critical because if | ||
4376 | * we are journaling (and not doing data | ||
4377 | * journaling), we have to make sure a revoke | ||
4378 | * record is written to prevent the journal | ||
4379 | * replay from overwriting the (former) | ||
4380 | * indirect block if it gets reallocated as a | ||
4381 | * data block. This must happen in the same | ||
4382 | * transaction where the data blocks are | ||
4383 | * actually freed. | ||
4384 | */ | ||
4385 | ext4_free_blocks(handle, inode, NULL, nr, 1, | ||
4386 | EXT4_FREE_BLOCKS_METADATA| | ||
4387 | EXT4_FREE_BLOCKS_FORGET); | ||
4388 | |||
4389 | if (parent_bh) { | ||
4390 | /* | ||
4391 | * The block which we have just freed is | ||
4392 | * pointed to by an indirect block: journal it | ||
4393 | */ | ||
4394 | BUFFER_TRACE(parent_bh, "get_write_access"); | ||
4395 | if (!ext4_journal_get_write_access(handle, | ||
4396 | parent_bh)){ | ||
4397 | *p = 0; | ||
4398 | BUFFER_TRACE(parent_bh, | ||
4399 | "call ext4_handle_dirty_metadata"); | ||
4400 | ext4_handle_dirty_metadata(handle, | ||
4401 | inode, | ||
4402 | parent_bh); | ||
4403 | } | ||
4404 | } | ||
4405 | } | ||
4406 | } else { | ||
4407 | /* We have reached the bottom of the tree. */ | ||
4408 | BUFFER_TRACE(parent_bh, "free data blocks"); | ||
4409 | ext4_free_data(handle, inode, parent_bh, first, last); | ||
4410 | } | ||
4411 | } | ||
4412 | |||
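For scale: the recursion above is bounded by the block-map geometry, never more than three indirect levels. With 4 KiB blocks each indirect block holds 1024 pointers, so a block-mapped file covers at most 12 + 1024 + 1024^2 + 1024^3 ≈ 1.07 * 10^9 blocks, roughly 4 TiB — the s_bitmap_maxbytes ceiling that the max_block computation in ext4_truncate() below is derived from.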
4413 | int ext4_can_truncate(struct inode *inode) | 3085 | int ext4_can_truncate(struct inode *inode) |
4414 | { | 3086 | { |
4415 | if (S_ISREG(inode->i_mode)) | 3087 | if (S_ISREG(inode->i_mode)) |
@@ -4476,19 +3148,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4476 | */ | 3148 | */ |
4477 | void ext4_truncate(struct inode *inode) | 3149 | void ext4_truncate(struct inode *inode) |
4478 | { | 3150 | { |
4479 | handle_t *handle; | ||
4480 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
4481 | __le32 *i_data = ei->i_data; | ||
4482 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4483 | struct address_space *mapping = inode->i_mapping; | ||
4484 | ext4_lblk_t offsets[4]; | ||
4485 | Indirect chain[4]; | ||
4486 | Indirect *partial; | ||
4487 | __le32 nr = 0; | ||
4488 | int n = 0; | ||
4489 | ext4_lblk_t last_block, max_block; | ||
4490 | unsigned blocksize = inode->i_sb->s_blocksize; | ||
4491 | |||
4492 | trace_ext4_truncate_enter(inode); | 3151 | trace_ext4_truncate_enter(inode); |
4493 | 3152 | ||
4494 | if (!ext4_can_truncate(inode)) | 3153 | if (!ext4_can_truncate(inode)) |
@@ -4499,149 +3158,11 @@ void ext4_truncate(struct inode *inode) | |||
4499 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 3158 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
4500 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); | 3159 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
4501 | 3160 | ||
4502 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 3161 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
4503 | ext4_ext_truncate(inode); | 3162 | ext4_ext_truncate(inode); |
4504 | trace_ext4_truncate_exit(inode); | 3163 | else |
4505 | return; | 3164 | ext4_ind_truncate(inode); |
4506 | } | ||
4507 | |||
4508 | handle = start_transaction(inode); | ||
4509 | if (IS_ERR(handle)) | ||
4510 | return; /* AKPM: return what? */ | ||
4511 | |||
4512 | last_block = (inode->i_size + blocksize-1) | ||
4513 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
4514 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | ||
4515 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
4516 | |||
4517 | if (inode->i_size & (blocksize - 1)) | ||
4518 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | ||
4519 | goto out_stop; | ||
4520 | |||
4521 | if (last_block != max_block) { | ||
4522 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | ||
4523 | if (n == 0) | ||
4524 | goto out_stop; /* error */ | ||
4525 | } | ||
4526 | |||
4527 | /* | ||
4528 | * OK. This truncate is going to happen. We add the inode to the | ||
4529 | * orphan list, so that if this truncate spans multiple transactions, | ||
4530 | * and we crash, we will resume the truncate when the filesystem | ||
4531 | * recovers. It also marks the inode dirty, to catch the new size. | ||
4532 | * | ||
4533 | * Implication: the file must always be in a sane, consistent | ||
4534 | * truncatable state while each transaction commits. | ||
4535 | */ | ||
4536 | if (ext4_orphan_add(handle, inode)) | ||
4537 | goto out_stop; | ||
4538 | |||
4539 | /* | ||
4540 | * From here we block out all ext4_get_block() callers who want to | ||
4541 | * modify the block allocation tree. | ||
4542 | */ | ||
4543 | down_write(&ei->i_data_sem); | ||
4544 | |||
4545 | ext4_discard_preallocations(inode); | ||
4546 | |||
4547 | /* | ||
4548 | * The orphan list entry will now protect us from any crash which | ||
4549 | * occurs before the truncate completes, so it is now safe to propagate | ||
4550 | * the new, shorter inode size (held for now in i_size) into the | ||
4551 | * on-disk inode. We do this via i_disksize, which is the value which | ||
4552 | * ext4 *really* writes onto the disk inode. | ||
4553 | */ | ||
4554 | ei->i_disksize = inode->i_size; | ||
4555 | |||
4556 | if (last_block == max_block) { | ||
4557 | /* | ||
4558 | * It is unnecessary to free any data blocks if last_block is | ||
4559 | * equal to the indirect block limit. | ||
4560 | */ | ||
4561 | goto out_unlock; | ||
4562 | } else if (n == 1) { /* direct blocks */ | ||
4563 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | ||
4564 | i_data + EXT4_NDIR_BLOCKS); | ||
4565 | goto do_indirects; | ||
4566 | } | ||
4567 | |||
4568 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); | ||
4569 | /* Kill the top of shared branch (not detached) */ | ||
4570 | if (nr) { | ||
4571 | if (partial == chain) { | ||
4572 | /* Shared branch grows from the inode */ | ||
4573 | ext4_free_branches(handle, inode, NULL, | ||
4574 | &nr, &nr+1, (chain+n-1) - partial); | ||
4575 | *partial->p = 0; | ||
4576 | /* | ||
4577 | * We mark the inode dirty prior to restart, | ||
4578 | * and prior to stop. No need for it here. | ||
4579 | */ | ||
4580 | } else { | ||
4581 | /* Shared branch grows from an indirect block */ | ||
4582 | BUFFER_TRACE(partial->bh, "get_write_access"); | ||
4583 | ext4_free_branches(handle, inode, partial->bh, | ||
4584 | partial->p, | ||
4585 | partial->p+1, (chain+n-1) - partial); | ||
4586 | } | ||
4587 | } | ||
4588 | /* Clear the ends of indirect blocks on the shared branch */ | ||
4589 | while (partial > chain) { | ||
4590 | ext4_free_branches(handle, inode, partial->bh, partial->p + 1, | ||
4591 | (__le32*)partial->bh->b_data+addr_per_block, | ||
4592 | (chain+n-1) - partial); | ||
4593 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
4594 | brelse(partial->bh); | ||
4595 | partial--; | ||
4596 | } | ||
4597 | do_indirects: | ||
4598 | /* Kill the remaining (whole) subtrees */ | ||
4599 | switch (offsets[0]) { | ||
4600 | default: | ||
4601 | nr = i_data[EXT4_IND_BLOCK]; | ||
4602 | if (nr) { | ||
4603 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); | ||
4604 | i_data[EXT4_IND_BLOCK] = 0; | ||
4605 | } | ||
4606 | case EXT4_IND_BLOCK: | ||
4607 | nr = i_data[EXT4_DIND_BLOCK]; | ||
4608 | if (nr) { | ||
4609 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); | ||
4610 | i_data[EXT4_DIND_BLOCK] = 0; | ||
4611 | } | ||
4612 | case EXT4_DIND_BLOCK: | ||
4613 | nr = i_data[EXT4_TIND_BLOCK]; | ||
4614 | if (nr) { | ||
4615 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); | ||
4616 | i_data[EXT4_TIND_BLOCK] = 0; | ||
4617 | } | ||
4618 | case EXT4_TIND_BLOCK: | ||
4619 | ; | ||
4620 | } | ||
4621 | |||
4622 | out_unlock: | ||
4623 | up_write(&ei->i_data_sem); | ||
4624 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4625 | ext4_mark_inode_dirty(handle, inode); | ||
4626 | |||
4627 | /* | ||
4628 | * In a multi-transaction truncate, we only make the final transaction | ||
4629 | * synchronous | ||
4630 | */ | ||
4631 | if (IS_SYNC(inode)) | ||
4632 | ext4_handle_sync(handle); | ||
4633 | out_stop: | ||
4634 | /* | ||
4635 | * If this was a simple ftruncate(), and the file will remain alive | ||
4636 | * then we need to clear up the orphan record which we created above. | ||
4637 | * However, if this was a real unlink then we were called by | ||
4638 | * ext4_delete_inode(), and we allow that function to clean up the | ||
4639 | * orphan info for us. | ||
4640 | */ | ||
4641 | if (inode->i_nlink) | ||
4642 | ext4_orphan_del(handle, inode); | ||
4643 | 3165 | ||
4644 | ext4_journal_stop(handle); | ||
4645 | trace_ext4_truncate_exit(inode); | 3166 | trace_ext4_truncate_exit(inode); |
4646 | } | 3167 | } |
4647 | 3168 | ||
@@ -5012,7 +3533,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5012 | (S_ISLNK(inode->i_mode) && | 3533 | (S_ISLNK(inode->i_mode) && |
5013 | !ext4_inode_is_fast_symlink(inode))) { | 3534 | !ext4_inode_is_fast_symlink(inode))) { |
5014 | /* Validate block references which are part of inode */ | 3535 | /* Validate block references which are part of inode */ |
5015 | ret = ext4_check_inode_blockref(inode); | 3536 | ret = ext4_ind_check_inode(inode); |
5016 | } | 3537 | } |
5017 | if (ret) | 3538 | if (ret) |
5018 | goto bad_inode; | 3539 | goto bad_inode; |
@@ -5459,34 +3980,10 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
5459 | return 0; | 3980 | return 0; |
5460 | } | 3981 | } |
5461 | 3982 | ||
5462 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | ||
5463 | int chunk) | ||
5464 | { | ||
5465 | int indirects; | ||
5466 | |||
5467 | /* if nrblocks are contiguous */ | ||
5468 | if (chunk) { | ||
5469 | /* | ||
5470 | * With N contiguous data blocks, we need at most | ||
5471 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | ||
5472 | * 2 dindirect blocks, and 1 tindirect block | ||
5473 | */ | ||
5474 | return DIV_ROUND_UP(nrblocks, | ||
5475 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | ||
5476 | } | ||
5477 | /* | ||
5478 | * if nrblocks are not contiguous, worst case each block touches | ||
5479 | * an indirect block, and each indirect block touches a double indirect | ||
5480 | * block, plus a triple indirect block | ||
5481 | */ | ||
5482 | indirects = nrblocks * 2 + 1; | ||
5483 | return indirects; | ||
5484 | } | ||
5485 | |||
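Plugging numbers into the estimate just removed: journaling a write of 1000 contiguous blocks on a 4 KiB-block filesystem costs DIV_ROUND_UP(1000, 1024) + 4 = 5 metadata-block credits, while 1000 scattered blocks are charged the pessimistic 1000 * 2 + 1 = 2001 credits, each block potentially touching its own indirect and double-indirect block plus the shared triple-indirect.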
5486 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 3983 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5487 | { | 3984 | { |
5488 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 3985 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
5489 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); | 3986 | return ext4_ind_trans_blocks(inode, nrblocks, chunk); |
5490 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 3987 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); |
5491 | } | 3988 | } |
5492 | 3989 | ||
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c | |||
index 808c554e773f..f18bfe37aff8 100644 | |||
--- a/fs/ext4/ioctl.c | |||
+++ b/fs/ext4/ioctl.c | |||
@@ -202,8 +202,9 @@ setversion_out: | |||
202 | struct super_block *sb = inode->i_sb; | 202 | struct super_block *sb = inode->i_sb; |
203 | int err, err2=0; | 203 | int err, err2=0; |
204 | 204 | ||
205 | if (!capable(CAP_SYS_RESOURCE)) | 205 | err = ext4_resize_begin(sb); |
206 | return -EPERM; | 206 | if (err) |
207 | return err; | ||
207 | 208 | ||
208 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | 209 | if (get_user(n_blocks_count, (__u32 __user *)arg)) |
209 | return -EFAULT; | 210 | return -EFAULT; |
@@ -221,6 +222,7 @@ setversion_out: | |||
221 | if (err == 0) | 222 | if (err == 0) |
222 | err = err2; | 223 | err = err2; |
223 | mnt_drop_write(filp->f_path.mnt); | 224 | mnt_drop_write(filp->f_path.mnt); |
225 | ext4_resize_end(sb); | ||
224 | 226 | ||
225 | return err; | 227 | return err; |
226 | } | 228 | } |
@@ -271,8 +273,9 @@ mext_out: | |||
271 | struct super_block *sb = inode->i_sb; | 273 | struct super_block *sb = inode->i_sb; |
272 | int err, err2=0; | 274 | int err, err2=0; |
273 | 275 | ||
274 | if (!capable(CAP_SYS_RESOURCE)) | 276 | err = ext4_resize_begin(sb); |
275 | return -EPERM; | 277 | if (err) |
278 | return err; | ||
276 | 279 | ||
277 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, | 280 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, |
278 | sizeof(input))) | 281 | sizeof(input))) |
@@ -291,6 +294,7 @@ mext_out: | |||
291 | if (err == 0) | 294 | if (err == 0) |
292 | err = err2; | 295 | err = err2; |
293 | mnt_drop_write(filp->f_path.mnt); | 296 | mnt_drop_write(filp->f_path.mnt); |
297 | ext4_resize_end(sb); | ||
294 | 298 | ||
295 | return err; | 299 | return err; |
296 | } | 300 | } |
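Both resize ioctls now trade the open-coded capability test for an ext4_resize_begin()/ext4_resize_end() bracket, which can also serialize concurrent resizers. A plausible sketch of the pair — the real helpers live elsewhere in this series, and the flag-bit name is an assumption:

	int ext4_resize_begin(struct super_block *sb)
	{
		if (!capable(CAP_SYS_RESOURCE))
			return -EPERM;
		/* allow only one resizer at a time */
		if (test_and_set_bit_lock(EXT4_RESIZING,
					  &EXT4_SB(sb)->s_resize_flags))
			return -EBUSY;
		return 0;
	}

	void ext4_resize_end(struct super_block *sb)
	{
		clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
		smp_mb__after_clear_bit();
	}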
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c | |||
index 6ed859d56850..17a5a57c415a 100644 | |||
--- a/fs/ext4/mballoc.c | |||
+++ b/fs/ext4/mballoc.c | |||
@@ -75,8 +75,8 @@ | |||
75 | * | 75 | * |
76 | * The inode preallocation space is used looking at the _logical_ start | 76 | * The inode preallocation space is used looking at the _logical_ start |
77 | * block. If only the logical file block falls within the range of prealloc | 77 | * block. If only the logical file block falls within the range of prealloc |
78 | * space we will consume the particular prealloc space. This make sure that | 78 | * space we will consume the particular prealloc space. This makes sure that |
79 | * that the we have contiguous physical blocks representing the file blocks | 79 | * we have contiguous physical blocks representing the file blocks |
80 | * | 80 | * |
81 | * The important thing to be noted in case of inode prealloc space is that | 81 | * The important thing to be noted in case of inode prealloc space is that |
82 | * we don't modify the values associated to inode prealloc space except | 82 | * we don't modify the values associated to inode prealloc space except |
@@ -84,7 +84,7 @@ | |||
84 | * | 84 | * |
85 | * If we are not able to find blocks in the inode prealloc space and if we | 85 | * If we are not able to find blocks in the inode prealloc space and if we |
86 | * have the group allocation flag set then we look at the locality group | 86 | * have the group allocation flag set then we look at the locality group |
87 | * prealloc space. These are per CPU prealloc list repreasented as | 87 | * prealloc space. These are per CPU prealloc list represented as |
88 | * | 88 | * |
89 | * ext4_sb_info.s_locality_groups[smp_processor_id()] | 89 | * ext4_sb_info.s_locality_groups[smp_processor_id()] |
90 | * | 90 | * |
@@ -128,12 +128,13 @@ | |||
128 | * we are doing a group prealloc we try to normalize the request to | 128 | * we are doing a group prealloc we try to normalize the request to |
129 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is | 129 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is |
130 | * 512 blocks. This can be tuned via | 130 | * 512 blocks. This can be tuned via |
131 | * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in | 131 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in |
132 | * terms of number of blocks. If we have mounted the file system with -O | 132 | * terms of number of blocks. If we have mounted the file system with -O |
133 | * stripe=<value> option the group prealloc request is normalized to the | 133 | * stripe=<value> option the group prealloc request is normalized to the |
134 | * stripe value (sbi->s_stripe) | 134 | smallest multiple of the stripe value (sbi->s_stripe) which is |
135 | * greater than the default mb_group_prealloc. | ||
135 | * | 136 | * |
136 | * The regular allocator(using the buddy cache) supports few tunables. | 137 | * The regular allocator (using the buddy cache) supports a few tunables. |
137 | * | 138 | * |
138 | * /sys/fs/ext4/<partition>/mb_min_to_scan | 139 | * /sys/fs/ext4/<partition>/mb_min_to_scan |
139 | * /sys/fs/ext4/<partition>/mb_max_to_scan | 140 | * /sys/fs/ext4/<partition>/mb_max_to_scan |
@@ -152,7 +153,7 @@ | |||
152 | * best extent in the found extents. Searching for the blocks starts with | 153 | * best extent in the found extents. Searching for the blocks starts with |
153 | * the group specified as the goal value in allocation context via | 154 | * the group specified as the goal value in allocation context via |
154 | * ac_g_ex. Each group is first checked based on the criteria whether it | 155 | * ac_g_ex. Each group is first checked based on the criteria whether it |
155 | * can used for allocation. ext4_mb_good_group explains how the groups are | 156 | * can be used for allocation. ext4_mb_good_group explains how the groups are |
156 | * checked. | 157 | * checked. |
157 | * | 158 | * |
158 | * Both prealloc spaces are populated as described above. So for the first | 159 | * Both prealloc spaces are populated as described above. So for the first |
@@ -492,10 +493,11 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
492 | b2 = (unsigned char *) bitmap; | 493 | b2 = (unsigned char *) bitmap; |
493 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 494 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
494 | if (b1[i] != b2[i]) { | 495 | if (b1[i] != b2[i]) { |
495 | printk(KERN_ERR "corruption in group %u " | 496 | ext4_msg(e4b->bd_sb, KERN_ERR, |
496 | "at byte %u(%u): %x in copy != %x " | 497 | "corruption in group %u " |
497 | "on disk/prealloc\n", | 498 | "at byte %u(%u): %x in copy != %x " |
498 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 499 | "on disk/prealloc", |
500 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | ||
499 | BUG(); | 501 | BUG(); |
500 | } | 502 | } |
501 | } | 503 | } |
@@ -1125,7 +1127,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
1125 | grp = ext4_get_group_info(sb, group); | 1127 | grp = ext4_get_group_info(sb, group); |
1126 | 1128 | ||
1127 | e4b->bd_blkbits = sb->s_blocksize_bits; | 1129 | e4b->bd_blkbits = sb->s_blocksize_bits; |
1128 | e4b->bd_info = ext4_get_group_info(sb, group); | 1130 | e4b->bd_info = grp; |
1129 | e4b->bd_sb = sb; | 1131 | e4b->bd_sb = sb; |
1130 | e4b->bd_group = group; | 1132 | e4b->bd_group = group; |
1131 | e4b->bd_buddy_page = NULL; | 1133 | e4b->bd_buddy_page = NULL; |
@@ -1281,7 +1283,7 @@ static void mb_clear_bits(void *bm, int cur, int len) | |||
1281 | } | 1283 | } |
1282 | } | 1284 | } |
1283 | 1285 | ||
1284 | static void mb_set_bits(void *bm, int cur, int len) | 1286 | void ext4_set_bits(void *bm, int cur, int len) |
1285 | { | 1287 | { |
1286 | __u32 *addr; | 1288 | __u32 *addr; |
1287 | 1289 | ||
@@ -1510,7 +1512,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1510 | } | 1512 | } |
1511 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); | 1513 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); |
1512 | 1514 | ||
1513 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | 1515 | ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); |
1514 | mb_check_buddy(e4b); | 1516 | mb_check_buddy(e4b); |
1515 | 1517 | ||
1516 | return ret; | 1518 | return ret; |
@@ -2223,8 +2225,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2223 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2225 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2224 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2226 | meta_group_info = kmalloc(metalen, GFP_KERNEL); |
2225 | if (meta_group_info == NULL) { | 2227 | if (meta_group_info == NULL) { |
2226 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | 2228 | ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " |
2227 | "buddy group\n"); | 2229 | "for a buddy group"); |
2228 | goto exit_meta_group_info; | 2230 | goto exit_meta_group_info; |
2229 | } | 2231 | } |
2230 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = | 2232 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = |
@@ -2237,7 +2239,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2237 | 2239 | ||
2238 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); | 2240 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2239 | if (meta_group_info[i] == NULL) { | 2241 | if (meta_group_info[i] == NULL) { |
2240 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | 2242 | ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); |
2241 | goto exit_group_info; | 2243 | goto exit_group_info; |
2242 | } | 2244 | } |
2243 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | 2245 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); |
@@ -2279,8 +2281,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2279 | 2281 | ||
2280 | exit_group_info: | 2282 | exit_group_info: |
2281 | /* If a meta_group_info table has been allocated, release it now */ | 2283 | /* If a meta_group_info table has been allocated, release it now */ |
2282 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) | 2284 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { |
2283 | kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); | 2285 | kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); |
2286 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; | ||
2287 | } | ||
2284 | exit_meta_group_info: | 2288 | exit_meta_group_info: |
2285 | return -ENOMEM; | 2289 | return -ENOMEM; |
2286 | } /* ext4_mb_add_groupinfo */ | 2290 | } /* ext4_mb_add_groupinfo */ |
@@ -2328,23 +2332,26 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2328 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2332 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
2329 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2333 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
2330 | * So a two level scheme suffices for now. */ | 2334 | * So a two level scheme suffices for now. */ |
2331 | sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); | 2335 | sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); |
2332 | if (sbi->s_group_info == NULL) { | 2336 | if (sbi->s_group_info == NULL) { |
2333 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2337 | ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); |
2334 | return -ENOMEM; | 2338 | return -ENOMEM; |
2335 | } | 2339 | } |
2336 | sbi->s_buddy_cache = new_inode(sb); | 2340 | sbi->s_buddy_cache = new_inode(sb); |
2337 | if (sbi->s_buddy_cache == NULL) { | 2341 | if (sbi->s_buddy_cache == NULL) { |
2338 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); | 2342 | ext4_msg(sb, KERN_ERR, "can't get new inode"); |
2339 | goto err_freesgi; | 2343 | goto err_freesgi; |
2340 | } | 2344 | } |
2341 | sbi->s_buddy_cache->i_ino = get_next_ino(); | 2345 | /* To avoid potentially colliding with a valid on-disk inode number, |
2346 | * use EXT4_BAD_INO for the buddy cache inode number. This inode is | ||
2347 | * not in the inode hash, so it should never be found by iget(), but | ||
2348 | * this will avoid confusion if it ever shows up during debugging. */ | ||
2349 | sbi->s_buddy_cache->i_ino = EXT4_BAD_INO; | ||
2342 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2350 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2343 | for (i = 0; i < ngroups; i++) { | 2351 | for (i = 0; i < ngroups; i++) { |
2344 | desc = ext4_get_group_desc(sb, i, NULL); | 2352 | desc = ext4_get_group_desc(sb, i, NULL); |
2345 | if (desc == NULL) { | 2353 | if (desc == NULL) { |
2346 | printk(KERN_ERR | 2354 | ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i); |
2347 | "EXT4-fs: can't read descriptor %u\n", i); | ||
2348 | goto err_freebuddy; | 2355 | goto err_freebuddy; |
2349 | } | 2356 | } |
2350 | if (ext4_mb_add_groupinfo(sb, i, desc) != 0) | 2357 | if (ext4_mb_add_groupinfo(sb, i, desc) != 0) |
@@ -2362,7 +2369,7 @@ err_freebuddy: | |||
2362 | kfree(sbi->s_group_info[i]); | 2369 | kfree(sbi->s_group_info[i]); |
2363 | iput(sbi->s_buddy_cache); | 2370 | iput(sbi->s_buddy_cache); |
2364 | err_freesgi: | 2371 | err_freesgi: |
2365 | kfree(sbi->s_group_info); | 2372 | ext4_kvfree(sbi->s_group_info); |
2366 | return -ENOMEM; | 2373 | return -ENOMEM; |
2367 | } | 2374 | } |
2368 | 2375 | ||
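ext4_kvzalloc()/ext4_kvfree() replace the plain kzalloc()/kfree() pair so that the group-info array, which can reach hundreds of kilobytes on large filesystems, can fall back to vmalloc. A minimal sketch, assuming they mirror the later generic kvzalloc()/kvfree():

	void *ext4_kvzalloc(size_t size, gfp_t flags)
	{
		void *p = kzalloc(size, flags | __GFP_NOWARN);

		if (!p)			/* high-order kmalloc failed */
			p = vzalloc(size);
		return p;
	}

	void ext4_kvfree(void *ptr)
	{
		if (is_vmalloc_addr(ptr))
			vfree(ptr);
		else
			kfree(ptr);
	}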
@@ -2404,14 +2411,15 @@ static int ext4_groupinfo_create_slab(size_t size) | |||
2404 | slab_size, 0, SLAB_RECLAIM_ACCOUNT, | 2411 | slab_size, 0, SLAB_RECLAIM_ACCOUNT, |
2405 | NULL); | 2412 | NULL); |
2406 | 2413 | ||
2414 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2415 | |||
2407 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); | 2416 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); |
2408 | if (!cachep) { | 2417 | if (!cachep) { |
2409 | printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); | 2418 | printk(KERN_EMERG |
2419 | "EXT4-fs: no memory for groupinfo slab cache\n"); | ||
2410 | return -ENOMEM; | 2420 | return -ENOMEM; |
2411 | } | 2421 | } |
2412 | 2422 | ||
2413 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2414 | |||
2415 | return 0; | 2423 | return 0; |
2416 | } | 2424 | } |
2417 | 2425 | ||
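Publishing cachep into ext4_groupinfo_caches[] before the mutex is dropped closes a window in which a second mount could observe the slot still NULL and try to create a second slab cache under the same name. The intended create-once discipline, sketched (slab name and size variables are illustrative):

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (!ext4_groupinfo_caches[cache_index]) {
		cachep = kmem_cache_create(slab_name, slab_size, 0,
					   SLAB_RECLAIM_ACCOUNT, NULL);
		ext4_groupinfo_caches[cache_index] = cachep; /* under the lock */
	}
	mutex_unlock(&ext4_grpinfo_slab_create_mutex);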
@@ -2457,12 +2465,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2457 | i++; | 2465 | i++; |
2458 | } while (i <= sb->s_blocksize_bits + 1); | 2466 | } while (i <= sb->s_blocksize_bits + 1); |
2459 | 2467 | ||
2460 | /* init file for buddy data */ | ||
2461 | ret = ext4_mb_init_backend(sb); | ||
2462 | if (ret != 0) { | ||
2463 | goto out; | ||
2464 | } | ||
2465 | |||
2466 | spin_lock_init(&sbi->s_md_lock); | 2468 | spin_lock_init(&sbi->s_md_lock); |
2467 | spin_lock_init(&sbi->s_bal_lock); | 2469 | spin_lock_init(&sbi->s_bal_lock); |
2468 | 2470 | ||
@@ -2472,6 +2474,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2472 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; | 2474 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; |
2473 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; | 2475 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; |
2474 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2476 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2477 | /* | ||
2478 | * If there is a s_stripe > 1, then we set the s_mb_group_prealloc | ||
2479 | * to the lowest multiple of s_stripe which is bigger than | ||
2480 | * the s_mb_group_prealloc as determined above. We want | ||
2481 | * the preallocation size to be an exact multiple of the | ||
2482 | * RAID stripe size so that preallocations don't fragment | ||
2483 | * the stripes. | ||
2484 | */ | ||
2485 | if (sbi->s_stripe > 1) { | ||
2486 | sbi->s_mb_group_prealloc = roundup( | ||
2487 | sbi->s_mb_group_prealloc, sbi->s_stripe); | ||
2488 | } | ||
2475 | 2489 | ||
2476 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2490 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2477 | if (sbi->s_locality_groups == NULL) { | 2491 | if (sbi->s_locality_groups == NULL) { |
@@ -2487,6 +2501,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2487 | spin_lock_init(&lg->lg_prealloc_lock); | 2501 | spin_lock_init(&lg->lg_prealloc_lock); |
2488 | } | 2502 | } |
2489 | 2503 | ||
2504 | /* init file for buddy data */ | ||
2505 | ret = ext4_mb_init_backend(sb); | ||
2506 | if (ret != 0) { | ||
2507 | goto out; | ||
2508 | } | ||
2509 | |||
2490 | if (sbi->s_proc) | 2510 | if (sbi->s_proc) |
2491 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2511 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2492 | &ext4_mb_seq_groups_fops, sb); | 2512 | &ext4_mb_seq_groups_fops, sb); |
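roundup() picks the smallest multiple of s_stripe at or above the default; with the kernel's roundup(x, y) = ((x + y - 1) / y) * y and MB_DEFAULT_GROUP_PREALLOC assumed to be 512 blocks:

	roundup(512, 16)  == 512   /* already a multiple: unchanged */
	roundup(512, 48)  == 528   /* ((512 + 47) / 48) * 48 = 11 * 48 */
	roundup(512, 768) == 768   /* stripe above default: one full stripe */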
@@ -2544,32 +2564,32 @@ int ext4_mb_release(struct super_block *sb) | |||
2544 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2564 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2545 | for (i = 0; i < num_meta_group_infos; i++) | 2565 | for (i = 0; i < num_meta_group_infos; i++) |
2546 | kfree(sbi->s_group_info[i]); | 2566 | kfree(sbi->s_group_info[i]); |
2547 | kfree(sbi->s_group_info); | 2567 | ext4_kvfree(sbi->s_group_info); |
2548 | } | 2568 | } |
2549 | kfree(sbi->s_mb_offsets); | 2569 | kfree(sbi->s_mb_offsets); |
2550 | kfree(sbi->s_mb_maxs); | 2570 | kfree(sbi->s_mb_maxs); |
2551 | if (sbi->s_buddy_cache) | 2571 | if (sbi->s_buddy_cache) |
2552 | iput(sbi->s_buddy_cache); | 2572 | iput(sbi->s_buddy_cache); |
2553 | if (sbi->s_mb_stats) { | 2573 | if (sbi->s_mb_stats) { |
2554 | printk(KERN_INFO | 2574 | ext4_msg(sb, KERN_INFO, |
2555 | "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n", | 2575 | "mballoc: %u blocks %u reqs (%u success)", |
2556 | atomic_read(&sbi->s_bal_allocated), | 2576 | atomic_read(&sbi->s_bal_allocated), |
2557 | atomic_read(&sbi->s_bal_reqs), | 2577 | atomic_read(&sbi->s_bal_reqs), |
2558 | atomic_read(&sbi->s_bal_success)); | 2578 | atomic_read(&sbi->s_bal_success)); |
2559 | printk(KERN_INFO | 2579 | ext4_msg(sb, KERN_INFO, |
2560 | "EXT4-fs: mballoc: %u extents scanned, %u goal hits, " | 2580 | "mballoc: %u extents scanned, %u goal hits, " |
2561 | "%u 2^N hits, %u breaks, %u lost\n", | 2581 | "%u 2^N hits, %u breaks, %u lost", |
2562 | atomic_read(&sbi->s_bal_ex_scanned), | 2582 | atomic_read(&sbi->s_bal_ex_scanned), |
2563 | atomic_read(&sbi->s_bal_goals), | 2583 | atomic_read(&sbi->s_bal_goals), |
2564 | atomic_read(&sbi->s_bal_2orders), | 2584 | atomic_read(&sbi->s_bal_2orders), |
2565 | atomic_read(&sbi->s_bal_breaks), | 2585 | atomic_read(&sbi->s_bal_breaks), |
2566 | atomic_read(&sbi->s_mb_lost_chunks)); | 2586 | atomic_read(&sbi->s_mb_lost_chunks)); |
2567 | printk(KERN_INFO | 2587 | ext4_msg(sb, KERN_INFO, |
2568 | "EXT4-fs: mballoc: %lu generated and it took %Lu\n", | 2588 | "mballoc: %lu generated and it took %Lu", |
2569 | sbi->s_mb_buddies_generated++, | 2589 | sbi->s_mb_buddies_generated, |
2570 | sbi->s_mb_generation_time); | 2590 | sbi->s_mb_generation_time); |
2571 | printk(KERN_INFO | 2591 | ext4_msg(sb, KERN_INFO, |
2572 | "EXT4-fs: mballoc: %u preallocated, %u discarded\n", | 2592 | "mballoc: %u preallocated, %u discarded", |
2573 | atomic_read(&sbi->s_mb_preallocated), | 2593 | atomic_read(&sbi->s_mb_preallocated), |
2574 | atomic_read(&sbi->s_mb_discarded)); | 2594 | atomic_read(&sbi->s_mb_discarded)); |
2575 | } | 2595 | } |
@@ -2628,6 +2648,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2628 | rb_erase(&entry->node, &(db->bb_free_root)); | 2648 | rb_erase(&entry->node, &(db->bb_free_root)); |
2629 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); | 2649 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
2630 | 2650 | ||
2651 | /* | ||
2652 | * Clear the trimmed flag for the group so that the next | ||
2653 | * ext4_trim_fs can trim it. | ||
2654 | * If the volume is mounted with -o discard, online discard | ||
2655 | * is supported and the free blocks will be trimmed online. | ||
2656 | */ | ||
2657 | if (!test_opt(sb, DISCARD)) | ||
2658 | EXT4_MB_GRP_CLEAR_TRIMMED(db); | ||
2659 | |||
2631 | if (!db->bb_free_root.rb_node) { | 2660 | if (!db->bb_free_root.rb_node) { |
2632 | /* No more items in the per group rb tree | 2661 | /* No more items in the per group rb tree |
2633 | * balance refcounts from ext4_mb_free_metadata() | 2662 | * balance refcounts from ext4_mb_free_metadata() |
@@ -2771,8 +2800,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2771 | * We leak some of the blocks here. | 2800 | * We leak some of the blocks here. |
2772 | */ | 2801 | */ |
2773 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); | 2802 | ext4_lock_group(sb, ac->ac_b_ex.fe_group); |
2774 | mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, | 2803 | ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, |
2775 | ac->ac_b_ex.fe_len); | 2804 | ac->ac_b_ex.fe_len); |
2776 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | 2805 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); |
2777 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 2806 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
2778 | if (!err) | 2807 | if (!err) |
@@ -2790,7 +2819,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2790 | } | 2819 | } |
2791 | } | 2820 | } |
2792 | #endif | 2821 | #endif |
2793 | mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len); | 2822 | ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, |
2823 | ac->ac_b_ex.fe_len); | ||
2794 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 2824 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
2795 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 2825 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
2796 | ext4_free_blks_set(sb, gdp, | 2826 | ext4_free_blks_set(sb, gdp, |
@@ -2830,8 +2860,9 @@ out_err: | |||
2830 | 2860 | ||
2831 | /* | 2861 | /* |
2832 | * here we normalize request for locality group | 2862 | * here we normalize request for locality group |
2833 | * Group requests are normalized to s_stripe size if we set the same via mount | 2863 | * Group requests are normalized to s_mb_group_prealloc, which goes to |
2834 | * option. If not we set it to s_mb_group_prealloc which can be configured via | 2864 | * s_stripe if we set the same via mount option. |
2865 | * s_mb_group_prealloc can be configured via | ||
2835 | * /sys/fs/ext4/<partition>/mb_group_prealloc | 2866 | * /sys/fs/ext4/<partition>/mb_group_prealloc |
2836 | * | 2867 | * |
2837 | * XXX: should we try to preallocate more than the group has now? | 2868 | * XXX: should we try to preallocate more than the group has now? |
@@ -2842,10 +2873,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) | |||
2842 | struct ext4_locality_group *lg = ac->ac_lg; | 2873 | struct ext4_locality_group *lg = ac->ac_lg; |
2843 | 2874 | ||
2844 | BUG_ON(lg == NULL); | 2875 | BUG_ON(lg == NULL); |
2845 | if (EXT4_SB(sb)->s_stripe) | 2876 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; |
2846 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; | ||
2847 | else | ||
2848 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; | ||
2849 | mb_debug(1, "#%u: goal %u blocks for locality group\n", | 2877 | mb_debug(1, "#%u: goal %u blocks for locality group\n", |
2850 | current->pid, ac->ac_g_ex.fe_len); | 2878 | current->pid, ac->ac_g_ex.fe_len); |
2851 | } | 2879 | } |
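The deleted s_stripe branch is not simply dropped: the stripe constraint is presumably folded into s_mb_group_prealloc once at mount time rather than re-checked on every allocation, along the lines of:

	/* in ext4_mb_init(), illustrative */
	if (sbi->s_stripe > 1)
		sbi->s_mb_group_prealloc = roundup(
			sbi->s_mb_group_prealloc, sbi->s_stripe);

so a value tuned through sysfs still ends up stripe-aligned.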
@@ -3001,9 +3029,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3001 | 3029 | ||
3002 | if (start + size <= ac->ac_o_ex.fe_logical && | 3030 | if (start + size <= ac->ac_o_ex.fe_logical && |
3003 | start > ac->ac_o_ex.fe_logical) { | 3031 | start > ac->ac_o_ex.fe_logical) { |
3004 | printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n", | 3032 | ext4_msg(ac->ac_sb, KERN_ERR, |
3005 | (unsigned long) start, (unsigned long) size, | 3033 | "start %lu, size %lu, fe_logical %lu", |
3006 | (unsigned long) ac->ac_o_ex.fe_logical); | 3034 | (unsigned long) start, (unsigned long) size, |
3035 | (unsigned long) ac->ac_o_ex.fe_logical); | ||
3007 | } | 3036 | } |
3008 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && | 3037 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && |
3009 | start > ac->ac_o_ex.fe_logical); | 3038 | start > ac->ac_o_ex.fe_logical); |
@@ -3262,7 +3291,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3262 | 3291 | ||
3263 | while (n) { | 3292 | while (n) { |
3264 | entry = rb_entry(n, struct ext4_free_data, node); | 3293 | entry = rb_entry(n, struct ext4_free_data, node); |
3265 | mb_set_bits(bitmap, entry->start_blk, entry->count); | 3294 | ext4_set_bits(bitmap, entry->start_blk, entry->count); |
3266 | n = rb_next(n); | 3295 | n = rb_next(n); |
3267 | } | 3296 | } |
3268 | return; | 3297 | return; |
@@ -3304,7 +3333,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3304 | if (unlikely(len == 0)) | 3333 | if (unlikely(len == 0)) |
3305 | continue; | 3334 | continue; |
3306 | BUG_ON(groupnr != group); | 3335 | BUG_ON(groupnr != group); |
3307 | mb_set_bits(bitmap, start, len); | 3336 | ext4_set_bits(bitmap, start, len); |
3308 | preallocated += len; | 3337 | preallocated += len; |
3309 | count++; | 3338 | count++; |
3310 | } | 3339 | } |
@@ -3584,10 +3613,11 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3584 | bit = next + 1; | 3613 | bit = next + 1; |
3585 | } | 3614 | } |
3586 | if (free != pa->pa_free) { | 3615 | if (free != pa->pa_free) { |
3587 | printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n", | 3616 | ext4_msg(e4b->bd_sb, KERN_CRIT, |
3588 | pa, (unsigned long) pa->pa_lstart, | 3617 | "pa %p: logic %lu, phys. %lu, len %lu", |
3589 | (unsigned long) pa->pa_pstart, | 3618 | pa, (unsigned long) pa->pa_lstart, |
3590 | (unsigned long) pa->pa_len); | 3619 | (unsigned long) pa->pa_pstart, |
3620 | (unsigned long) pa->pa_len); | ||
3591 | ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", | 3621 | ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", |
3592 | free, pa->pa_free); | 3622 | free, pa->pa_free); |
3593 | /* | 3623 | /* |
@@ -3775,7 +3805,8 @@ repeat: | |||
3775 | * use preallocation while we're discarding it */ | 3805 | * use preallocation while we're discarding it */ |
3776 | spin_unlock(&pa->pa_lock); | 3806 | spin_unlock(&pa->pa_lock); |
3777 | spin_unlock(&ei->i_prealloc_lock); | 3807 | spin_unlock(&ei->i_prealloc_lock); |
3778 | printk(KERN_ERR "uh-oh! used pa while discarding\n"); | 3808 | ext4_msg(sb, KERN_ERR, |
3809 | "uh-oh! used pa while discarding"); | ||
3779 | WARN_ON(1); | 3810 | WARN_ON(1); |
3780 | schedule_timeout_uninterruptible(HZ); | 3811 | schedule_timeout_uninterruptible(HZ); |
3781 | goto repeat; | 3812 | goto repeat; |
@@ -3852,12 +3883,13 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3852 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | 3883 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) |
3853 | return; | 3884 | return; |
3854 | 3885 | ||
3855 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3886 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" |
3856 | " Allocation context details:\n"); | 3887 | " Allocation context details:"); |
3857 | printk(KERN_ERR "EXT4-fs: status %d flags %d\n", | 3888 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", |
3858 | ac->ac_status, ac->ac_flags); | 3889 | ac->ac_status, ac->ac_flags); |
3859 | printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, " | 3890 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " |
3860 | "best %lu/%lu/%lu@%lu cr %d\n", | 3891 | "goal %lu/%lu/%lu@%lu, " |
3892 | "best %lu/%lu/%lu@%lu cr %d", | ||
3861 | (unsigned long)ac->ac_o_ex.fe_group, | 3893 | (unsigned long)ac->ac_o_ex.fe_group, |
3862 | (unsigned long)ac->ac_o_ex.fe_start, | 3894 | (unsigned long)ac->ac_o_ex.fe_start, |
3863 | (unsigned long)ac->ac_o_ex.fe_len, | 3895 | (unsigned long)ac->ac_o_ex.fe_len, |
@@ -3871,9 +3903,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3871 | (unsigned long)ac->ac_b_ex.fe_len, | 3903 | (unsigned long)ac->ac_b_ex.fe_len, |
3872 | (unsigned long)ac->ac_b_ex.fe_logical, | 3904 | (unsigned long)ac->ac_b_ex.fe_logical, |
3873 | (int)ac->ac_criteria); | 3905 | (int)ac->ac_criteria); |
3874 | printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, | 3906 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", |
3875 | ac->ac_found); | 3907 | ac->ac_ex_scanned, ac->ac_found); |
3876 | printk(KERN_ERR "EXT4-fs: groups: \n"); | 3908 | ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); |
3877 | ngroups = ext4_get_groups_count(sb); | 3909 | ngroups = ext4_get_groups_count(sb); |
3878 | for (i = 0; i < ngroups; i++) { | 3910 | for (i = 0; i < ngroups; i++) { |
3879 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); | 3911 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
@@ -4637,7 +4669,7 @@ do_more: | |||
4637 | } | 4669 | } |
4638 | ext4_mark_super_dirty(sb); | 4670 | ext4_mark_super_dirty(sb); |
4639 | error_return: | 4671 | error_return: |
4640 | if (freed) | 4672 | if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) |
4641 | dquot_free_block(inode, freed); | 4673 | dquot_free_block(inode, freed); |
4642 | brelse(bitmap_bh); | 4674 | brelse(bitmap_bh); |
4643 | ext4_std_error(sb, err); | 4675 | ext4_std_error(sb, err); |
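The new EXT4_FREE_BLOCKS_NO_QUOT_UPDATE flag lets a caller that has already settled quota accounting (or is freeing blocks never charged to the inode) suppress the dquot_free_block() above; a hypothetical caller:

	/* these blocks were never charged to the inode's quota */
	ext4_free_blocks(handle, inode, NULL, block, count,
			 EXT4_FREE_BLOCKS_METADATA |
			 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE);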
@@ -4645,7 +4677,7 @@ error_return: | |||
4645 | } | 4677 | } |
4646 | 4678 | ||
4647 | /** | 4679 | /** |
4648 | * ext4_add_groupblocks() -- Add given blocks to an existing group | 4680 | * ext4_group_add_blocks() -- Add given blocks to an existing group |
4649 | * @handle: handle to this transaction | 4681 | * @handle: handle to this transaction |
4650 | * @sb: super block | 4682 | * @sb: super block |
4651 | * @block: start physical block to add to the block group | 4683 | * @block: start physical block to add to the block group |
@@ -4653,7 +4685,7 @@ error_return: | |||
4653 | * | 4685 | * |
4654 | * This marks the blocks as free in the bitmap and buddy. | 4686 | * This marks the blocks as free in the bitmap and buddy. |
4655 | */ | 4687 | */ |
4656 | void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | 4688 | int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, |
4657 | ext4_fsblk_t block, unsigned long count) | 4689 | ext4_fsblk_t block, unsigned long count) |
4658 | { | 4690 | { |
4659 | struct buffer_head *bitmap_bh = NULL; | 4691 | struct buffer_head *bitmap_bh = NULL; |
@@ -4666,25 +4698,35 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
4666 | struct ext4_buddy e4b; | 4698 | struct ext4_buddy e4b; |
4667 | int err = 0, ret, blk_free_count; | 4699 | int err = 0, ret, blk_free_count; |
4668 | ext4_grpblk_t blocks_freed; | 4700 | ext4_grpblk_t blocks_freed; |
4669 | struct ext4_group_info *grp; | ||
4670 | 4701 | ||
4671 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | 4702 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); |
4672 | 4703 | ||
4704 | if (count == 0) | ||
4705 | return 0; | ||
4706 | |||
4673 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4707 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
4674 | grp = ext4_get_group_info(sb, block_group); | ||
4675 | /* | 4708 | /* |
4676 | * Check to see if we are freeing blocks across a group | 4709 | * Check to see if we are freeing blocks across a group |
4677 | * boundary. | 4710 | * boundary. |
4678 | */ | 4711 | */ |
4679 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) | 4712 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { |
4713 | ext4_warning(sb, "too many blocks added to group %u\n", | ||
4714 | block_group); | ||
4715 | err = -EINVAL; | ||
4680 | goto error_return; | 4716 | goto error_return; |
4717 | } | ||
4681 | 4718 | ||
4682 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 4719 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
4683 | if (!bitmap_bh) | 4720 | if (!bitmap_bh) { |
4721 | err = -EIO; | ||
4684 | goto error_return; | 4722 | goto error_return; |
4723 | } | ||
4724 | |||
4685 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); | 4725 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); |
4686 | if (!desc) | 4726 | if (!desc) { |
4727 | err = -EIO; | ||
4687 | goto error_return; | 4728 | goto error_return; |
4729 | } | ||
4688 | 4730 | ||
4689 | if (in_range(ext4_block_bitmap(sb, desc), block, count) || | 4731 | if (in_range(ext4_block_bitmap(sb, desc), block, count) || |
4690 | in_range(ext4_inode_bitmap(sb, desc), block, count) || | 4732 | in_range(ext4_inode_bitmap(sb, desc), block, count) || |
@@ -4694,6 +4736,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
4694 | ext4_error(sb, "Adding blocks in system zones - " | 4736 | ext4_error(sb, "Adding blocks in system zones - " |
4695 | "Block = %llu, count = %lu", | 4737 | "Block = %llu, count = %lu", |
4696 | block, count); | 4738 | block, count); |
4739 | err = -EINVAL; | ||
4697 | goto error_return; | 4740 | goto error_return; |
4698 | } | 4741 | } |
4699 | 4742 | ||
@@ -4762,7 +4805,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
4762 | error_return: | 4805 | error_return: |
4763 | brelse(bitmap_bh); | 4806 | brelse(bitmap_bh); |
4764 | ext4_std_error(sb, err); | 4807 | ext4_std_error(sb, err); |
4765 | return; | 4808 | return err; |
4766 | } | 4809 | } |
4767 | 4810 | ||
4768 | /** | 4811 | /** |
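With the void-to-int conversion of ext4_group_add_blocks(), callers can finally propagate failures instead of losing them; the expected call shape (mirrored by the ext4_group_extend() hunk further down) is roughly:

	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
	if (err)
		goto exit_put;	/* illustrative error path */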
@@ -4782,6 +4825,8 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4782 | { | 4825 | { |
4783 | struct ext4_free_extent ex; | 4826 | struct ext4_free_extent ex; |
4784 | 4827 | ||
4828 | trace_ext4_trim_extent(sb, group, start, count); | ||
4829 | |||
4785 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | 4830 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); |
4786 | 4831 | ||
4787 | ex.fe_start = start; | 4832 | ex.fe_start = start; |
@@ -4802,7 +4847,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4802 | /** | 4847 | /** |
4803 | * ext4_trim_all_free -- function to trim all free space in alloc. group | 4848 | * ext4_trim_all_free -- function to trim all free space in alloc. group |
4804 | * @sb: super block for file system | 4849 | * @sb: super block for file system |
4805 | * @e4b: ext4 buddy | 4850 | * @group: group to be trimmed |
4806 | * @start: first group block to examine | 4851 | * @start: first group block to examine |
4807 | * @max: last group block to examine | 4852 | * @max: last group block to examine |
4808 | * @minblocks: minimum extent block count | 4853 | * @minblocks: minimum extent block count |
@@ -4823,10 +4868,12 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4823 | ext4_grpblk_t minblocks) | 4868 | ext4_grpblk_t minblocks) |
4824 | { | 4869 | { |
4825 | void *bitmap; | 4870 | void *bitmap; |
4826 | ext4_grpblk_t next, count = 0; | 4871 | ext4_grpblk_t next, count = 0, free_count = 0; |
4827 | struct ext4_buddy e4b; | 4872 | struct ext4_buddy e4b; |
4828 | int ret; | 4873 | int ret; |
4829 | 4874 | ||
4875 | trace_ext4_trim_all_free(sb, group, start, max); | ||
4876 | |||
4830 | ret = ext4_mb_load_buddy(sb, group, &e4b); | 4877 | ret = ext4_mb_load_buddy(sb, group, &e4b); |
4831 | if (ret) { | 4878 | if (ret) { |
4832 | ext4_error(sb, "Error in loading buddy " | 4879 | ext4_error(sb, "Error in loading buddy " |
@@ -4836,6 +4883,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4836 | bitmap = e4b.bd_bitmap; | 4883 | bitmap = e4b.bd_bitmap; |
4837 | 4884 | ||
4838 | ext4_lock_group(sb, group); | 4885 | ext4_lock_group(sb, group); |
4886 | if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && | ||
4887 | minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) | ||
4888 | goto out; | ||
4889 | |||
4839 | start = (e4b.bd_info->bb_first_free > start) ? | 4890 | start = (e4b.bd_info->bb_first_free > start) ? |
4840 | e4b.bd_info->bb_first_free : start; | 4891 | e4b.bd_info->bb_first_free : start; |
4841 | 4892 | ||
@@ -4850,6 +4901,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4850 | next - start, group, &e4b); | 4901 | next - start, group, &e4b); |
4851 | count += next - start; | 4902 | count += next - start; |
4852 | } | 4903 | } |
4904 | free_count += next - start; | ||
4853 | start = next + 1; | 4905 | start = next + 1; |
4854 | 4906 | ||
4855 | if (fatal_signal_pending(current)) { | 4907 | if (fatal_signal_pending(current)) { |
@@ -4863,9 +4915,13 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | |||
4863 | ext4_lock_group(sb, group); | 4915 | ext4_lock_group(sb, group); |
4864 | } | 4916 | } |
4865 | 4917 | ||
4866 | if ((e4b.bd_info->bb_free - count) < minblocks) | 4918 | if ((e4b.bd_info->bb_free - free_count) < minblocks) |
4867 | break; | 4919 | break; |
4868 | } | 4920 | } |
4921 | |||
4922 | if (!ret) | ||
4923 | EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); | ||
4924 | out: | ||
4869 | ext4_unlock_group(sb, group); | 4925 | ext4_unlock_group(sb, group); |
4870 | ext4_mb_unload_buddy(&e4b); | 4926 | ext4_mb_unload_buddy(&e4b); |
4871 | 4927 | ||
@@ -4904,6 +4960,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4904 | 4960 | ||
4905 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | 4961 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) |
4906 | return -EINVAL; | 4962 | return -EINVAL; |
4963 | if (start + len <= first_data_blk) | ||
4964 | goto out; | ||
4907 | if (start < first_data_blk) { | 4965 | if (start < first_data_blk) { |
4908 | len -= first_data_blk - start; | 4966 | len -= first_data_blk - start; |
4909 | start = first_data_blk; | 4967 | start = first_data_blk; |
@@ -4952,5 +5010,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4952 | } | 5010 | } |
4953 | range->len = trimmed * sb->s_blocksize; | 5011 | range->len = trimmed * sb->s_blocksize; |
4954 | 5012 | ||
5013 | if (!ret) | ||
5014 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); | ||
5015 | |||
5016 | out: | ||
4955 | return ret; | 5017 | return ret; |
4956 | } | 5018 | } |
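The s_last_trim_minblks bookkeeping lets a later FITRIM with an equal or larger minlen skip groups whose WAS_TRIMMED bit is still set. The whole path is driven from userspace by the FITRIM ioctl; a minimal sketch, assuming fd is open on any file in the filesystem, as fstrim(8) does:

#include <limits.h>
#include <linux/fs.h>
#include <sys/ioctl.h>

static int trim_whole_fs(int fd)
{
	struct fstrim_range range = {
		.start	= 0,
		.len	= ULLONG_MAX,	/* whole filesystem */
		.minlen	= 0,		/* no minimum extent length */
	};

	if (ioctl(fd, FITRIM, &range) < 0)
		return -1;
	/* on success, range.len reports how many bytes were trimmed */
	return 0;
}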
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 20b5e7bfebd1..9d4a636b546c 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -187,7 +187,6 @@ struct ext4_allocation_context { | |||
187 | __u16 ac_flags; /* allocation hints */ | 187 | __u16 ac_flags; /* allocation hints */ |
188 | __u8 ac_status; | 188 | __u8 ac_status; |
189 | __u8 ac_criteria; | 189 | __u8 ac_criteria; |
190 | __u8 ac_repeats; | ||
191 | __u8 ac_2order; /* if request is to allocate 2^N blocks and | 190 | __u8 ac_2order; /* if request is to allocate 2^N blocks and |
192 | * N > 0, the field stores N, otherwise 0 */ | 191 | * N > 0, the field stores N, otherwise 0 */ |
193 | __u8 ac_op; /* operation, for history only */ | 192 | __u8 ac_op; /* operation, for history only */ |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8c9babac43dc..f8068c7bae9f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -289,7 +289,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent | |||
289 | while (len--) printk("%c", *name++); | 289 | while (len--) printk("%c", *name++); |
290 | ext4fs_dirhash(de->name, de->name_len, &h); | 290 | ext4fs_dirhash(de->name, de->name_len, &h); |
291 | printk(":%x.%u ", h.hash, | 291 | printk(":%x.%u ", h.hash, |
292 | ((char *) de - base)); | 292 | (unsigned) ((char *) de - base)); |
293 | } | 293 | } |
294 | space += EXT4_DIR_REC_LEN(de->name_len); | 294 | space += EXT4_DIR_REC_LEN(de->name_len); |
295 | names++; | 295 | names++; |
@@ -1013,7 +1013,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1013 | 1013 | ||
1014 | *err = -ENOENT; | 1014 | *err = -ENOENT; |
1015 | errout: | 1015 | errout: |
1016 | dxtrace(printk(KERN_DEBUG "%s not found\n", name)); | 1016 | dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); |
1017 | dx_release (frames); | 1017 | dx_release (frames); |
1018 | return NULL; | 1018 | return NULL; |
1019 | } | 1019 | } |
@@ -1985,18 +1985,11 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
1985 | if (!list_empty(&EXT4_I(inode)->i_orphan)) | 1985 | if (!list_empty(&EXT4_I(inode)->i_orphan)) |
1986 | goto out_unlock; | 1986 | goto out_unlock; |
1987 | 1987 | ||
1988 | /* Orphan handling is only valid for files with data blocks | 1988 | /* |
1989 | * being truncated, or files being unlinked. */ | 1989 | * Orphan handling is only valid for files with data blocks |
1990 | 1990 | * being truncated, or files being unlinked. Note that we either | |
1991 | /* @@@ FIXME: Observation from aviro: | 1991 | * hold i_mutex, or the inode can not be referenced from outside, |
1992 | * I think I can trigger J_ASSERT in ext4_orphan_add(). We block | 1992 | * so i_nlink should not be bumped due to race |
1993 | * here (on s_orphan_lock), so race with ext4_link() which might bump | ||
1994 | * ->i_nlink. For, say it, character device. Not a regular file, | ||
1995 | * not a directory, not a symlink and ->i_nlink > 0. | ||
1996 | * | ||
1997 | * tytso, 4/25/2009: I'm not sure how that could happen; | ||
1998 | * shouldn't the fs core protect us from these sort of | ||
1999 | * unlink()/link() races? | ||
2000 | */ | 1993 | */ |
2001 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1994 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
2002 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 1995 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
@@ -2260,9 +2253,11 @@ static int ext4_symlink(struct inode *dir, | |||
2260 | /* | 2253 | /* |
2261 | * For non-fast symlinks, we just allocate inode and put it on | 2254 | * For non-fast symlinks, we just allocate inode and put it on |
2262 | * orphan list in the first transaction => we need bitmap, | 2255 | * orphan list in the first transaction => we need bitmap, |
2263 | * group descriptor, sb, inode block, quota blocks. | 2256 | * group descriptor, sb, inode block, quota blocks, and |
2257 | * possibly selinux xattr blocks. | ||
2264 | */ | 2258 | */ |
2265 | credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | 2259 | credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + |
2260 | EXT4_XATTR_TRANS_BLOCKS; | ||
2266 | } else { | 2261 | } else { |
2267 | /* | 2262 | /* |
2268 | * Fast symlink. We have to add entry to directory | 2263 | * Fast symlink. We have to add entry to directory |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7bb8f76d470a..92f38ee13f8a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -142,7 +142,23 @@ static void ext4_end_io_work(struct work_struct *work) | |||
142 | unsigned long flags; | 142 | unsigned long flags; |
143 | int ret; | 143 | int ret; |
144 | 144 | ||
145 | mutex_lock(&inode->i_mutex); | 145 | if (!mutex_trylock(&inode->i_mutex)) { |
146 | /* | ||
147 | * Requeue the work instead of waiting so that the work | ||
148 | * items queued after this can be processed. | ||
149 | */ | ||
150 | queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); | ||
151 | /* | ||
152 | * To prevent the ext4-dio-unwritten thread from keeping | ||
153 | * requeueing end_io requests and occupying cpu for too long, | ||
154 | * yield the cpu if it sees an end_io request that has already | ||
155 | * been requeued. | ||
156 | */ | ||
157 | if (io->flag & EXT4_IO_END_QUEUED) | ||
158 | yield(); | ||
159 | io->flag |= EXT4_IO_END_QUEUED; | ||
160 | return; | ||
161 | } | ||
146 | ret = ext4_end_io_nolock(io); | 162 | ret = ext4_end_io_nolock(io); |
147 | if (ret < 0) { | 163 | if (ret < 0) { |
148 | mutex_unlock(&inode->i_mutex); | 164 | mutex_unlock(&inode->i_mutex); |
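The trylock-and-requeue dance above avoids deadlocking against a holder of i_mutex who is waiting on this same workqueue. The generic shape of the pattern, with hypothetical names:

struct my_ctx {
	struct mutex		lock;
	struct work_struct	work;
	struct workqueue_struct	*wq;
};

static void example_work_fn(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, work);

	if (!mutex_trylock(&ctx->lock)) {
		/* the lock holder may be flushing this queue: requeue
		 * instead of blocking so later items still make progress */
		queue_work(ctx->wq, &ctx->work);
		return;
	}
	do_work_locked(ctx);	/* hypothetical payload */
	mutex_unlock(&ctx->lock);
}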
@@ -285,11 +301,7 @@ static int io_submit_init(struct ext4_io_submit *io, | |||
285 | io_end = ext4_init_io_end(inode, GFP_NOFS); | 301 | io_end = ext4_init_io_end(inode, GFP_NOFS); |
286 | if (!io_end) | 302 | if (!io_end) |
287 | return -ENOMEM; | 303 | return -ENOMEM; |
288 | do { | 304 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); |
289 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
290 | nvecs >>= 1; | ||
291 | } while (bio == NULL); | ||
292 | |||
293 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 305 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
294 | bio->bi_bdev = bh->b_bdev; | 306 | bio->bi_bdev = bh->b_bdev; |
295 | bio->bi_private = io->io_end = io_end; | 307 | bio->bi_private = io->io_end = io_end; |
@@ -338,8 +350,10 @@ submit_and_retry: | |||
338 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | 350 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && |
339 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | 351 | (io_end->pages[io_end->num_io_pages-1] != io_page)) |
340 | goto submit_and_retry; | 352 | goto submit_and_retry; |
341 | if (buffer_uninit(bh)) | 353 | if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
342 | io->io_end->flag |= EXT4_IO_END_UNWRITTEN; | 354 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
355 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
356 | } | ||
343 | io->io_end->size += bh->b_size; | 357 | io->io_end->size += bh->b_size; |
344 | io->io_next_block++; | 358 | io->io_next_block++; |
345 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 359 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 80bbc9c60c24..707d3f16f7ce 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -16,6 +16,35 @@ | |||
16 | 16 | ||
17 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
18 | 18 | ||
19 | int ext4_resize_begin(struct super_block *sb) | ||
20 | { | ||
21 | int ret = 0; | ||
22 | |||
23 | if (!capable(CAP_SYS_RESOURCE)) | ||
24 | return -EPERM; | ||
25 | |||
26 | /* | ||
27 | * We are not allowed to do online-resizing on a filesystem mounted | ||
28 | * with error, because it can destroy the filesystem easily. | ||
29 | */ | ||
30 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | ||
31 | ext4_warning(sb, "There are errors in the filesystem, " | ||
32 | "so online resizing is not allowed\n"); | ||
33 | return -EPERM; | ||
34 | } | ||
35 | |||
36 | if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) | ||
37 | ret = -EBUSY; | ||
38 | |||
39 | return ret; | ||
40 | } | ||
41 | |||
42 | void ext4_resize_end(struct super_block *sb) | ||
43 | { | ||
44 | clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); | ||
45 | smp_mb__after_clear_bit(); | ||
46 | } | ||
47 | |||
19 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) | 48 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) |
20 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) | 49 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) |
21 | 50 | ||
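ext4_resize_begin()/ext4_resize_end() replace the old s_resize_lock mutex with a single test_and_set_bit_lock() flag, so at most one resizer runs at a time without a lock being held across the whole operation; the presumed caller pattern in the resize ioctls:

	err = ext4_resize_begin(sb);
	if (err)
		return err;
	err = ext4_group_add(sb, &input);	/* or ext4_group_extend() */
	ext4_resize_end(sb);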
@@ -118,10 +147,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, | |||
118 | brelse(bh); | 147 | brelse(bh); |
119 | bh = ERR_PTR(err); | 148 | bh = ERR_PTR(err); |
120 | } else { | 149 | } else { |
121 | lock_buffer(bh); | ||
122 | memset(bh->b_data, 0, sb->s_blocksize); | 150 | memset(bh->b_data, 0, sb->s_blocksize); |
123 | set_buffer_uptodate(bh); | 151 | set_buffer_uptodate(bh); |
124 | unlock_buffer(bh); | ||
125 | } | 152 | } |
126 | 153 | ||
127 | return bh; | 154 | return bh; |
@@ -132,8 +159,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, | |||
132 | * If that fails, restart the transaction & regain write access for the | 159 | * If that fails, restart the transaction & regain write access for the |
133 | * buffer head which is used for block_bitmap modifications. | 160 | * buffer head which is used for block_bitmap modifications. |
134 | */ | 161 | */ |
135 | static int extend_or_restart_transaction(handle_t *handle, int thresh, | 162 | static int extend_or_restart_transaction(handle_t *handle, int thresh) |
136 | struct buffer_head *bh) | ||
137 | { | 163 | { |
138 | int err; | 164 | int err; |
139 | 165 | ||
@@ -144,9 +170,8 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh, | |||
144 | if (err < 0) | 170 | if (err < 0) |
145 | return err; | 171 | return err; |
146 | if (err) { | 172 | if (err) { |
147 | if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) | 173 | err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA); |
148 | return err; | 174 | if (err) |
149 | if ((err = ext4_journal_get_write_access(handle, bh))) | ||
150 | return err; | 175 | return err; |
151 | } | 176 | } |
152 | 177 | ||
@@ -181,21 +206,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
181 | if (IS_ERR(handle)) | 206 | if (IS_ERR(handle)) |
182 | return PTR_ERR(handle); | 207 | return PTR_ERR(handle); |
183 | 208 | ||
184 | mutex_lock(&sbi->s_resize_lock); | 209 | BUG_ON(input->group != sbi->s_groups_count); |
185 | if (input->group != sbi->s_groups_count) { | ||
186 | err = -EBUSY; | ||
187 | goto exit_journal; | ||
188 | } | ||
189 | |||
190 | if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { | ||
191 | err = PTR_ERR(bh); | ||
192 | goto exit_journal; | ||
193 | } | ||
194 | |||
195 | if (ext4_bg_has_super(sb, input->group)) { | ||
196 | ext4_debug("mark backup superblock %#04llx (+0)\n", start); | ||
197 | ext4_set_bit(0, bh->b_data); | ||
198 | } | ||
199 | 210 | ||
200 | /* Copy all of the GDT blocks into the backup in this group */ | 211 | /* Copy all of the GDT blocks into the backup in this group */ |
201 | for (i = 0, bit = 1, block = start + 1; | 212 | for (i = 0, bit = 1, block = start + 1; |
@@ -203,29 +214,26 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
203 | struct buffer_head *gdb; | 214 | struct buffer_head *gdb; |
204 | 215 | ||
205 | ext4_debug("update backup group %#04llx (+%d)\n", block, bit); | 216 | ext4_debug("update backup group %#04llx (+%d)\n", block, bit); |
206 | 217 | err = extend_or_restart_transaction(handle, 1); | |
207 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | 218 | if (err) |
208 | goto exit_bh; | 219 | goto exit_journal; |
209 | 220 | ||
210 | gdb = sb_getblk(sb, block); | 221 | gdb = sb_getblk(sb, block); |
211 | if (!gdb) { | 222 | if (!gdb) { |
212 | err = -EIO; | 223 | err = -EIO; |
213 | goto exit_bh; | 224 | goto exit_journal; |
214 | } | 225 | } |
215 | if ((err = ext4_journal_get_write_access(handle, gdb))) { | 226 | if ((err = ext4_journal_get_write_access(handle, gdb))) { |
216 | brelse(gdb); | 227 | brelse(gdb); |
217 | goto exit_bh; | 228 | goto exit_journal; |
218 | } | 229 | } |
219 | lock_buffer(gdb); | ||
220 | memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); | 230 | memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); |
221 | set_buffer_uptodate(gdb); | 231 | set_buffer_uptodate(gdb); |
222 | unlock_buffer(gdb); | ||
223 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); | 232 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); |
224 | if (unlikely(err)) { | 233 | if (unlikely(err)) { |
225 | brelse(gdb); | 234 | brelse(gdb); |
226 | goto exit_bh; | 235 | goto exit_journal; |
227 | } | 236 | } |
228 | ext4_set_bit(bit, bh->b_data); | ||
229 | brelse(gdb); | 237 | brelse(gdb); |
230 | } | 238 | } |
231 | 239 | ||
@@ -235,9 +243,22 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, | 243 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
236 | GFP_NOFS); | 244 | GFP_NOFS); |
237 | if (err) | 245 | if (err) |
238 | goto exit_bh; | 246 | goto exit_journal; |
239 | for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) | 247 | |
240 | ext4_set_bit(bit, bh->b_data); | 248 | err = extend_or_restart_transaction(handle, 2); |
249 | if (err) | ||
250 | goto exit_journal; | ||
251 | |||
252 | bh = bclean(handle, sb, input->block_bitmap); | ||
253 | if (IS_ERR(bh)) { | ||
254 | err = PTR_ERR(bh); | ||
255 | goto exit_journal; | ||
256 | } | ||
257 | |||
258 | if (ext4_bg_has_super(sb, input->group)) { | ||
259 | ext4_debug("mark backup group tables %#04llx (+0)\n", start); | ||
260 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); | ||
261 | } | ||
241 | 262 | ||
242 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | 263 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, |
243 | input->block_bitmap - start); | 264 | input->block_bitmap - start); |
@@ -253,12 +274,9 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | 274 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); |
254 | if (err) | 275 | if (err) |
255 | goto exit_bh; | 276 | goto exit_bh; |
256 | for (i = 0, bit = input->inode_table - start; | 277 | ext4_set_bits(bh->b_data, input->inode_table - start, |
257 | i < sbi->s_itb_per_group; i++, bit++) | 278 | sbi->s_itb_per_group); |
258 | ext4_set_bit(bit, bh->b_data); | ||
259 | 279 | ||
260 | if ((err = extend_or_restart_transaction(handle, 2, bh))) | ||
261 | goto exit_bh; | ||
262 | 280 | ||
263 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, | 281 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, |
264 | bh->b_data); | 282 | bh->b_data); |
@@ -285,7 +303,6 @@ exit_bh: | |||
285 | brelse(bh); | 303 | brelse(bh); |
286 | 304 | ||
287 | exit_journal: | 305 | exit_journal: |
288 | mutex_unlock(&sbi->s_resize_lock); | ||
289 | if ((err2 = ext4_journal_stop(handle)) && !err) | 306 | if ((err2 = ext4_journal_stop(handle)) && !err) |
290 | err = err2; | 307 | err = err2; |
291 | 308 | ||
@@ -377,15 +394,15 @@ static int verify_reserved_gdb(struct super_block *sb, | |||
377 | * fail once we start modifying the data on disk, because JBD has no rollback. | 394 | * fail once we start modifying the data on disk, because JBD has no rollback. |
378 | */ | 395 | */ |
379 | static int add_new_gdb(handle_t *handle, struct inode *inode, | 396 | static int add_new_gdb(handle_t *handle, struct inode *inode, |
380 | struct ext4_new_group_data *input, | 397 | ext4_group_t group) |
381 | struct buffer_head **primary) | ||
382 | { | 398 | { |
383 | struct super_block *sb = inode->i_sb; | 399 | struct super_block *sb = inode->i_sb; |
384 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 400 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
385 | unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 401 | unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); |
386 | ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; | 402 | ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; |
387 | struct buffer_head **o_group_desc, **n_group_desc; | 403 | struct buffer_head **o_group_desc, **n_group_desc; |
388 | struct buffer_head *dind; | 404 | struct buffer_head *dind; |
405 | struct buffer_head *gdb_bh; | ||
389 | int gdbackups; | 406 | int gdbackups; |
390 | struct ext4_iloc iloc; | 407 | struct ext4_iloc iloc; |
391 | __le32 *data; | 408 | __le32 *data; |
@@ -408,11 +425,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
408 | return -EPERM; | 425 | return -EPERM; |
409 | } | 426 | } |
410 | 427 | ||
411 | *primary = sb_bread(sb, gdblock); | 428 | gdb_bh = sb_bread(sb, gdblock); |
412 | if (!*primary) | 429 | if (!gdb_bh) |
413 | return -EIO; | 430 | return -EIO; |
414 | 431 | ||
415 | if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { | 432 | gdbackups = verify_reserved_gdb(sb, gdb_bh); |
433 | if (gdbackups < 0) { | ||
416 | err = gdbackups; | 434 | err = gdbackups; |
417 | goto exit_bh; | 435 | goto exit_bh; |
418 | } | 436 | } |
@@ -427,7 +445,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
427 | data = (__le32 *)dind->b_data; | 445 | data = (__le32 *)dind->b_data; |
428 | if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { | 446 | if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { |
429 | ext4_warning(sb, "new group %u GDT block %llu not reserved", | 447 | ext4_warning(sb, "new group %u GDT block %llu not reserved", |
430 | input->group, gdblock); | 448 | group, gdblock); |
431 | err = -EINVAL; | 449 | err = -EINVAL; |
432 | goto exit_dind; | 450 | goto exit_dind; |
433 | } | 451 | } |
@@ -436,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
436 | if (unlikely(err)) | 454 | if (unlikely(err)) |
437 | goto exit_dind; | 455 | goto exit_dind; |
438 | 456 | ||
439 | err = ext4_journal_get_write_access(handle, *primary); | 457 | err = ext4_journal_get_write_access(handle, gdb_bh); |
440 | if (unlikely(err)) | 458 | if (unlikely(err)) |
441 | goto exit_sbh; | 459 | goto exit_sbh; |
442 | 460 | ||
@@ -449,12 +467,13 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
449 | if (unlikely(err)) | 467 | if (unlikely(err)) |
450 | goto exit_dindj; | 468 | goto exit_dindj; |
451 | 469 | ||
452 | n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), | 470 | n_group_desc = ext4_kvmalloc((gdb_num + 1) * |
453 | GFP_NOFS); | 471 | sizeof(struct buffer_head *), |
472 | GFP_NOFS); | ||
454 | if (!n_group_desc) { | 473 | if (!n_group_desc) { |
455 | err = -ENOMEM; | 474 | err = -ENOMEM; |
456 | ext4_warning(sb, | 475 | ext4_warning(sb, "not enough memory for %lu groups", |
457 | "not enough memory for %lu groups", gdb_num + 1); | 476 | gdb_num + 1); |
458 | goto exit_inode; | 477 | goto exit_inode; |
459 | } | 478 | } |
460 | 479 | ||
@@ -475,8 +494,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
475 | } | 494 | } |
476 | inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; | 495 | inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; |
477 | ext4_mark_iloc_dirty(handle, inode, &iloc); | 496 | ext4_mark_iloc_dirty(handle, inode, &iloc); |
478 | memset((*primary)->b_data, 0, sb->s_blocksize); | 497 | memset(gdb_bh->b_data, 0, sb->s_blocksize); |
479 | err = ext4_handle_dirty_metadata(handle, NULL, *primary); | 498 | err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); |
480 | if (unlikely(err)) { | 499 | if (unlikely(err)) { |
481 | ext4_std_error(sb, err); | 500 | ext4_std_error(sb, err); |
482 | goto exit_inode; | 501 | goto exit_inode; |
@@ -486,10 +505,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
486 | o_group_desc = EXT4_SB(sb)->s_group_desc; | 505 | o_group_desc = EXT4_SB(sb)->s_group_desc; |
487 | memcpy(n_group_desc, o_group_desc, | 506 | memcpy(n_group_desc, o_group_desc, |
488 | EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); | 507 | EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); |
489 | n_group_desc[gdb_num] = *primary; | 508 | n_group_desc[gdb_num] = gdb_bh; |
490 | EXT4_SB(sb)->s_group_desc = n_group_desc; | 509 | EXT4_SB(sb)->s_group_desc = n_group_desc; |
491 | EXT4_SB(sb)->s_gdb_count++; | 510 | EXT4_SB(sb)->s_gdb_count++; |
492 | kfree(o_group_desc); | 511 | ext4_kvfree(o_group_desc); |
493 | 512 | ||
494 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); | 513 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); |
495 | err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | 514 | err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); |
@@ -499,6 +518,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
499 | return err; | 518 | return err; |
500 | 519 | ||
501 | exit_inode: | 520 | exit_inode: |
521 | ext4_kvfree(n_group_desc); | ||
502 | /* ext4_handle_release_buffer(handle, iloc.bh); */ | 522 | /* ext4_handle_release_buffer(handle, iloc.bh); */ |
503 | brelse(iloc.bh); | 523 | brelse(iloc.bh); |
504 | exit_dindj: | 524 | exit_dindj: |
@@ -508,7 +528,7 @@ exit_sbh: | |||
508 | exit_dind: | 528 | exit_dind: |
509 | brelse(dind); | 529 | brelse(dind); |
510 | exit_bh: | 530 | exit_bh: |
511 | brelse(*primary); | 531 | brelse(gdb_bh); |
512 | 532 | ||
513 | ext4_debug("leaving with error %d\n", err); | 533 | ext4_debug("leaving with error %d\n", err); |
514 | return err; | 534 | return err; |
@@ -528,7 +548,7 @@ exit_bh: | |||
528 | * backup GDT blocks are stored in their reserved primary GDT block. | 548 | * backup GDT blocks are stored in their reserved primary GDT block. |
529 | */ | 549 | */ |
530 | static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | 550 | static int reserve_backup_gdb(handle_t *handle, struct inode *inode, |
531 | struct ext4_new_group_data *input) | 551 | ext4_group_t group) |
532 | { | 552 | { |
533 | struct super_block *sb = inode->i_sb; | 553 | struct super_block *sb = inode->i_sb; |
534 | int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); | 554 | int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); |
@@ -599,7 +619,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
599 | * Finally we can add each of the reserved backup GDT blocks from | 619 | * Finally we can add each of the reserved backup GDT blocks from |
600 | * the new group to its reserved primary GDT block. | 620 | * the new group to its reserved primary GDT block. |
601 | */ | 621 | */ |
602 | blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); | 622 | blk = group * EXT4_BLOCKS_PER_GROUP(sb); |
603 | for (i = 0; i < reserved_gdb; i++) { | 623 | for (i = 0; i < reserved_gdb; i++) { |
604 | int err2; | 624 | int err2; |
605 | data = (__le32 *)primary[i]->b_data; | 625 | data = (__le32 *)primary[i]->b_data; |
@@ -799,13 +819,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
799 | goto exit_put; | 819 | goto exit_put; |
800 | } | 820 | } |
801 | 821 | ||
802 | mutex_lock(&sbi->s_resize_lock); | ||
803 | if (input->group != sbi->s_groups_count) { | ||
804 | ext4_warning(sb, "multiple resizers run on filesystem!"); | ||
805 | err = -EBUSY; | ||
806 | goto exit_journal; | ||
807 | } | ||
808 | |||
809 | if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) | 822 | if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) |
810 | goto exit_journal; | 823 | goto exit_journal; |
811 | 824 | ||
@@ -820,16 +833,25 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
820 | if ((err = ext4_journal_get_write_access(handle, primary))) | 833 | if ((err = ext4_journal_get_write_access(handle, primary))) |
821 | goto exit_journal; | 834 | goto exit_journal; |
822 | 835 | ||
823 | if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && | 836 | if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { |
824 | (err = reserve_backup_gdb(handle, inode, input))) | 837 | err = reserve_backup_gdb(handle, inode, input->group); |
838 | if (err) | ||
839 | goto exit_journal; | ||
840 | } | ||
841 | } else { | ||
842 | /* | ||
843 | * Note that we can access new group descriptor block safely | ||
844 | * only if add_new_gdb() succeeds. | ||
845 | */ | ||
846 | err = add_new_gdb(handle, inode, input->group); | ||
847 | if (err) | ||
825 | goto exit_journal; | 848 | goto exit_journal; |
826 | } else if ((err = add_new_gdb(handle, inode, input, &primary))) | 849 | primary = sbi->s_group_desc[gdb_num]; |
827 | goto exit_journal; | 850 | } |
828 | 851 | ||
829 | /* | 852 | /* |
830 | * OK, now we've set up the new group. Time to make it active. | 853 | * OK, now we've set up the new group. Time to make it active. |
831 | * | 854 | * |
832 | * We do not lock all allocations via s_resize_lock | ||
833 | * so we have to be safe wrt. concurrent accesses to the group | 855 | * so we have to be safe wrt. concurrent accesses to the group |
834 | * data. So we need to be careful to set all of the relevant | 856 | * data. So we need to be careful to set all of the relevant |
835 | * group descriptor data etc. *before* we enable the group. | 857 | * group descriptor data etc. *before* we enable the group. |
@@ -886,13 +908,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
886 | * | 908 | * |
887 | * The precise rules we use are: | 909 | * The precise rules we use are: |
888 | * | 910 | * |
889 | * * Writers of s_groups_count *must* hold s_resize_lock | ||
890 | * AND | ||
891 | * * Writers must perform a smp_wmb() after updating all dependent | 911 | * * Writers must perform a smp_wmb() after updating all dependent |
892 | * data and before modifying the groups count | 912 | * data and before modifying the groups count |
893 | * | 913 | * |
894 | * * Readers must hold s_resize_lock over the access | ||
895 | * OR | ||
896 | * * Readers must perform an smp_rmb() after reading the groups count | 914 | * * Readers must perform an smp_rmb() after reading the groups count |
897 | * and before reading any dependent data. | 915 | * and before reading any dependent data. |
898 | * | 916 | * |
@@ -937,10 +955,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
937 | ext4_handle_dirty_super(handle, sb); | 955 | ext4_handle_dirty_super(handle, sb); |
938 | 956 | ||
939 | exit_journal: | 957 | exit_journal: |
940 | mutex_unlock(&sbi->s_resize_lock); | ||
941 | if ((err2 = ext4_journal_stop(handle)) && !err) | 958 | if ((err2 = ext4_journal_stop(handle)) && !err) |
942 | err = err2; | 959 | err = err2; |
943 | if (!err) { | 960 | if (!err && primary) { |
944 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | 961 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, |
945 | sizeof(struct ext4_super_block)); | 962 | sizeof(struct ext4_super_block)); |
946 | update_backups(sb, primary->b_blocknr, primary->b_data, | 963 | update_backups(sb, primary->b_blocknr, primary->b_data, |
@@ -969,16 +986,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
969 | ext4_grpblk_t add; | 986 | ext4_grpblk_t add; |
970 | struct buffer_head *bh; | 987 | struct buffer_head *bh; |
971 | handle_t *handle; | 988 | handle_t *handle; |
972 | int err; | 989 | int err, err2; |
973 | ext4_group_t group; | 990 | ext4_group_t group; |
974 | 991 | ||
975 | /* We don't need to worry about locking wrt other resizers just | ||
976 | * yet: we're going to revalidate es->s_blocks_count after | ||
977 | * taking the s_resize_lock below. */ | ||
978 | o_blocks_count = ext4_blocks_count(es); | 992 | o_blocks_count = ext4_blocks_count(es); |
979 | 993 | ||
980 | if (test_opt(sb, DEBUG)) | 994 | if (test_opt(sb, DEBUG)) |
981 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", | 995 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", |
982 | o_blocks_count, n_blocks_count); | 996 | o_blocks_count, n_blocks_count); |
983 | 997 | ||
984 | if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) | 998 | if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) |
@@ -995,7 +1009,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
995 | 1009 | ||
996 | if (n_blocks_count < o_blocks_count) { | 1010 | if (n_blocks_count < o_blocks_count) { |
997 | ext4_warning(sb, "can't shrink FS - resize aborted"); | 1011 | ext4_warning(sb, "can't shrink FS - resize aborted"); |
998 | return -EBUSY; | 1012 | return -EINVAL; |
999 | } | 1013 | } |
1000 | 1014 | ||
1001 | /* Handle the remaining blocks in the last group only. */ | 1015 | /* Handle the remaining blocks in the last group only. */ |
@@ -1038,32 +1052,25 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1038 | goto exit_put; | 1052 | goto exit_put; |
1039 | } | 1053 | } |
1040 | 1054 | ||
1041 | mutex_lock(&EXT4_SB(sb)->s_resize_lock); | ||
1042 | if (o_blocks_count != ext4_blocks_count(es)) { | ||
1043 | ext4_warning(sb, "multiple resizers run on filesystem!"); | ||
1044 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | ||
1045 | ext4_journal_stop(handle); | ||
1046 | err = -EBUSY; | ||
1047 | goto exit_put; | ||
1048 | } | ||
1049 | |||
1050 | if ((err = ext4_journal_get_write_access(handle, | 1055 | if ((err = ext4_journal_get_write_access(handle, |
1051 | EXT4_SB(sb)->s_sbh))) { | 1056 | EXT4_SB(sb)->s_sbh))) { |
1052 | ext4_warning(sb, "error %d on journal write access", err); | 1057 | ext4_warning(sb, "error %d on journal write access", err); |
1053 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | ||
1054 | ext4_journal_stop(handle); | 1058 | ext4_journal_stop(handle); |
1055 | goto exit_put; | 1059 | goto exit_put; |
1056 | } | 1060 | } |
1057 | ext4_blocks_count_set(es, o_blocks_count + add); | 1061 | ext4_blocks_count_set(es, o_blocks_count + add); |
1058 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | ||
1059 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1062 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1060 | o_blocks_count + add); | 1063 | o_blocks_count + add); |
1061 | /* We add the blocks to the bitmap and set the group need init bit */ | 1064 | /* We add the blocks to the bitmap and set the group need init bit */ |
1062 | ext4_add_groupblocks(handle, sb, o_blocks_count, add); | 1065 | err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); |
1063 | ext4_handle_dirty_super(handle, sb); | 1066 | ext4_handle_dirty_super(handle, sb); |
1064 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | 1067 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, |
1065 | o_blocks_count + add); | 1068 | o_blocks_count + add); |
1066 | if ((err = ext4_journal_stop(handle))) | 1069 | err2 = ext4_journal_stop(handle); |
1070 | if (!err && err2) | ||
1071 | err = err2; | ||
1072 | |||
1073 | if (err) | ||
1067 | goto exit_put; | 1074 | goto exit_put; |
1068 | 1075 | ||
1069 | if (test_opt(sb, DEBUG)) | 1076 | if (test_opt(sb, DEBUG)) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ea71aa864b3..44d0c8db2239 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -110,6 +110,35 @@ static struct file_system_type ext3_fs_type = { | |||
110 | #define IS_EXT3_SB(sb) (0) | 110 | #define IS_EXT3_SB(sb) (0) |
111 | #endif | 111 | #endif |
112 | 112 | ||
113 | void *ext4_kvmalloc(size_t size, gfp_t flags) | ||
114 | { | ||
115 | void *ret; | ||
116 | |||
117 | ret = kmalloc(size, flags); | ||
118 | if (!ret) | ||
119 | ret = __vmalloc(size, flags, PAGE_KERNEL); | ||
120 | return ret; | ||
121 | } | ||
122 | |||
123 | void *ext4_kvzalloc(size_t size, gfp_t flags) | ||
124 | { | ||
125 | void *ret; | ||
126 | |||
127 | ret = kzalloc(size, flags); | ||
128 | if (!ret) | ||
129 | ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); | ||
130 | return ret; | ||
131 | } | ||
132 | |||
133 | void ext4_kvfree(void *ptr) | ||
134 | { | ||
135 | if (is_vmalloc_addr(ptr)) | ||
136 | vfree(ptr); | ||
137 | else | ||
138 | kfree(ptr); | ||
139 | |||
140 | } | ||
141 | |||
113 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | 142 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, |
114 | struct ext4_group_desc *bg) | 143 | struct ext4_group_desc *bg) |
115 | { | 144 | { |
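These helpers exist because a group-descriptor pointer array can outgrow what kmalloc() reliably provides on a fragmented machine: try the cheap physically contiguous allocation first, fall back to vmalloc space, and let ext4_kvfree() choose the release path with is_vmalloc_addr(). A hypothetical user:

	struct buffer_head **gd;

	gd = ext4_kvmalloc(db_count * sizeof(*gd), GFP_KERNEL);
	if (!gd)
		return -ENOMEM;
	/* ... */
	ext4_kvfree(gd);	/* correct for either allocation path */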
@@ -269,6 +298,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
269 | journal_t *journal; | 298 | journal_t *journal; |
270 | handle_t *handle; | 299 | handle_t *handle; |
271 | 300 | ||
301 | trace_ext4_journal_start(sb, nblocks, _RET_IP_); | ||
272 | if (sb->s_flags & MS_RDONLY) | 302 | if (sb->s_flags & MS_RDONLY) |
273 | return ERR_PTR(-EROFS); | 303 | return ERR_PTR(-EROFS); |
274 | 304 | ||
@@ -789,11 +819,8 @@ static void ext4_put_super(struct super_block *sb) | |||
789 | 819 | ||
790 | for (i = 0; i < sbi->s_gdb_count; i++) | 820 | for (i = 0; i < sbi->s_gdb_count; i++) |
791 | brelse(sbi->s_group_desc[i]); | 821 | brelse(sbi->s_group_desc[i]); |
792 | kfree(sbi->s_group_desc); | 822 | ext4_kvfree(sbi->s_group_desc); |
793 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 823 | ext4_kvfree(sbi->s_flex_groups); |
794 | vfree(sbi->s_flex_groups); | ||
795 | else | ||
796 | kfree(sbi->s_flex_groups); | ||
797 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 824 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
798 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 825 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
799 | percpu_counter_destroy(&sbi->s_dirs_counter); | 826 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -892,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head) | |||
892 | 919 | ||
893 | static void ext4_destroy_inode(struct inode *inode) | 920 | static void ext4_destroy_inode(struct inode *inode) |
894 | { | 921 | { |
895 | ext4_ioend_wait(inode); | ||
896 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 922 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
897 | ext4_msg(inode->i_sb, KERN_ERR, | 923 | ext4_msg(inode->i_sb, KERN_ERR, |
898 | "Inode %lu (%p): orphan list check failed!", | 924 | "Inode %lu (%p): orphan list check failed!", |
@@ -1976,15 +2002,11 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1976 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << | 2002 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << |
1977 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; | 2003 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; |
1978 | size = flex_group_count * sizeof(struct flex_groups); | 2004 | size = flex_group_count * sizeof(struct flex_groups); |
1979 | sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); | 2005 | sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); |
1980 | if (sbi->s_flex_groups == NULL) { | 2006 | if (sbi->s_flex_groups == NULL) { |
1981 | sbi->s_flex_groups = vzalloc(size); | 2007 | ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", |
1982 | if (sbi->s_flex_groups == NULL) { | 2008 | flex_group_count); |
1983 | ext4_msg(sb, KERN_ERR, | 2009 | goto failed; |
1984 | "not enough memory for %u flex groups", | ||
1985 | flex_group_count); | ||
1986 | goto failed; | ||
1987 | } | ||
1988 | } | 2010 | } |
1989 | 2011 | ||
1990 | for (i = 0; i < sbi->s_groups_count; i++) { | 2012 | for (i = 0; i < sbi->s_groups_count; i++) { |
@@ -2383,17 +2405,25 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | |||
2383 | unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); | 2405 | unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); |
2384 | unsigned long stripe_width = | 2406 | unsigned long stripe_width = |
2385 | le32_to_cpu(sbi->s_es->s_raid_stripe_width); | 2407 | le32_to_cpu(sbi->s_es->s_raid_stripe_width); |
2408 | int ret; | ||
2386 | 2409 | ||
2387 | if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) | 2410 | if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) |
2388 | return sbi->s_stripe; | 2411 | ret = sbi->s_stripe; |
2389 | 2412 | else if (stripe_width <= sbi->s_blocks_per_group) | |
2390 | if (stripe_width <= sbi->s_blocks_per_group) | 2413 | ret = stripe_width; |
2391 | return stripe_width; | 2414 | else if (stride <= sbi->s_blocks_per_group) |
2415 | ret = stride; | ||
2416 | else | ||
2417 | ret = 0; | ||
2392 | 2418 | ||
2393 | if (stride <= sbi->s_blocks_per_group) | 2419 | /* |
2394 | return stride; | 2420 | * If the stripe width is 1, this makes no sense and |
2421 | * we set it to 0 to turn off stripe handling code. | ||
2422 | */ | ||
2423 | if (ret <= 1) | ||
2424 | ret = 0; | ||
2395 | 2425 | ||
2396 | return 0; | 2426 | return ret; |
2397 | } | 2427 | } |
2398 | 2428 | ||
2399 | /* sysfs support */ | 2429 | /* sysfs support */ |
@@ -3408,8 +3438,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3408 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); | 3438 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); |
3409 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | 3439 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / |
3410 | EXT4_DESC_PER_BLOCK(sb); | 3440 | EXT4_DESC_PER_BLOCK(sb); |
3411 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), | 3441 | sbi->s_group_desc = ext4_kvmalloc(db_count * |
3412 | GFP_KERNEL); | 3442 | sizeof(struct buffer_head *), |
3443 | GFP_KERNEL); | ||
3413 | if (sbi->s_group_desc == NULL) { | 3444 | if (sbi->s_group_desc == NULL) { |
3414 | ext4_msg(sb, KERN_ERR, "not enough memory"); | 3445 | ext4_msg(sb, KERN_ERR, "not enough memory"); |
3415 | goto failed_mount; | 3446 | goto failed_mount; |
@@ -3491,7 +3522,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3491 | 3522 | ||
3492 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 3523 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
3493 | mutex_init(&sbi->s_orphan_lock); | 3524 | mutex_init(&sbi->s_orphan_lock); |
3494 | mutex_init(&sbi->s_resize_lock); | 3525 | sbi->s_resize_flags = 0; |
3495 | 3526 | ||
3496 | sb->s_root = NULL; | 3527 | sb->s_root = NULL; |
3497 | 3528 | ||
@@ -3741,12 +3772,8 @@ failed_mount_wq: | |||
3741 | } | 3772 | } |
3742 | failed_mount3: | 3773 | failed_mount3: |
3743 | del_timer(&sbi->s_err_report); | 3774 | del_timer(&sbi->s_err_report); |
3744 | if (sbi->s_flex_groups) { | 3775 | if (sbi->s_flex_groups) |
3745 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 3776 | ext4_kvfree(sbi->s_flex_groups); |
3746 | vfree(sbi->s_flex_groups); | ||
3747 | else | ||
3748 | kfree(sbi->s_flex_groups); | ||
3749 | } | ||
3750 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 3777 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
3751 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 3778 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
3752 | percpu_counter_destroy(&sbi->s_dirs_counter); | 3779 | percpu_counter_destroy(&sbi->s_dirs_counter); |
@@ -3756,7 +3783,7 @@ failed_mount3: | |||
3756 | failed_mount2: | 3783 | failed_mount2: |
3757 | for (i = 0; i < db_count; i++) | 3784 | for (i = 0; i < db_count; i++) |
3758 | brelse(sbi->s_group_desc[i]); | 3785 | brelse(sbi->s_group_desc[i]); |
3759 | kfree(sbi->s_group_desc); | 3786 | ext4_kvfree(sbi->s_group_desc); |
3760 | failed_mount: | 3787 | failed_mount: |
3761 | if (sbi->s_proc) { | 3788 | if (sbi->s_proc) { |
3762 | remove_proc_entry(sb->s_id, ext4_proc_root); | 3789 | remove_proc_entry(sb->s_id, ext4_proc_root); |
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h new file mode 100644 index 000000000000..011ba6670d99 --- /dev/null +++ b/fs/ext4/truncate.h | |||
@@ -0,0 +1,43 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/truncate.h | ||
3 | * | ||
4 | * Common inline functions needed for truncate support | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * Truncate blocks that were not used by write. We have to truncate the | ||
9 | * pagecache as well so that corresponding buffers get properly unmapped. | ||
10 | */ | ||
11 | static inline void ext4_truncate_failed_write(struct inode *inode) | ||
12 | { | ||
13 | truncate_inode_pages(inode->i_mapping, inode->i_size); | ||
14 | ext4_truncate(inode); | ||
15 | } | ||
16 | |||
17 | /* | ||
18 | * Work out how many blocks we need to proceed with the next chunk of a | ||
19 | * truncate transaction. | ||
20 | */ | ||
21 | static inline unsigned long ext4_blocks_for_truncate(struct inode *inode) | ||
22 | { | ||
23 | ext4_lblk_t needed; | ||
24 | |||
25 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); | ||
26 | |||
27 | /* Give ourselves just enough room to cope with inodes in which | ||
28 | * i_blocks is corrupt: we've seen disk corruptions in the past | ||
29 | * which resulted in random data in an inode which looked enough | ||
30 | * like a regular file for ext4 to try to delete it. Things | ||
31 | * will go a bit crazy if that happens, but at least we should | ||
32 | * try not to panic the whole kernel. */ | ||
33 | if (needed < 2) | ||
34 | needed = 2; | ||
35 | |||
36 | /* But we need to bound the transaction so we don't overflow the | ||
37 | * journal. */ | ||
38 | if (needed > EXT4_MAX_TRANS_DATA) | ||
39 | needed = EXT4_MAX_TRANS_DATA; | ||
40 | |||
41 | return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; | ||
42 | } | ||
43 | |||
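A typical call site for this helper — hypothetical, but matching how ext4 starts a truncate transaction in kernels of this era — would reserve journal credits like so:

	/* hypothetical caller: reserve enough journal credits for the
	 * next chunk of the truncate, bounded by EXT4_MAX_TRANS_DATA */
	handle_t *handle;

	handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode));
	if (IS_ERR(handle))
		return PTR_ERR(handle);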
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4ad64732cbce..5efbd5d7701a 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, | |||
1231 | struct super_block *sb = dir->i_sb; | 1231 | struct super_block *sb = dir->i_sb; |
1232 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 1232 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
1233 | struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ | 1233 | struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ |
1234 | struct msdos_dir_entry *de; | 1234 | struct msdos_dir_entry *uninitialized_var(de); |
1235 | int err, free_slots, i, nr_bhs; | 1235 | int err, free_slots, i, nr_bhs; |
1236 | loff_t pos, i_pos; | 1236 | loff_t pos, i_pos; |
1237 | 1237 | ||
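uninitialized_var() only silences a false-positive "may be used uninitialized" compiler warning; in the kernel headers of this era it is a self-assignment that generates no code:

	/* from include/linux/compiler-gcc.h (this era, simplified) */
	#define uninitialized_var(x) x = x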
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5942fec22c65..1726d7303047 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1188,9 +1188,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, | |||
1188 | out: | 1188 | out: |
1189 | /* UTF-8 doesn't provide FAT semantics */ | 1189 | /* UTF-8 doesn't provide FAT semantics */ |
1190 | if (!strcmp(opts->iocharset, "utf8")) { | 1190 | if (!strcmp(opts->iocharset, "utf8")) { |
1191 | fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" | 1191 | fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" |
1192 | " for FAT filesystems, filesystem will be " | 1192 | " for FAT filesystems, filesystem will be " |
1193 | "case sensitive!\n"); | 1193 | "case sensitive!"); |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | /* If user doesn't specify allow_utime, it's initialized from dmask. */ | 1196 | /* If user doesn't specify allow_utime, it's initialized from dmask. */ |
@@ -1367,6 +1367,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1367 | sbi->free_clusters = -1; /* Don't know yet */ | 1367 | sbi->free_clusters = -1; /* Don't know yet */ |
1368 | sbi->free_clus_valid = 0; | 1368 | sbi->free_clus_valid = 0; |
1369 | sbi->prev_free = FAT_START_ENT; | 1369 | sbi->prev_free = FAT_START_ENT; |
1370 | sb->s_maxbytes = 0xffffffff; | ||
1370 | 1371 | ||
1371 | if (!sbi->fat_length && b->fat32_length) { | 1372 | if (!sbi->fat_length && b->fat32_length) { |
1372 | struct fat_boot_fsinfo *fsinfo; | 1373 | struct fat_boot_fsinfo *fsinfo; |
@@ -1377,8 +1378,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1377 | sbi->fat_length = le32_to_cpu(b->fat32_length); | 1378 | sbi->fat_length = le32_to_cpu(b->fat32_length); |
1378 | sbi->root_cluster = le32_to_cpu(b->root_cluster); | 1379 | sbi->root_cluster = le32_to_cpu(b->root_cluster); |
1379 | 1380 | ||
1380 | sb->s_maxbytes = 0xffffffff; | ||
1381 | |||
1382 | /* MC - if info_sector is 0, don't multiply by 0 */ | 1381 | /* MC - if info_sector is 0, don't multiply by 0 */ |
1383 | sbi->fsinfo_sector = le16_to_cpu(b->info_sector); | 1382 | sbi->fsinfo_sector = le16_to_cpu(b->info_sector); |
1384 | if (sbi->fsinfo_sector == 0) | 1383 | if (sbi->fsinfo_sector == 0) |
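Moving the s_maxbytes assignment out of the FAT32-only branch matters because the VFS clamps every write against it; with the default left in place on FAT12/16, large writes would be shortened or rejected. A simplified sketch of the check (modeled on generic_write_checks(); details assumed):

	/* simplified from mm/filemap.c:generic_write_checks() */
	if (pos >= inode->i_sb->s_maxbytes)
		return -EFBIG;
	if (count > inode->i_sb->s_maxbytes - pos)
		count = inode->i_sb->s_maxbytes - pos;	/* write silently shortened */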
diff --git a/fs/file_table.c b/fs/file_table.c
index 01e4c1e8e6b6..c322794f7360 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/percpu.h> | 25 | #include <linux/percpu.h> |
26 | #include <linux/ima.h> | 26 | #include <linux/ima.h> |
27 | 27 | ||
28 | #include <asm/atomic.h> | 28 | #include <linux/atomic.h> |
29 | 29 | ||
30 | #include "internal.h" | 30 | #include "internal.h" |
31 | 31 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b8c507ca42f7..04cf3b91e501 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -35,7 +35,9 @@ | |||
35 | struct wb_writeback_work { | 35 | struct wb_writeback_work { |
36 | long nr_pages; | 36 | long nr_pages; |
37 | struct super_block *sb; | 37 | struct super_block *sb; |
38 | unsigned long *older_than_this; | ||
38 | enum writeback_sync_modes sync_mode; | 39 | enum writeback_sync_modes sync_mode; |
40 | unsigned int tagged_writepages:1; | ||
39 | unsigned int for_kupdate:1; | 41 | unsigned int for_kupdate:1; |
40 | unsigned int range_cyclic:1; | 42 | unsigned int range_cyclic:1; |
41 | unsigned int for_background:1; | 43 | unsigned int for_background:1; |
@@ -180,12 +182,13 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) | |||
180 | */ | 182 | */ |
181 | void inode_wb_list_del(struct inode *inode) | 183 | void inode_wb_list_del(struct inode *inode) |
182 | { | 184 | { |
183 | spin_lock(&inode_wb_list_lock); | 185 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
186 | |||
187 | spin_lock(&bdi->wb.list_lock); | ||
184 | list_del_init(&inode->i_wb_list); | 188 | list_del_init(&inode->i_wb_list); |
185 | spin_unlock(&inode_wb_list_lock); | 189 | spin_unlock(&bdi->wb.list_lock); |
186 | } | 190 | } |
187 | 191 | ||
188 | |||
189 | /* | 192 | /* |
190 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the | 193 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the |
191 | * furthest end of its superblock's dirty-inode list. | 194 | * furthest end of its superblock's dirty-inode list. |
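This series replaces the single global inode_wb_list_lock with a per-backing_dev_info wb.list_lock, so writeback list manipulation on one device no longer contends with every other device. The nesting the converted code must honor is (sketch; see also the fs/inode.c documentation hunk at the end of this diff):

	spin_lock(&bdi->wb.list_lock);	/* outer: per-bdi writeback lists */
	spin_lock(&inode->i_lock);	/* inner: per-inode state */
	/* ... manipulate inode->i_wb_list / inode->i_state ... */
	spin_unlock(&inode->i_lock);
	spin_unlock(&bdi->wb.list_lock);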
@@ -195,11 +198,9 @@ void inode_wb_list_del(struct inode *inode) | |||
195 | * the case then the inode must have been redirtied while it was being written | 198 | * the case then the inode must have been redirtied while it was being written |
196 | * out and we don't reset its dirtied_when. | 199 | * out and we don't reset its dirtied_when. |
197 | */ | 200 | */ |
198 | static void redirty_tail(struct inode *inode) | 201 | static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) |
199 | { | 202 | { |
200 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 203 | assert_spin_locked(&wb->list_lock); |
201 | |||
202 | assert_spin_locked(&inode_wb_list_lock); | ||
203 | if (!list_empty(&wb->b_dirty)) { | 204 | if (!list_empty(&wb->b_dirty)) { |
204 | struct inode *tail; | 205 | struct inode *tail; |
205 | 206 | ||
@@ -213,11 +214,9 @@ static void redirty_tail(struct inode *inode) | |||
213 | /* | 214 | /* |
214 | * requeue inode for re-scanning after bdi->b_io list is exhausted. | 215 | * requeue inode for re-scanning after bdi->b_io list is exhausted. |
215 | */ | 216 | */ |
216 | static void requeue_io(struct inode *inode) | 217 | static void requeue_io(struct inode *inode, struct bdi_writeback *wb) |
217 | { | 218 | { |
218 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 219 | assert_spin_locked(&wb->list_lock); |
219 | |||
220 | assert_spin_locked(&inode_wb_list_lock); | ||
221 | list_move(&inode->i_wb_list, &wb->b_more_io); | 220 | list_move(&inode->i_wb_list, &wb->b_more_io); |
222 | } | 221 | } |
223 | 222 | ||
@@ -225,7 +224,7 @@ static void inode_sync_complete(struct inode *inode) | |||
225 | { | 224 | { |
226 | /* | 225 | /* |
227 | * Prevent speculative execution through | 226 | * Prevent speculative execution through |
228 | * spin_unlock(&inode_wb_list_lock); | 227 | * spin_unlock(&wb->list_lock); |
229 | */ | 228 | */ |
230 | 229 | ||
231 | smp_mb(); | 230 | smp_mb(); |
@@ -250,15 +249,16 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) | |||
250 | /* | 249 | /* |
251 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. | 250 | * Move expired dirty inodes from @delaying_queue to @dispatch_queue. |
252 | */ | 251 | */ |
253 | static void move_expired_inodes(struct list_head *delaying_queue, | 252 | static int move_expired_inodes(struct list_head *delaying_queue, |
254 | struct list_head *dispatch_queue, | 253 | struct list_head *dispatch_queue, |
255 | unsigned long *older_than_this) | 254 | unsigned long *older_than_this) |
256 | { | 255 | { |
257 | LIST_HEAD(tmp); | 256 | LIST_HEAD(tmp); |
258 | struct list_head *pos, *node; | 257 | struct list_head *pos, *node; |
259 | struct super_block *sb = NULL; | 258 | struct super_block *sb = NULL; |
260 | struct inode *inode; | 259 | struct inode *inode; |
261 | int do_sb_sort = 0; | 260 | int do_sb_sort = 0; |
261 | int moved = 0; | ||
262 | 262 | ||
263 | while (!list_empty(delaying_queue)) { | 263 | while (!list_empty(delaying_queue)) { |
264 | inode = wb_inode(delaying_queue->prev); | 264 | inode = wb_inode(delaying_queue->prev); |
@@ -269,12 +269,13 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
269 | do_sb_sort = 1; | 269 | do_sb_sort = 1; |
270 | sb = inode->i_sb; | 270 | sb = inode->i_sb; |
271 | list_move(&inode->i_wb_list, &tmp); | 271 | list_move(&inode->i_wb_list, &tmp); |
272 | moved++; | ||
272 | } | 273 | } |
273 | 274 | ||
274 | /* just one sb in list, splice to dispatch_queue and we're done */ | 275 | /* just one sb in list, splice to dispatch_queue and we're done */ |
275 | if (!do_sb_sort) { | 276 | if (!do_sb_sort) { |
276 | list_splice(&tmp, dispatch_queue); | 277 | list_splice(&tmp, dispatch_queue); |
277 | return; | 278 | goto out; |
278 | } | 279 | } |
279 | 280 | ||
280 | /* Move inodes from one superblock together */ | 281 | /* Move inodes from one superblock together */ |
@@ -286,6 +287,8 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
286 | list_move(&inode->i_wb_list, dispatch_queue); | 287 | list_move(&inode->i_wb_list, dispatch_queue); |
287 | } | 288 | } |
288 | } | 289 | } |
290 | out: | ||
291 | return moved; | ||
289 | } | 292 | } |
290 | 293 | ||
291 | /* | 294 | /* |
@@ -301,9 +304,11 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
301 | */ | 304 | */ |
302 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 305 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
303 | { | 306 | { |
304 | assert_spin_locked(&inode_wb_list_lock); | 307 | int moved; |
308 | assert_spin_locked(&wb->list_lock); | ||
305 | list_splice_init(&wb->b_more_io, &wb->b_io); | 309 | list_splice_init(&wb->b_more_io, &wb->b_io); |
306 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 310 | moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
311 | trace_writeback_queue_io(wb, older_than_this, moved); | ||
307 | } | 312 | } |
308 | 313 | ||
309 | static int write_inode(struct inode *inode, struct writeback_control *wbc) | 314 | static int write_inode(struct inode *inode, struct writeback_control *wbc) |
@@ -316,7 +321,8 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc) | |||
316 | /* | 321 | /* |
317 | * Wait for writeback on an inode to complete. | 322 | * Wait for writeback on an inode to complete. |
318 | */ | 323 | */ |
319 | static void inode_wait_for_writeback(struct inode *inode) | 324 | static void inode_wait_for_writeback(struct inode *inode, |
325 | struct bdi_writeback *wb) | ||
320 | { | 326 | { |
321 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); | 327 | DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); |
322 | wait_queue_head_t *wqh; | 328 | wait_queue_head_t *wqh; |
@@ -324,15 +330,15 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
324 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 330 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
325 | while (inode->i_state & I_SYNC) { | 331 | while (inode->i_state & I_SYNC) { |
326 | spin_unlock(&inode->i_lock); | 332 | spin_unlock(&inode->i_lock); |
327 | spin_unlock(&inode_wb_list_lock); | 333 | spin_unlock(&wb->list_lock); |
328 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 334 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
329 | spin_lock(&inode_wb_list_lock); | 335 | spin_lock(&wb->list_lock); |
330 | spin_lock(&inode->i_lock); | 336 | spin_lock(&inode->i_lock); |
331 | } | 337 | } |
332 | } | 338 | } |
333 | 339 | ||
334 | /* | 340 | /* |
335 | * Write out an inode's dirty pages. Called under inode_wb_list_lock and | 341 | * Write out an inode's dirty pages. Called under wb->list_lock and |
336 | * inode->i_lock. Either the caller has an active reference on the inode or | 342 | * inode->i_lock. Either the caller has an active reference on the inode or |
337 | * the inode has I_WILL_FREE set. | 343 | * the inode has I_WILL_FREE set. |
338 | * | 344 | * |
@@ -343,13 +349,15 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
343 | * livelocks, etc. | 349 | * livelocks, etc. |
344 | */ | 350 | */ |
345 | static int | 351 | static int |
346 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | 352 | writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, |
353 | struct writeback_control *wbc) | ||
347 | { | 354 | { |
348 | struct address_space *mapping = inode->i_mapping; | 355 | struct address_space *mapping = inode->i_mapping; |
356 | long nr_to_write = wbc->nr_to_write; | ||
349 | unsigned dirty; | 357 | unsigned dirty; |
350 | int ret; | 358 | int ret; |
351 | 359 | ||
352 | assert_spin_locked(&inode_wb_list_lock); | 360 | assert_spin_locked(&wb->list_lock); |
353 | assert_spin_locked(&inode->i_lock); | 361 | assert_spin_locked(&inode->i_lock); |
354 | 362 | ||
355 | if (!atomic_read(&inode->i_count)) | 363 | if (!atomic_read(&inode->i_count)) |
@@ -367,14 +375,16 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
367 | * completed a full scan of b_io. | 375 | * completed a full scan of b_io. |
368 | */ | 376 | */ |
369 | if (wbc->sync_mode != WB_SYNC_ALL) { | 377 | if (wbc->sync_mode != WB_SYNC_ALL) { |
370 | requeue_io(inode); | 378 | requeue_io(inode, wb); |
379 | trace_writeback_single_inode_requeue(inode, wbc, | ||
380 | nr_to_write); | ||
371 | return 0; | 381 | return 0; |
372 | } | 382 | } |
373 | 383 | ||
374 | /* | 384 | /* |
375 | * It's a data-integrity sync. We must wait. | 385 | * It's a data-integrity sync. We must wait. |
376 | */ | 386 | */ |
377 | inode_wait_for_writeback(inode); | 387 | inode_wait_for_writeback(inode, wb); |
378 | } | 388 | } |
379 | 389 | ||
380 | BUG_ON(inode->i_state & I_SYNC); | 390 | BUG_ON(inode->i_state & I_SYNC); |
@@ -383,7 +393,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
383 | inode->i_state |= I_SYNC; | 393 | inode->i_state |= I_SYNC; |
384 | inode->i_state &= ~I_DIRTY_PAGES; | 394 | inode->i_state &= ~I_DIRTY_PAGES; |
385 | spin_unlock(&inode->i_lock); | 395 | spin_unlock(&inode->i_lock); |
386 | spin_unlock(&inode_wb_list_lock); | 396 | spin_unlock(&wb->list_lock); |
387 | 397 | ||
388 | ret = do_writepages(mapping, wbc); | 398 | ret = do_writepages(mapping, wbc); |
389 | 399 | ||
@@ -414,10 +424,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
414 | ret = err; | 424 | ret = err; |
415 | } | 425 | } |
416 | 426 | ||
417 | spin_lock(&inode_wb_list_lock); | 427 | spin_lock(&wb->list_lock); |
418 | spin_lock(&inode->i_lock); | 428 | spin_lock(&inode->i_lock); |
419 | inode->i_state &= ~I_SYNC; | 429 | inode->i_state &= ~I_SYNC; |
420 | if (!(inode->i_state & I_FREEING)) { | 430 | if (!(inode->i_state & I_FREEING)) { |
431 | /* | ||
432 | * Sync livelock prevention. Each inode is tagged and synced in | ||
433 | * one shot. If still dirty, it will be redirty_tail()'ed below. | ||
434 | * Update the dirty time to prevent enqueue and sync it again. | ||
435 | */ | ||
436 | if ((inode->i_state & I_DIRTY) && | ||
437 | (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) | ||
438 | inode->dirtied_when = jiffies; | ||
439 | |||
421 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 440 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
422 | /* | 441 | /* |
423 | * We didn't write back all the pages. nfs_writepages() | 442 | * We didn't write back all the pages. nfs_writepages() |
@@ -428,7 +447,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
428 | /* | 447 | /* |
429 | * slice used up: queue for next turn | 448 | * slice used up: queue for next turn |
430 | */ | 449 | */ |
431 | requeue_io(inode); | 450 | requeue_io(inode, wb); |
432 | } else { | 451 | } else { |
433 | /* | 452 | /* |
434 | * Writeback blocked by something other than | 453 | * Writeback blocked by something other than |
@@ -437,7 +456,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
437 | * retrying writeback of the dirty page/inode | 456 | * retrying writeback of the dirty page/inode |
438 | * that cannot be performed immediately. | 457 | * that cannot be performed immediately. |
439 | */ | 458 | */ |
440 | redirty_tail(inode); | 459 | redirty_tail(inode, wb); |
441 | } | 460 | } |
442 | } else if (inode->i_state & I_DIRTY) { | 461 | } else if (inode->i_state & I_DIRTY) { |
443 | /* | 462 | /* |
@@ -446,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
446 | * submission or metadata updates after data IO | 465 | * submission or metadata updates after data IO |
447 | * completion. | 466 | * completion. |
448 | */ | 467 | */ |
449 | redirty_tail(inode); | 468 | redirty_tail(inode, wb); |
450 | } else { | 469 | } else { |
451 | /* | 470 | /* |
452 | * The inode is clean. At this point we either have | 471 | * The inode is clean. At this point we either have |
@@ -457,9 +476,41 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
457 | } | 476 | } |
458 | } | 477 | } |
459 | inode_sync_complete(inode); | 478 | inode_sync_complete(inode); |
479 | trace_writeback_single_inode(inode, wbc, nr_to_write); | ||
460 | return ret; | 480 | return ret; |
461 | } | 481 | } |
462 | 482 | ||
483 | static long writeback_chunk_size(struct backing_dev_info *bdi, | ||
484 | struct wb_writeback_work *work) | ||
485 | { | ||
486 | long pages; | ||
487 | |||
488 | /* | ||
489 | * WB_SYNC_ALL mode does livelock avoidance by syncing dirty | ||
490 | * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX | ||
491 | * here avoids calling into writeback_inodes_wb() more than once. | ||
492 | * | ||
493 | * The intended call sequence for WB_SYNC_ALL writeback is: | ||
494 | * | ||
495 | * wb_writeback() | ||
496 | * writeback_sb_inodes() <== called only once | ||
497 | * write_cache_pages() <== called once for each inode | ||
498 | * (quickly) tag currently dirty pages | ||
499 | * (maybe slowly) sync all tagged pages | ||
500 | */ | ||
501 | if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) | ||
502 | pages = LONG_MAX; | ||
503 | else { | ||
504 | pages = min(bdi->avg_write_bandwidth / 2, | ||
505 | global_dirty_limit / DIRTY_SCOPE); | ||
506 | pages = min(pages, work->nr_pages); | ||
507 | pages = round_down(pages + MIN_WRITEBACK_PAGES, | ||
508 | MIN_WRITEBACK_PAGES); | ||
509 | } | ||
510 | |||
511 | return pages; | ||
512 | } | ||
513 | |||
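For WB_SYNC_NONE work, the chunk size is now derived from measured bandwidth rather than the old fixed MAX_WRITEBACK_PAGES. A worked example with assumed numbers:

	/* assumed: avg_write_bandwidth = 20000 pages/s,
	 *          global_dirty_limit  = 80000 pages, DIRTY_SCOPE = 8,
	 *          work->nr_pages = LONG_MAX,
	 *          MIN_WRITEBACK_PAGES = 1024 (4 KiB pages)
	 *
	 * pages = min(20000 / 2, 80000 / 8)       = 10000
	 * pages = min(10000, LONG_MAX)            = 10000
	 * pages = round_down(10000 + 1024, 1024)  = 10240
	 *
	 * so each writeback_sb_inodes() pass gets a ~10k-page budget
	 * instead of the former hard-coded 1024. */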
463 | /* | 514 | /* |
464 | * Write a portion of b_io inodes which belong to @sb. | 515 | * Write a portion of b_io inodes which belong to @sb. |
465 | * | 516 | * |
@@ -467,24 +518,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
467 | * inodes. Otherwise write only ones which go sequentially | 518 | * inodes. Otherwise write only ones which go sequentially |
468 | * in reverse order. | 519 | * in reverse order. |
469 | * | 520 | * |
470 | * Return 1, if the caller writeback routine should be | 521 | * Return the number of pages and/or inodes written. |
471 | * interrupted. Otherwise return 0. | ||
472 | */ | 522 | */ |
473 | static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | 523 | static long writeback_sb_inodes(struct super_block *sb, |
474 | struct writeback_control *wbc, bool only_this_sb) | 524 | struct bdi_writeback *wb, |
525 | struct wb_writeback_work *work) | ||
475 | { | 526 | { |
527 | struct writeback_control wbc = { | ||
528 | .sync_mode = work->sync_mode, | ||
529 | .tagged_writepages = work->tagged_writepages, | ||
530 | .for_kupdate = work->for_kupdate, | ||
531 | .for_background = work->for_background, | ||
532 | .range_cyclic = work->range_cyclic, | ||
533 | .range_start = 0, | ||
534 | .range_end = LLONG_MAX, | ||
535 | }; | ||
536 | unsigned long start_time = jiffies; | ||
537 | long write_chunk; | ||
538 | long wrote = 0; /* count both pages and inodes */ | ||
539 | |||
476 | while (!list_empty(&wb->b_io)) { | 540 | while (!list_empty(&wb->b_io)) { |
477 | long pages_skipped; | ||
478 | struct inode *inode = wb_inode(wb->b_io.prev); | 541 | struct inode *inode = wb_inode(wb->b_io.prev); |
479 | 542 | ||
480 | if (inode->i_sb != sb) { | 543 | if (inode->i_sb != sb) { |
481 | if (only_this_sb) { | 544 | if (work->sb) { |
482 | /* | 545 | /* |
483 | * We only want to write back data for this | 546 | * We only want to write back data for this |
484 | * superblock, move all inodes not belonging | 547 | * superblock, move all inodes not belonging |
485 | * to it back onto the dirty list. | 548 | * to it back onto the dirty list. |
486 | */ | 549 | */ |
487 | redirty_tail(inode); | 550 | redirty_tail(inode, wb); |
488 | continue; | 551 | continue; |
489 | } | 552 | } |
490 | 553 | ||
@@ -493,7 +556,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
493 | * Bounce back to the caller to unpin this and | 556 | * Bounce back to the caller to unpin this and |
494 | * pin the next superblock. | 557 | * pin the next superblock. |
495 | */ | 558 | */ |
496 | return 0; | 559 | break; |
497 | } | 560 | } |
498 | 561 | ||
499 | /* | 562 | /* |
@@ -504,95 +567,96 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
504 | spin_lock(&inode->i_lock); | 567 | spin_lock(&inode->i_lock); |
505 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | 568 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { |
506 | spin_unlock(&inode->i_lock); | 569 | spin_unlock(&inode->i_lock); |
507 | requeue_io(inode); | 570 | redirty_tail(inode, wb); |
508 | continue; | 571 | continue; |
509 | } | 572 | } |
510 | |||
511 | /* | ||
512 | * Was this inode dirtied after sync_sb_inodes was called? | ||
513 | * This keeps sync from extra jobs and livelock. | ||
514 | */ | ||
515 | if (inode_dirtied_after(inode, wbc->wb_start)) { | ||
516 | spin_unlock(&inode->i_lock); | ||
517 | return 1; | ||
518 | } | ||
519 | |||
520 | __iget(inode); | 573 | __iget(inode); |
574 | write_chunk = writeback_chunk_size(wb->bdi, work); | ||
575 | wbc.nr_to_write = write_chunk; | ||
576 | wbc.pages_skipped = 0; | ||
577 | |||
578 | writeback_single_inode(inode, wb, &wbc); | ||
521 | 579 | ||
522 | pages_skipped = wbc->pages_skipped; | 580 | work->nr_pages -= write_chunk - wbc.nr_to_write; |
523 | writeback_single_inode(inode, wbc); | 581 | wrote += write_chunk - wbc.nr_to_write; |
524 | if (wbc->pages_skipped != pages_skipped) { | 582 | if (!(inode->i_state & I_DIRTY)) |
583 | wrote++; | ||
584 | if (wbc.pages_skipped) { | ||
525 | /* | 585 | /* |
526 | * writeback is not making progress due to locked | 586 | * writeback is not making progress due to locked |
527 | * buffers. Skip this inode for now. | 587 | * buffers. Skip this inode for now. |
528 | */ | 588 | */ |
529 | redirty_tail(inode); | 589 | redirty_tail(inode, wb); |
530 | } | 590 | } |
531 | spin_unlock(&inode->i_lock); | 591 | spin_unlock(&inode->i_lock); |
532 | spin_unlock(&inode_wb_list_lock); | 592 | spin_unlock(&wb->list_lock); |
533 | iput(inode); | 593 | iput(inode); |
534 | cond_resched(); | 594 | cond_resched(); |
535 | spin_lock(&inode_wb_list_lock); | 595 | spin_lock(&wb->list_lock); |
536 | if (wbc->nr_to_write <= 0) { | 596 | /* |
537 | wbc->more_io = 1; | 597 | * bail out to wb_writeback() often enough to check |
538 | return 1; | 598 | * background threshold and other termination conditions. |
599 | */ | ||
600 | if (wrote) { | ||
601 | if (time_is_before_jiffies(start_time + HZ / 10UL)) | ||
602 | break; | ||
603 | if (work->nr_pages <= 0) | ||
604 | break; | ||
539 | } | 605 | } |
540 | if (!list_empty(&wb->b_more_io)) | ||
541 | wbc->more_io = 1; | ||
542 | } | 606 | } |
543 | /* b_io is empty */ | 607 | return wrote; |
544 | return 1; | ||
545 | } | 608 | } |
546 | 609 | ||
547 | void writeback_inodes_wb(struct bdi_writeback *wb, | 610 | static long __writeback_inodes_wb(struct bdi_writeback *wb, |
548 | struct writeback_control *wbc) | 611 | struct wb_writeback_work *work) |
549 | { | 612 | { |
550 | int ret = 0; | 613 | unsigned long start_time = jiffies; |
551 | 614 | long wrote = 0; | |
552 | if (!wbc->wb_start) | ||
553 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
554 | spin_lock(&inode_wb_list_lock); | ||
555 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
556 | queue_io(wb, wbc->older_than_this); | ||
557 | 615 | ||
558 | while (!list_empty(&wb->b_io)) { | 616 | while (!list_empty(&wb->b_io)) { |
559 | struct inode *inode = wb_inode(wb->b_io.prev); | 617 | struct inode *inode = wb_inode(wb->b_io.prev); |
560 | struct super_block *sb = inode->i_sb; | 618 | struct super_block *sb = inode->i_sb; |
561 | 619 | ||
562 | if (!grab_super_passive(sb)) { | 620 | if (!grab_super_passive(sb)) { |
563 | requeue_io(inode); | 621 | /* |
622 | * grab_super_passive() may fail consistently due to | ||
623 | * s_umount being grabbed by someone else. Don't use | ||
624 | * requeue_io() to avoid busy retrying the inode/sb. | ||
625 | */ | ||
626 | redirty_tail(inode, wb); | ||
564 | continue; | 627 | continue; |
565 | } | 628 | } |
566 | ret = writeback_sb_inodes(sb, wb, wbc, false); | 629 | wrote += writeback_sb_inodes(sb, wb, work); |
567 | drop_super(sb); | 630 | drop_super(sb); |
568 | 631 | ||
569 | if (ret) | 632 | /* refer to the same tests at the end of writeback_sb_inodes */ |
570 | break; | 633 | if (wrote) { |
634 | if (time_is_before_jiffies(start_time + HZ / 10UL)) | ||
635 | break; | ||
636 | if (work->nr_pages <= 0) | ||
637 | break; | ||
638 | } | ||
571 | } | 639 | } |
572 | spin_unlock(&inode_wb_list_lock); | ||
573 | /* Leave any unwritten inodes on b_io */ | 640 | /* Leave any unwritten inodes on b_io */ |
641 | return wrote; | ||
574 | } | 642 | } |
575 | 643 | ||
576 | static void __writeback_inodes_sb(struct super_block *sb, | 644 | long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages) |
577 | struct bdi_writeback *wb, struct writeback_control *wbc) | ||
578 | { | 645 | { |
579 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 646 | struct wb_writeback_work work = { |
647 | .nr_pages = nr_pages, | ||
648 | .sync_mode = WB_SYNC_NONE, | ||
649 | .range_cyclic = 1, | ||
650 | }; | ||
580 | 651 | ||
581 | spin_lock(&inode_wb_list_lock); | 652 | spin_lock(&wb->list_lock); |
582 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 653 | if (list_empty(&wb->b_io)) |
583 | queue_io(wb, wbc->older_than_this); | 654 | queue_io(wb, NULL); |
584 | writeback_sb_inodes(sb, wb, wbc, true); | 655 | __writeback_inodes_wb(wb, &work); |
585 | spin_unlock(&inode_wb_list_lock); | 656 | spin_unlock(&wb->list_lock); |
586 | } | ||
587 | 657 | ||
588 | /* | 658 | return nr_pages - work.nr_pages; |
589 | * The maximum number of pages to writeout in a single bdi flush/kupdate | 659 | } |
590 | * operation. We do this so we don't hold I_SYNC against an inode for | ||
591 | * enormous amounts of time, which would block a userspace task which has | ||
592 | * been forced to throttle against that inode. Also, the code reevaluates | ||
593 | * the dirty each time it has written this many pages. | ||
594 | */ | ||
595 | #define MAX_WRITEBACK_PAGES 1024 | ||
596 | 660 | ||
597 | static inline bool over_bground_thresh(void) | 661 | static inline bool over_bground_thresh(void) |
598 | { | 662 | { |
@@ -605,6 +669,16 @@ static inline bool over_bground_thresh(void) | |||
605 | } | 669 | } |
606 | 670 | ||
607 | /* | 671 | /* |
672 | * Called under wb->list_lock. If there are multiple wb per bdi, | ||
673 | * only the flusher working on the first wb should do it. | ||
674 | */ | ||
675 | static void wb_update_bandwidth(struct bdi_writeback *wb, | ||
676 | unsigned long start_time) | ||
677 | { | ||
678 | __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time); | ||
679 | } | ||
680 | |||
681 | /* | ||
608 | * Explicit flushing or periodic writeback of "old" data. | 682 | * Explicit flushing or periodic writeback of "old" data. |
609 | * | 683 | * |
610 | * Define "old": the first time one of an inode's pages is dirtied, we mark the | 684 | * Define "old": the first time one of an inode's pages is dirtied, we mark the |
@@ -622,47 +696,16 @@ static inline bool over_bground_thresh(void) | |||
622 | static long wb_writeback(struct bdi_writeback *wb, | 696 | static long wb_writeback(struct bdi_writeback *wb, |
623 | struct wb_writeback_work *work) | 697 | struct wb_writeback_work *work) |
624 | { | 698 | { |
625 | struct writeback_control wbc = { | 699 | unsigned long wb_start = jiffies; |
626 | .sync_mode = work->sync_mode, | 700 | long nr_pages = work->nr_pages; |
627 | .older_than_this = NULL, | ||
628 | .for_kupdate = work->for_kupdate, | ||
629 | .for_background = work->for_background, | ||
630 | .range_cyclic = work->range_cyclic, | ||
631 | }; | ||
632 | unsigned long oldest_jif; | 701 | unsigned long oldest_jif; |
633 | long wrote = 0; | ||
634 | long write_chunk; | ||
635 | struct inode *inode; | 702 | struct inode *inode; |
703 | long progress; | ||
636 | 704 | ||
637 | if (wbc.for_kupdate) { | 705 | oldest_jif = jiffies; |
638 | wbc.older_than_this = &oldest_jif; | 706 | work->older_than_this = &oldest_jif; |
639 | oldest_jif = jiffies - | ||
640 | msecs_to_jiffies(dirty_expire_interval * 10); | ||
641 | } | ||
642 | if (!wbc.range_cyclic) { | ||
643 | wbc.range_start = 0; | ||
644 | wbc.range_end = LLONG_MAX; | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * WB_SYNC_ALL mode does livelock avoidance by syncing dirty | ||
649 | * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX | ||
650 | * here avoids calling into writeback_inodes_wb() more than once. | ||
651 | * | ||
652 | * The intended call sequence for WB_SYNC_ALL writeback is: | ||
653 | * | ||
654 | * wb_writeback() | ||
655 | * __writeback_inodes_sb() <== called only once | ||
656 | * write_cache_pages() <== called once for each inode | ||
657 | * (quickly) tag currently dirty pages | ||
658 | * (maybe slowly) sync all tagged pages | ||
659 | */ | ||
660 | if (wbc.sync_mode == WB_SYNC_NONE) | ||
661 | write_chunk = MAX_WRITEBACK_PAGES; | ||
662 | else | ||
663 | write_chunk = LONG_MAX; | ||
664 | 707 | ||
665 | wbc.wb_start = jiffies; /* livelock avoidance */ | 708 | spin_lock(&wb->list_lock); |
666 | for (;;) { | 709 | for (;;) { |
667 | /* | 710 | /* |
668 | * Stop writeback when nr_pages has been consumed | 711 | * Stop writeback when nr_pages has been consumed |
@@ -687,52 +730,54 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
687 | if (work->for_background && !over_bground_thresh()) | 730 | if (work->for_background && !over_bground_thresh()) |
688 | break; | 731 | break; |
689 | 732 | ||
690 | wbc.more_io = 0; | 733 | if (work->for_kupdate) { |
691 | wbc.nr_to_write = write_chunk; | 734 | oldest_jif = jiffies - |
692 | wbc.pages_skipped = 0; | 735 | msecs_to_jiffies(dirty_expire_interval * 10); |
736 | work->older_than_this = &oldest_jif; | ||
737 | } | ||
693 | 738 | ||
694 | trace_wbc_writeback_start(&wbc, wb->bdi); | 739 | trace_writeback_start(wb->bdi, work); |
740 | if (list_empty(&wb->b_io)) | ||
741 | queue_io(wb, work->older_than_this); | ||
695 | if (work->sb) | 742 | if (work->sb) |
696 | __writeback_inodes_sb(work->sb, wb, &wbc); | 743 | progress = writeback_sb_inodes(work->sb, wb, work); |
697 | else | 744 | else |
698 | writeback_inodes_wb(wb, &wbc); | 745 | progress = __writeback_inodes_wb(wb, work); |
699 | trace_wbc_writeback_written(&wbc, wb->bdi); | 746 | trace_writeback_written(wb->bdi, work); |
700 | 747 | ||
701 | work->nr_pages -= write_chunk - wbc.nr_to_write; | 748 | wb_update_bandwidth(wb, wb_start); |
702 | wrote += write_chunk - wbc.nr_to_write; | ||
703 | 749 | ||
704 | /* | 750 | /* |
705 | * If we consumed everything, see if we have more | 751 | * Did we write something? Try for more |
752 | * | ||
753 | * Dirty inodes are moved to b_io for writeback in batches. | ||
754 | * The completion of the current batch does not necessarily | ||
755 | * mean the overall work is done. So we keep looping as long | ||
756 | * as made some progress on cleaning pages or inodes. | ||
706 | */ | 757 | */ |
707 | if (wbc.nr_to_write <= 0) | 758 | if (progress) |
708 | continue; | 759 | continue; |
709 | /* | 760 | /* |
710 | * Didn't write everything and we don't have more IO, bail | 761 | * No more inodes for IO, bail |
711 | */ | 762 | */ |
712 | if (!wbc.more_io) | 763 | if (list_empty(&wb->b_more_io)) |
713 | break; | 764 | break; |
714 | /* | 765 | /* |
715 | * Did we write something? Try for more | ||
716 | */ | ||
717 | if (wbc.nr_to_write < write_chunk) | ||
718 | continue; | ||
719 | /* | ||
720 | * Nothing written. Wait for some inode to | 766 | * Nothing written. Wait for some inode to |
721 | * become available for writeback. Otherwise | 767 | * become available for writeback. Otherwise |
722 | * we'll just busyloop. | 768 | * we'll just busyloop. |
723 | */ | 769 | */ |
724 | spin_lock(&inode_wb_list_lock); | ||
725 | if (!list_empty(&wb->b_more_io)) { | 770 | if (!list_empty(&wb->b_more_io)) { |
771 | trace_writeback_wait(wb->bdi, work); | ||
726 | inode = wb_inode(wb->b_more_io.prev); | 772 | inode = wb_inode(wb->b_more_io.prev); |
727 | trace_wbc_writeback_wait(&wbc, wb->bdi); | ||
728 | spin_lock(&inode->i_lock); | 773 | spin_lock(&inode->i_lock); |
729 | inode_wait_for_writeback(inode); | 774 | inode_wait_for_writeback(inode, wb); |
730 | spin_unlock(&inode->i_lock); | 775 | spin_unlock(&inode->i_lock); |
731 | } | 776 | } |
732 | spin_unlock(&inode_wb_list_lock); | ||
733 | } | 777 | } |
778 | spin_unlock(&wb->list_lock); | ||
734 | 779 | ||
735 | return wrote; | 780 | return nr_pages - work->nr_pages; |
736 | } | 781 | } |
737 | 782 | ||
738 | /* | 783 | /* |
@@ -1063,10 +1108,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1063 | } | 1108 | } |
1064 | 1109 | ||
1065 | spin_unlock(&inode->i_lock); | 1110 | spin_unlock(&inode->i_lock); |
1066 | spin_lock(&inode_wb_list_lock); | 1111 | spin_lock(&bdi->wb.list_lock); |
1067 | inode->dirtied_when = jiffies; | 1112 | inode->dirtied_when = jiffies; |
1068 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1113 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1069 | spin_unlock(&inode_wb_list_lock); | 1114 | spin_unlock(&bdi->wb.list_lock); |
1070 | 1115 | ||
1071 | if (wakeup_bdi) | 1116 | if (wakeup_bdi) |
1072 | bdi_wakeup_thread_delayed(bdi); | 1117 | bdi_wakeup_thread_delayed(bdi); |
@@ -1162,10 +1207,11 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) | |||
1162 | { | 1207 | { |
1163 | DECLARE_COMPLETION_ONSTACK(done); | 1208 | DECLARE_COMPLETION_ONSTACK(done); |
1164 | struct wb_writeback_work work = { | 1209 | struct wb_writeback_work work = { |
1165 | .sb = sb, | 1210 | .sb = sb, |
1166 | .sync_mode = WB_SYNC_NONE, | 1211 | .sync_mode = WB_SYNC_NONE, |
1167 | .done = &done, | 1212 | .tagged_writepages = 1, |
1168 | .nr_pages = nr, | 1213 | .done = &done, |
1214 | .nr_pages = nr, | ||
1169 | }; | 1215 | }; |
1170 | 1216 | ||
1171 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1217 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
@@ -1267,6 +1313,7 @@ EXPORT_SYMBOL(sync_inodes_sb); | |||
1267 | */ | 1313 | */ |
1268 | int write_inode_now(struct inode *inode, int sync) | 1314 | int write_inode_now(struct inode *inode, int sync) |
1269 | { | 1315 | { |
1316 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | ||
1270 | int ret; | 1317 | int ret; |
1271 | struct writeback_control wbc = { | 1318 | struct writeback_control wbc = { |
1272 | .nr_to_write = LONG_MAX, | 1319 | .nr_to_write = LONG_MAX, |
@@ -1279,11 +1326,11 @@ int write_inode_now(struct inode *inode, int sync) | |||
1279 | wbc.nr_to_write = 0; | 1326 | wbc.nr_to_write = 0; |
1280 | 1327 | ||
1281 | might_sleep(); | 1328 | might_sleep(); |
1282 | spin_lock(&inode_wb_list_lock); | 1329 | spin_lock(&wb->list_lock); |
1283 | spin_lock(&inode->i_lock); | 1330 | spin_lock(&inode->i_lock); |
1284 | ret = writeback_single_inode(inode, &wbc); | 1331 | ret = writeback_single_inode(inode, wb, &wbc); |
1285 | spin_unlock(&inode->i_lock); | 1332 | spin_unlock(&inode->i_lock); |
1286 | spin_unlock(&inode_wb_list_lock); | 1333 | spin_unlock(&wb->list_lock); |
1287 | if (sync) | 1334 | if (sync) |
1288 | inode_sync_wait(inode); | 1335 | inode_sync_wait(inode); |
1289 | return ret; | 1336 | return ret; |
@@ -1303,13 +1350,14 @@ EXPORT_SYMBOL(write_inode_now); | |||
1303 | */ | 1350 | */ |
1304 | int sync_inode(struct inode *inode, struct writeback_control *wbc) | 1351 | int sync_inode(struct inode *inode, struct writeback_control *wbc) |
1305 | { | 1352 | { |
1353 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | ||
1306 | int ret; | 1354 | int ret; |
1307 | 1355 | ||
1308 | spin_lock(&inode_wb_list_lock); | 1356 | spin_lock(&wb->list_lock); |
1309 | spin_lock(&inode->i_lock); | 1357 | spin_lock(&inode->i_lock); |
1310 | ret = writeback_single_inode(inode, wbc); | 1358 | ret = writeback_single_inode(inode, wb, wbc); |
1311 | spin_unlock(&inode->i_lock); | 1359 | spin_unlock(&inode->i_lock); |
1312 | spin_unlock(&inode_wb_list_lock); | 1360 | spin_unlock(&wb->list_lock); |
1313 | return ret; | 1361 | return ret; |
1314 | } | 1362 | } |
1315 | EXPORT_SYMBOL(sync_inode); | 1363 | EXPORT_SYMBOL(sync_inode); |
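With the writeback_control plumbing gone, writeback_inodes_wb() now takes a plain page budget and returns the number of pages/inodes written. A sketch of the kind of call site this enables (modeled on balance_dirty_pages(); simplified and assumed):

	long pages_written;

	pages_written = writeback_inodes_wb(&bdi->wb, write_chunk);
	if (pages_written >= write_chunk)
		break;	/* made enough progress for this throttling round */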
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 640fc229df10..5cb8614508c3 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -258,10 +258,14 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, | |||
258 | forget->forget_one.nlookup = nlookup; | 258 | forget->forget_one.nlookup = nlookup; |
259 | 259 | ||
260 | spin_lock(&fc->lock); | 260 | spin_lock(&fc->lock); |
261 | fc->forget_list_tail->next = forget; | 261 | if (fc->connected) { |
262 | fc->forget_list_tail = forget; | 262 | fc->forget_list_tail->next = forget; |
263 | wake_up(&fc->waitq); | 263 | fc->forget_list_tail = forget; |
264 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); | 264 | wake_up(&fc->waitq); |
265 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); | ||
266 | } else { | ||
267 | kfree(forget); | ||
268 | } | ||
265 | spin_unlock(&fc->lock); | 269 | spin_unlock(&fc->lock); |
266 | } | 270 | } |
267 | 271 | ||
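Without the fc->connected check, a forget could be queued after the connection was torn down, when nothing will ever read the list again; freeing it immediately avoids the leak. Roughly:

	/* the window being closed (sketch):
	 *
	 *   CPU0: fuse_queue_forget()     CPU1: fuse_abort_conn()
	 *     spin_lock(&fc->lock)          fc->connected = 0
	 *                                   drain the pending queues
	 *     append to forget_list         -- nothing dequeues it again
	 *
	 * so queue only while fc->connected, else kfree(forget) at once */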
@@ -1358,6 +1362,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, | |||
1358 | if (outarg.namelen > FUSE_NAME_MAX) | 1362 | if (outarg.namelen > FUSE_NAME_MAX) |
1359 | goto err; | 1363 | goto err; |
1360 | 1364 | ||
1365 | err = -EINVAL; | ||
1366 | if (size != sizeof(outarg) + outarg.namelen + 1) | ||
1367 | goto err; | ||
1368 | |||
1361 | name.name = buf; | 1369 | name.name = buf; |
1362 | name.len = outarg.namelen; | 1370 | name.len = outarg.namelen; |
1363 | err = fuse_copy_one(cs, buf, outarg.namelen + 1); | 1371 | err = fuse_copy_one(cs, buf, outarg.namelen + 1); |
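The added -EINVAL check pins the notification's declared size to its actual payload, so a malformed message cannot make the kernel copy a name longer than the buffer that follows the header. The expected layout (sketch):

	/* a FUSE_NOTIFY_INVAL_ENTRY message must be exactly:
	 *
	 *   struct fuse_notify_inval_entry_out outarg;  -- fixed header
	 *   char name[outarg.namelen + 1];              -- NUL-terminated name
	 *
	 * hence: size == sizeof(outarg) + outarg.namelen + 1 */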
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d480d9af46c9..594f07a81c28 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/compat.h> | 16 | #include <linux/compat.h> |
17 | #include <linux/swap.h> | ||
17 | 18 | ||
18 | static const struct file_operations fuse_direct_io_file_operations; | 19 | static const struct file_operations fuse_direct_io_file_operations; |
19 | 20 | ||
@@ -245,6 +246,12 @@ void fuse_release_common(struct file *file, int opcode) | |||
245 | req = ff->reserved_req; | 246 | req = ff->reserved_req; |
246 | fuse_prepare_release(ff, file->f_flags, opcode); | 247 | fuse_prepare_release(ff, file->f_flags, opcode); |
247 | 248 | ||
249 | if (ff->flock) { | ||
250 | struct fuse_release_in *inarg = &req->misc.release.in; | ||
251 | inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; | ||
252 | inarg->lock_owner = fuse_lock_owner_id(ff->fc, | ||
253 | (fl_owner_t) file); | ||
254 | } | ||
248 | /* Hold vfsmount and dentry until release is finished */ | 255 | /* Hold vfsmount and dentry until release is finished */ |
249 | path_get(&file->f_path); | 256 | path_get(&file->f_path); |
250 | req->misc.release.path = file->f_path; | 257 | req->misc.release.path = file->f_path; |
@@ -755,18 +762,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, | |||
755 | return req->misc.write.out.size; | 762 | return req->misc.write.out.size; |
756 | } | 763 | } |
757 | 764 | ||
758 | static int fuse_write_begin(struct file *file, struct address_space *mapping, | ||
759 | loff_t pos, unsigned len, unsigned flags, | ||
760 | struct page **pagep, void **fsdata) | ||
761 | { | ||
762 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
763 | |||
764 | *pagep = grab_cache_page_write_begin(mapping, index, flags); | ||
765 | if (!*pagep) | ||
766 | return -ENOMEM; | ||
767 | return 0; | ||
768 | } | ||
769 | |||
770 | void fuse_write_update_size(struct inode *inode, loff_t pos) | 765 | void fuse_write_update_size(struct inode *inode, loff_t pos) |
771 | { | 766 | { |
772 | struct fuse_conn *fc = get_fuse_conn(inode); | 767 | struct fuse_conn *fc = get_fuse_conn(inode); |
@@ -779,62 +774,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos) | |||
779 | spin_unlock(&fc->lock); | 774 | spin_unlock(&fc->lock); |
780 | } | 775 | } |
781 | 776 | ||
782 | static int fuse_buffered_write(struct file *file, struct inode *inode, | ||
783 | loff_t pos, unsigned count, struct page *page) | ||
784 | { | ||
785 | int err; | ||
786 | size_t nres; | ||
787 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
788 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); | ||
789 | struct fuse_req *req; | ||
790 | |||
791 | if (is_bad_inode(inode)) | ||
792 | return -EIO; | ||
793 | |||
794 | /* | ||
795 | * Make sure writepages on the same page are not mixed up with | ||
796 | * plain writes. | ||
797 | */ | ||
798 | fuse_wait_on_page_writeback(inode, page->index); | ||
799 | |||
800 | req = fuse_get_req(fc); | ||
801 | if (IS_ERR(req)) | ||
802 | return PTR_ERR(req); | ||
803 | |||
804 | req->in.argpages = 1; | ||
805 | req->num_pages = 1; | ||
806 | req->pages[0] = page; | ||
807 | req->page_offset = offset; | ||
808 | nres = fuse_send_write(req, file, pos, count, NULL); | ||
809 | err = req->out.h.error; | ||
810 | fuse_put_request(fc, req); | ||
811 | if (!err && !nres) | ||
812 | err = -EIO; | ||
813 | if (!err) { | ||
814 | pos += nres; | ||
815 | fuse_write_update_size(inode, pos); | ||
816 | if (count == PAGE_CACHE_SIZE) | ||
817 | SetPageUptodate(page); | ||
818 | } | ||
819 | fuse_invalidate_attr(inode); | ||
820 | return err ? err : nres; | ||
821 | } | ||
822 | |||
823 | static int fuse_write_end(struct file *file, struct address_space *mapping, | ||
824 | loff_t pos, unsigned len, unsigned copied, | ||
825 | struct page *page, void *fsdata) | ||
826 | { | ||
827 | struct inode *inode = mapping->host; | ||
828 | int res = 0; | ||
829 | |||
830 | if (copied) | ||
831 | res = fuse_buffered_write(file, inode, pos, copied, page); | ||
832 | |||
833 | unlock_page(page); | ||
834 | page_cache_release(page); | ||
835 | return res; | ||
836 | } | ||
837 | |||
838 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, | 777 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, |
839 | struct inode *inode, loff_t pos, | 778 | struct inode *inode, loff_t pos, |
840 | size_t count) | 779 | size_t count) |
@@ -908,6 +847,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, | |||
908 | pagefault_enable(); | 847 | pagefault_enable(); |
909 | flush_dcache_page(page); | 848 | flush_dcache_page(page); |
910 | 849 | ||
850 | mark_page_accessed(page); | ||
851 | |||
911 | if (!tmp) { | 852 | if (!tmp) { |
912 | unlock_page(page); | 853 | unlock_page(page); |
913 | page_cache_release(page); | 854 | page_cache_release(page); |
@@ -1559,11 +1500,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) | |||
1559 | struct fuse_conn *fc = get_fuse_conn(inode); | 1500 | struct fuse_conn *fc = get_fuse_conn(inode); |
1560 | int err; | 1501 | int err; |
1561 | 1502 | ||
1562 | if (fc->no_lock) { | 1503 | if (fc->no_flock) { |
1563 | err = flock_lock_file_wait(file, fl); | 1504 | err = flock_lock_file_wait(file, fl); |
1564 | } else { | 1505 | } else { |
1506 | struct fuse_file *ff = file->private_data; | ||
1507 | |||
1565 | /* emulate flock with POSIX locks */ | 1508 | /* emulate flock with POSIX locks */ |
1566 | fl->fl_owner = (fl_owner_t) file; | 1509 | fl->fl_owner = (fl_owner_t) file; |
1510 | ff->flock = true; | ||
1567 | err = fuse_setlk(file, fl, 1); | 1511 | err = fuse_setlk(file, fl, 1); |
1568 | } | 1512 | } |
1569 | 1513 | ||
@@ -2201,8 +2145,6 @@ static const struct address_space_operations fuse_file_aops = { | |||
2201 | .readpage = fuse_readpage, | 2145 | .readpage = fuse_readpage, |
2202 | .writepage = fuse_writepage, | 2146 | .writepage = fuse_writepage, |
2203 | .launder_page = fuse_launder_page, | 2147 | .launder_page = fuse_launder_page, |
2204 | .write_begin = fuse_write_begin, | ||
2205 | .write_end = fuse_write_end, | ||
2206 | .readpages = fuse_readpages, | 2148 | .readpages = fuse_readpages, |
2207 | .set_page_dirty = __set_page_dirty_nobuffers, | 2149 | .set_page_dirty = __set_page_dirty_nobuffers, |
2208 | .bmap = fuse_bmap, | 2150 | .bmap = fuse_bmap, |
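Dropping ->write_begin/->write_end means buffered writes no longer go through the generic page-at-a-time path; they are served entirely by fuse's own multi-page path. A sketch of the resulting call chain, using names visible in this file (the entry point name is an assumption):

	/* fuse_file_aio_write()          -- entry point (assumed name)
	 *   fuse_perform_write()
	 *     fuse_fill_write_pages()    -- copy user data into pages,
	 *                                   now with mark_page_accessed()
	 *     fuse_send_write_pages()    -- one FUSE_WRITE per request
	 *     fuse_write_update_size()   -- extend i_size if needed
	 */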
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c6aa2d4b8517..cf6db0a93219 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -135,6 +135,9 @@ struct fuse_file { | |||
135 | 135 | ||
136 | /** Wait queue head for poll */ | 136 | /** Wait queue head for poll */ |
137 | wait_queue_head_t poll_wait; | 137 | wait_queue_head_t poll_wait; |
138 | |||
139 | /** Has flock been performed on this file? */ | ||
140 | bool flock:1; | ||
138 | }; | 141 | }; |
139 | 142 | ||
140 | /** One input argument of a request */ | 143 | /** One input argument of a request */ |
@@ -448,7 +451,7 @@ struct fuse_conn { | |||
448 | /** Is removexattr not implemented by fs? */ | 451 | /** Is removexattr not implemented by fs? */ |
449 | unsigned no_removexattr:1; | 452 | unsigned no_removexattr:1; |
450 | 453 | ||
451 | /** Are file locking primitives not implemented by fs? */ | 454 | /** Are posix file locking primitives not implemented by fs? */ |
452 | unsigned no_lock:1; | 455 | unsigned no_lock:1; |
453 | 456 | ||
454 | /** Is access not implemented by fs? */ | 457 | /** Is access not implemented by fs? */ |
@@ -472,6 +475,9 @@ struct fuse_conn { | |||
472 | /** Don't apply umask to creation modes */ | 475 | /** Don't apply umask to creation modes */ |
473 | unsigned dont_mask:1; | 476 | unsigned dont_mask:1; |
474 | 477 | ||
478 | /** Are BSD file locking primitives not implemented by fs? */ | ||
479 | unsigned no_flock:1; | ||
480 | |||
475 | /** The number of requests waiting for completion */ | 481 | /** The number of requests waiting for completion */ |
476 | atomic_t num_waiting; | 482 | atomic_t num_waiting; |
477 | 483 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 38f84cd48b67..add96f6ffda5 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -71,7 +71,7 @@ struct fuse_mount_data { | |||
71 | unsigned blksize; | 71 | unsigned blksize; |
72 | }; | 72 | }; |
73 | 73 | ||
74 | struct fuse_forget_link *fuse_alloc_forget() | 74 | struct fuse_forget_link *fuse_alloc_forget(void) |
75 | { | 75 | { |
76 | return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); | 76 | return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); |
77 | } | 77 | } |
@@ -809,6 +809,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
809 | fc->async_read = 1; | 809 | fc->async_read = 1; |
810 | if (!(arg->flags & FUSE_POSIX_LOCKS)) | 810 | if (!(arg->flags & FUSE_POSIX_LOCKS)) |
811 | fc->no_lock = 1; | 811 | fc->no_lock = 1; |
812 | if (arg->minor >= 17) { | ||
813 | if (!(arg->flags & FUSE_FLOCK_LOCKS)) | ||
814 | fc->no_flock = 1; | ||
815 | } else { | ||
816 | if (!(arg->flags & FUSE_POSIX_LOCKS)) | ||
817 | fc->no_flock = 1; | ||
818 | } | ||
812 | if (arg->flags & FUSE_ATOMIC_O_TRUNC) | 819 | if (arg->flags & FUSE_ATOMIC_O_TRUNC) |
813 | fc->atomic_o_trunc = 1; | 820 | fc->atomic_o_trunc = 1; |
814 | if (arg->minor >= 9) { | 821 | if (arg->minor >= 9) { |
@@ -823,6 +830,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
823 | } else { | 830 | } else { |
824 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; | 831 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; |
825 | fc->no_lock = 1; | 832 | fc->no_lock = 1; |
833 | fc->no_flock = 1; | ||
826 | } | 834 | } |
827 | 835 | ||
828 | fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); | 836 | fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); |
@@ -843,7 +851,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) | |||
843 | arg->minor = FUSE_KERNEL_MINOR_VERSION; | 851 | arg->minor = FUSE_KERNEL_MINOR_VERSION; |
844 | arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; | 852 | arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; |
845 | arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | | 853 | arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | |
846 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; | 854 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | |
855 | FUSE_FLOCK_LOCKS; | ||
847 | req->in.h.opcode = FUSE_INIT; | 856 | req->in.h.opcode = FUSE_INIT; |
848 | req->in.numargs = 1; | 857 | req->in.numargs = 1; |
849 | req->in.args[0].size = sizeof(*arg); | 858 | req->in.args[0].size = sizeof(*arg); |
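FUSE_FLOCK_LOCKS is new in protocol 7.17, which is why the minor version gates the test above; older servers keep the historical behavior of tying flock support to the POSIX-lock capability. The same logic, condensed:

	/* condensed form of the hunk above */
	if (arg->minor >= 17)		/* protocol 7.17+: explicit flag */
		fc->no_flock = !(arg->flags & FUSE_FLOCK_LOCKS);
	else				/* older: tied to POSIX locks */
		fc->no_flock = !(arg->flags & FUSE_POSIX_LOCKS);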
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index d5e33a077a67..d0dddaceac59 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -82,18 +82,14 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, | |||
82 | return PTR_ERR(acl); | 82 | return PTR_ERR(acl); |
83 | } | 83 | } |
84 | if (acl) { | 84 | if (acl) { |
85 | mode_t mode; | ||
86 | |||
87 | error = posix_acl_valid(acl); | 85 | error = posix_acl_valid(acl); |
88 | if (error) | 86 | if (error) |
89 | goto failed; | 87 | goto failed; |
90 | switch (type) { | 88 | switch (type) { |
91 | case ACL_TYPE_ACCESS: | 89 | case ACL_TYPE_ACCESS: |
92 | mode = inode->i_mode; | 90 | error = posix_acl_equiv_mode(acl, &inode->i_mode); |
93 | error = posix_acl_equiv_mode(acl, &mode); | ||
94 | if (error < 0) | 91 | if (error < 0) |
95 | goto failed; | 92 | goto failed; |
96 | inode->i_mode = mode; | ||
97 | inode->i_ctime = CURRENT_TIME; | 93 | inode->i_ctime = CURRENT_TIME; |
98 | if (error == 0) { | 94 | if (error == 0) { |
99 | posix_acl_release(acl); | 95 | posix_acl_release(acl); |
@@ -125,21 +121,20 @@ int | |||
125 | generic_acl_init(struct inode *inode, struct inode *dir) | 121 | generic_acl_init(struct inode *inode, struct inode *dir) |
126 | { | 122 | { |
127 | struct posix_acl *acl = NULL; | 123 | struct posix_acl *acl = NULL; |
128 | mode_t mode = inode->i_mode; | ||
129 | int error; | 124 | int error; |
130 | 125 | ||
131 | inode->i_mode = mode & ~current_umask(); | ||
132 | if (!S_ISLNK(inode->i_mode)) | 126 | if (!S_ISLNK(inode->i_mode)) |
133 | acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); | 127 | acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); |
134 | if (acl) { | 128 | if (acl) { |
135 | if (S_ISDIR(inode->i_mode)) | 129 | if (S_ISDIR(inode->i_mode)) |
136 | set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); | 130 | set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); |
137 | error = posix_acl_create(&acl, GFP_KERNEL, &mode); | 131 | error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); |
138 | if (error < 0) | 132 | if (error < 0) |
139 | return error; | 133 | return error; |
140 | inode->i_mode = mode; | ||
141 | if (error > 0) | 134 | if (error > 0) |
142 | set_cached_acl(inode, ACL_TYPE_ACCESS, acl); | 135 | set_cached_acl(inode, ACL_TYPE_ACCESS, acl); |
136 | } else { | ||
137 | inode->i_mode &= ~current_umask(); | ||
143 | } | 138 | } |
144 | error = 0; | 139 | error = 0; |
145 | 140 | ||
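The rewrite leans on posix_acl_equiv_mode() updating the mode in place; its return-value contract, as relied on here, is:

	/* posix_acl_equiv_mode(acl, &mode):
	 *   < 0   invalid ACL
	 *   == 0  the ACL is fully expressible as mode bits; *mode is
	 *         updated and the ACL object itself can be dropped
	 *   > 0   *mode is updated, but extra entries remain -- keep it
	 */
	error = posix_acl_equiv_mode(acl, &inode->i_mode);
	if (error == 0) {
		posix_acl_release(acl);		/* mode bits alone suffice */
		acl = NULL;
	}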
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 884c9af0542f..34501b64bc47 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -72,7 +72,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) | |||
72 | return gfs2_acl_get(GFS2_I(inode), type); | 72 | return gfs2_acl_get(GFS2_I(inode), type); |
73 | } | 73 | } |
74 | 74 | ||
75 | static int gfs2_set_mode(struct inode *inode, mode_t mode) | 75 | static int gfs2_set_mode(struct inode *inode, umode_t mode) |
76 | { | 76 | { |
77 | int error = 0; | 77 | int error = 0; |
78 | 78 | ||
@@ -117,7 +117,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) | |||
117 | { | 117 | { |
118 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 118 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
119 | struct posix_acl *acl; | 119 | struct posix_acl *acl; |
120 | mode_t mode = inode->i_mode; | 120 | umode_t mode = inode->i_mode; |
121 | int error = 0; | 121 | int error = 0; |
122 | 122 | ||
123 | if (!sdp->sd_args.ar_posix_acl) | 123 | if (!sdp->sd_args.ar_posix_acl) |
@@ -276,7 +276,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, | |||
276 | goto out_release; | 276 | goto out_release; |
277 | 277 | ||
278 | if (type == ACL_TYPE_ACCESS) { | 278 | if (type == ACL_TYPE_ACCESS) { |
279 | mode_t mode = inode->i_mode; | 279 | umode_t mode = inode->i_mode; |
280 | error = posix_acl_equiv_mode(acl, &mode); | 280 | error = posix_acl_equiv_mode(acl, &mode); |
281 | 281 | ||
282 | if (error <= 0) { | 282 | if (error <= 0) { |
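The mode_t → umode_t conversions swap the userspace-facing type for the kernel-internal one; roughly (exact definitions vary by architecture, shown here as an assumption):

	/* kernel-internal file mode type */
	typedef unsigned short umode_t;
	/* userspace ABI type behind mode_t; width is arch-dependent */
	typedef unsigned short __kernel_mode_t;	/* e.g. x86-32 */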
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 29e1ace7953d..8a139ff1919f 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
17 | #include <linux/rcupdate.h> | 17 | #include <linux/rcupdate.h> |
18 | #include <linux/rculist_bl.h> | 18 | #include <linux/rculist_bl.h> |
19 | #include <asm/atomic.h> | 19 | #include <linux/atomic.h> |
20 | 20 | ||
21 | #include "gfs2.h" | 21 | #include "gfs2.h" |
22 | #include "incore.h" | 22 | #include "incore.h" |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 516516e0c2a2..3bc073a4cf82 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1018,13 +1018,13 @@ hostdata_error: | |||
1018 | fsname++; | 1018 | fsname++; |
1019 | if (lm->lm_mount == NULL) { | 1019 | if (lm->lm_mount == NULL) { |
1020 | fs_info(sdp, "Now mounting FS...\n"); | 1020 | fs_info(sdp, "Now mounting FS...\n"); |
1021 | complete(&sdp->sd_locking_init); | 1021 | complete_all(&sdp->sd_locking_init); |
1022 | return 0; | 1022 | return 0; |
1023 | } | 1023 | } |
1024 | ret = lm->lm_mount(sdp, fsname); | 1024 | ret = lm->lm_mount(sdp, fsname); |
1025 | if (ret == 0) | 1025 | if (ret == 0) |
1026 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); | 1026 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); |
1027 | complete(&sdp->sd_locking_init); | 1027 | complete_all(&sdp->sd_locking_init); |
1028 | return ret; | 1028 | return ret; |
1029 | } | 1029 | } |
1030 | 1030 | ||
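The switch to complete_all() matters because complete() releases at most one waiter per call, while complete_all() puts the completion into a done state that satisfies every current and future waiter — the right semantics for a one-shot gate like sd_locking_init. An illustrative pattern (not gfs2 code):

        #include <linux/completion.h>

        static DECLARE_COMPLETION(gate);

        static void setup_finished(void)
        {
                complete_all(&gate);    /* opens the gate permanently */
        }

        static void wait_for_setup(void)
        {
                /* Never blocks once the gate is open; with plain
                 * complete() only one waiter per call would pass. */
                wait_for_completion(&gate);
        }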
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 8635be5ffd97..970ea987b3f6 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/statfs.h> | 16 | #include <linux/statfs.h> |
17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
18 | #include <linux/pid_namespace.h> | 18 | #include <linux/pid_namespace.h> |
19 | #include <linux/namei.h> | ||
19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
20 | #include "os.h" | 21 | #include "os.h" |
21 | 22 | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87b6e0421c12..ec889538e5a6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -491,6 +491,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, | |||
491 | inode->i_op = &page_symlink_inode_operations; | 491 | inode->i_op = &page_symlink_inode_operations; |
492 | break; | 492 | break; |
493 | } | 493 | } |
494 | lockdep_annotate_inode_mutex_key(inode); | ||
494 | } | 495 | } |
495 | return inode; | 496 | return inode; |
496 | } | 497 | } |
diff --git a/fs/inode.c b/fs/inode.c
index 96c77b81167c..ec7924696a13 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -37,7 +37,7 @@ | |||
37 | * inode->i_sb->s_inode_lru, inode->i_lru | 37 | * inode->i_sb->s_inode_lru, inode->i_lru |
38 | * inode_sb_list_lock protects: | 38 | * inode_sb_list_lock protects: |
39 | * sb->s_inodes, inode->i_sb_list | 39 | * sb->s_inodes, inode->i_sb_list |
40 | * inode_wb_list_lock protects: | 40 | * bdi->wb.list_lock protects: |
41 | * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list | 41 | * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list |
42 | * inode_hash_lock protects: | 42 | * inode_hash_lock protects: |
43 | * inode_hashtable, inode->i_hash | 43 | * inode_hashtable, inode->i_hash |
@@ -48,7 +48,7 @@ | |||
48 | * inode->i_lock | 48 | * inode->i_lock |
49 | * inode->i_sb->s_inode_lru_lock | 49 | * inode->i_sb->s_inode_lru_lock |
50 | * | 50 | * |
51 | * inode_wb_list_lock | 51 | * bdi->wb.list_lock |
52 | * inode->i_lock | 52 | * inode->i_lock |
53 | * | 53 | * |
54 | * inode_hash_lock | 54 | * inode_hash_lock |
@@ -65,7 +65,6 @@ static struct hlist_head *inode_hashtable __read_mostly; | |||
65 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); | 65 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); |
66 | 66 | ||
67 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); | 67 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); |
68 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); | ||
69 | 68 | ||
70 | /* | 69 | /* |
71 | * Empty aops. Can be used for the cases where the user does not | 70 | * Empty aops. Can be used for the cases where the user does not |
@@ -144,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
144 | inode->i_op = &empty_iops; | 143 | inode->i_op = &empty_iops; |
145 | inode->i_fop = &empty_fops; | 144 | inode->i_fop = &empty_fops; |
146 | inode->i_nlink = 1; | 145 | inode->i_nlink = 1; |
146 | inode->i_opflags = 0; | ||
147 | inode->i_uid = 0; | 147 | inode->i_uid = 0; |
148 | inode->i_gid = 0; | 148 | inode->i_gid = 0; |
149 | atomic_set(&inode->i_writecount, 0); | 149 | atomic_set(&inode->i_writecount, 0); |
@@ -362,9 +362,11 @@ EXPORT_SYMBOL_GPL(inode_sb_list_add); | |||
362 | 362 | ||
363 | static inline void inode_sb_list_del(struct inode *inode) | 363 | static inline void inode_sb_list_del(struct inode *inode) |
364 | { | 364 | { |
365 | spin_lock(&inode_sb_list_lock); | 365 | if (!list_empty(&inode->i_sb_list)) { |
366 | list_del_init(&inode->i_sb_list); | 366 | spin_lock(&inode_sb_list_lock); |
367 | spin_unlock(&inode_sb_list_lock); | 367 | list_del_init(&inode->i_sb_list); |
368 | spin_unlock(&inode_sb_list_lock); | ||
369 | } | ||
368 | } | 370 | } |
369 | 371 | ||
370 | static unsigned long hash(struct super_block *sb, unsigned long hashval) | 372 | static unsigned long hash(struct super_block *sb, unsigned long hashval) |
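The pattern introduced here (and repeated below for i_wb_list and i_lru) avoids bouncing a global spinlock for inodes that were never put on the list. It is safe because list_del_init() leaves the entry self-linked, so list_empty() is stable once the inode has entered eviction. A reduced sketch, assuming inode_sb_list_lock as declared in this file:

        static inline void sb_list_del_sketch(struct inode *inode)
        {
                if (list_empty(&inode->i_sb_list))
                        return;                 /* never added: skip the lock */
                spin_lock(&inode_sb_list_lock);
                list_del_init(&inode->i_sb_list);
                spin_unlock(&inode_sb_list_lock);
        }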
@@ -398,12 +400,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) | |||
398 | EXPORT_SYMBOL(__insert_inode_hash); | 400 | EXPORT_SYMBOL(__insert_inode_hash); |
399 | 401 | ||
400 | /** | 402 | /** |
401 | * remove_inode_hash - remove an inode from the hash | 403 | * __remove_inode_hash - remove an inode from the hash |
402 | * @inode: inode to unhash | 404 | * @inode: inode to unhash |
403 | * | 405 | * |
404 | * Remove an inode from the superblock. | 406 | * Remove an inode from the superblock. |
405 | */ | 407 | */ |
406 | void remove_inode_hash(struct inode *inode) | 408 | void __remove_inode_hash(struct inode *inode) |
407 | { | 409 | { |
408 | spin_lock(&inode_hash_lock); | 410 | spin_lock(&inode_hash_lock); |
409 | spin_lock(&inode->i_lock); | 411 | spin_lock(&inode->i_lock); |
@@ -411,7 +413,7 @@ void remove_inode_hash(struct inode *inode) | |||
411 | spin_unlock(&inode->i_lock); | 413 | spin_unlock(&inode->i_lock); |
412 | spin_unlock(&inode_hash_lock); | 414 | spin_unlock(&inode_hash_lock); |
413 | } | 415 | } |
414 | EXPORT_SYMBOL(remove_inode_hash); | 416 | EXPORT_SYMBOL(__remove_inode_hash); |
415 | 417 | ||
416 | void end_writeback(struct inode *inode) | 418 | void end_writeback(struct inode *inode) |
417 | { | 419 | { |
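The rename suggests the unhashed check moves to callers; presumably (not shown in this hunk) the old name survives as an inline wrapper along these lines, so pseudo inodes that were never hashed skip inode_hash_lock entirely:

        /* Hypothetical wrapper, assuming inode_unhashed() as in fs.h. */
        static inline void remove_inode_hash(struct inode *inode)
        {
                if (!inode_unhashed(inode))
                        __remove_inode_hash(inode);
        }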
@@ -453,7 +455,9 @@ static void evict(struct inode *inode) | |||
453 | BUG_ON(!(inode->i_state & I_FREEING)); | 455 | BUG_ON(!(inode->i_state & I_FREEING)); |
454 | BUG_ON(!list_empty(&inode->i_lru)); | 456 | BUG_ON(!list_empty(&inode->i_lru)); |
455 | 457 | ||
456 | inode_wb_list_del(inode); | 458 | if (!list_empty(&inode->i_wb_list)) |
459 | inode_wb_list_del(inode); | ||
460 | |||
457 | inode_sb_list_del(inode); | 461 | inode_sb_list_del(inode); |
458 | 462 | ||
459 | if (op->evict_inode) { | 463 | if (op->evict_inode) { |
@@ -797,6 +801,29 @@ unsigned int get_next_ino(void) | |||
797 | EXPORT_SYMBOL(get_next_ino); | 801 | EXPORT_SYMBOL(get_next_ino); |
798 | 802 | ||
799 | /** | 803 | /** |
804 | * new_inode_pseudo - obtain an inode | ||
805 | * @sb: superblock | ||
806 | * | ||
807 | * Allocates a new inode for the given superblock. | ||
808 | * The inode won't be chained into the superblock's s_inodes list. | ||
809 | * This means: | ||
810 | * - fs can't be unmounted | ||
811 | * - quotas, fsnotify, writeback can't work | ||
812 | */ | ||
813 | struct inode *new_inode_pseudo(struct super_block *sb) | ||
814 | { | ||
815 | struct inode *inode = alloc_inode(sb); | ||
816 | |||
817 | if (inode) { | ||
818 | spin_lock(&inode->i_lock); | ||
819 | inode->i_state = 0; | ||
820 | spin_unlock(&inode->i_lock); | ||
821 | INIT_LIST_HEAD(&inode->i_sb_list); | ||
822 | } | ||
823 | return inode; | ||
824 | } | ||
825 | |||
826 | /** | ||
800 | * new_inode - obtain an inode | 827 | * new_inode - obtain an inode |
801 | * @sb: superblock | 828 | * @sb: superblock |
802 | * | 829 | * |
@@ -814,27 +841,16 @@ struct inode *new_inode(struct super_block *sb) | |||
814 | 841 | ||
815 | spin_lock_prefetch(&inode_sb_list_lock); | 842 | spin_lock_prefetch(&inode_sb_list_lock); |
816 | 843 | ||
817 | inode = alloc_inode(sb); | 844 | inode = new_inode_pseudo(sb); |
818 | if (inode) { | 845 | if (inode) |
819 | spin_lock(&inode->i_lock); | ||
820 | inode->i_state = 0; | ||
821 | spin_unlock(&inode->i_lock); | ||
822 | inode_sb_list_add(inode); | 846 | inode_sb_list_add(inode); |
823 | } | ||
824 | return inode; | 847 | return inode; |
825 | } | 848 | } |
826 | EXPORT_SYMBOL(new_inode); | 849 | EXPORT_SYMBOL(new_inode); |
827 | 850 | ||
828 | /** | ||
829 | * unlock_new_inode - clear the I_NEW state and wake up any waiters | ||
830 | * @inode: new inode to unlock | ||
831 | * | ||
832 | * Called when the inode is fully initialised to clear the new state of the | ||
833 | * inode and wake up anyone waiting for the inode to finish initialisation. | ||
834 | */ | ||
835 | void unlock_new_inode(struct inode *inode) | ||
836 | { | ||
837 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 851 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
852 | void lockdep_annotate_inode_mutex_key(struct inode *inode) | ||
853 | { | ||
838 | if (S_ISDIR(inode->i_mode)) { | 854 | if (S_ISDIR(inode->i_mode)) { |
839 | struct file_system_type *type = inode->i_sb->s_type; | 855 | struct file_system_type *type = inode->i_sb->s_type; |
840 | 856 | ||
@@ -850,7 +866,20 @@ void unlock_new_inode(struct inode *inode) | |||
850 | &type->i_mutex_dir_key); | 866 | &type->i_mutex_dir_key); |
851 | } | 867 | } |
852 | } | 868 | } |
869 | } | ||
870 | EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key); | ||
853 | #endif | 871 | #endif |
872 | |||
873 | /** | ||
874 | * unlock_new_inode - clear the I_NEW state and wake up any waiters | ||
875 | * @inode: new inode to unlock | ||
876 | * | ||
877 | * Called when the inode is fully initialised to clear the new state of the | ||
878 | * inode and wake up anyone waiting for the inode to finish initialisation. | ||
879 | */ | ||
880 | void unlock_new_inode(struct inode *inode) | ||
881 | { | ||
882 | lockdep_annotate_inode_mutex_key(inode); | ||
854 | spin_lock(&inode->i_lock); | 883 | spin_lock(&inode->i_lock); |
855 | WARN_ON(!(inode->i_state & I_NEW)); | 884 | WARN_ON(!(inode->i_state & I_NEW)); |
856 | inode->i_state &= ~I_NEW; | 885 | inode->i_state &= ~I_NEW; |
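Splitting the lockdep annotation out of unlock_new_inode() lets filesystems that never go through the I_NEW protocol (like the hugetlbfs hunk earlier) still class their i_mutex correctly, and new_inode_pseudo() gives them an inode that stays off sb->s_inodes. A hypothetical caller combining the two helpers:

        /* Illustrative only: an internal inode invisible to umount,
         * quota and writeback. */
        static struct inode *make_internal_inode(struct super_block *sb)
        {
                struct inode *inode = new_inode_pseudo(sb);

                if (!inode)
                        return NULL;
                inode->i_ino = get_next_ino();
                lockdep_annotate_inode_mutex_key(inode);
                return inode;
        }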
@@ -1308,7 +1337,8 @@ static void iput_final(struct inode *inode) | |||
1308 | } | 1337 | } |
1309 | 1338 | ||
1310 | inode->i_state |= I_FREEING; | 1339 | inode->i_state |= I_FREEING; |
1311 | inode_lru_list_del(inode); | 1340 | if (!list_empty(&inode->i_lru)) |
1341 | inode_lru_list_del(inode); | ||
1312 | spin_unlock(&inode->i_lock); | 1342 | spin_unlock(&inode->i_lock); |
1313 | 1343 | ||
1314 | evict(inode); | 1344 | evict(inode); |
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e4b87bc1fa56..f94fc48ff3a0 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/jbd.h> | 22 | #include <linux/jbd.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/blkdev.h> | ||
26 | #include <trace/events/jbd.h> | ||
25 | 27 | ||
26 | /* | 28 | /* |
27 | * Unlink a buffer from a transaction checkpoint list. | 29 | * Unlink a buffer from a transaction checkpoint list. |
@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
95 | 97 | ||
96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
97 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | ||
101 | * Get our reference so that bh cannot be freed before | ||
102 | * we unlock it | ||
103 | */ | ||
104 | get_bh(bh); | ||
98 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
99 | ret = __journal_remove_checkpoint(jh) + 1; | 106 | ret = __journal_remove_checkpoint(jh) + 1; |
100 | jbd_unlock_bh_state(bh); | 107 | jbd_unlock_bh_state(bh); |
101 | journal_remove_journal_head(bh); | ||
102 | BUFFER_TRACE(bh, "release"); | 108 | BUFFER_TRACE(bh, "release"); |
103 | __brelse(bh); | 109 | __brelse(bh); |
104 | } else { | 110 | } else { |
@@ -220,8 +226,8 @@ restart: | |||
220 | spin_lock(&journal->j_list_lock); | 226 | spin_lock(&journal->j_list_lock); |
221 | goto restart; | 227 | goto restart; |
222 | } | 228 | } |
229 | get_bh(bh); | ||
223 | if (buffer_locked(bh)) { | 230 | if (buffer_locked(bh)) { |
224 | get_bh(bh); | ||
225 | spin_unlock(&journal->j_list_lock); | 231 | spin_unlock(&journal->j_list_lock); |
226 | jbd_unlock_bh_state(bh); | 232 | jbd_unlock_bh_state(bh); |
227 | wait_on_buffer(bh); | 233 | wait_on_buffer(bh); |
@@ -240,7 +246,6 @@ restart: | |||
240 | */ | 246 | */ |
241 | released = __journal_remove_checkpoint(jh); | 247 | released = __journal_remove_checkpoint(jh); |
242 | jbd_unlock_bh_state(bh); | 248 | jbd_unlock_bh_state(bh); |
243 | journal_remove_journal_head(bh); | ||
244 | __brelse(bh); | 249 | __brelse(bh); |
245 | } | 250 | } |
246 | 251 | ||
@@ -253,9 +258,12 @@ static void | |||
253 | __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | 258 | __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) |
254 | { | 259 | { |
255 | int i; | 260 | int i; |
261 | struct blk_plug plug; | ||
256 | 262 | ||
263 | blk_start_plug(&plug); | ||
257 | for (i = 0; i < *batch_count; i++) | 264 | for (i = 0; i < *batch_count; i++) |
258 | write_dirty_buffer(bhs[i], WRITE); | 265 | write_dirty_buffer(bhs[i], WRITE_SYNC); |
266 | blk_finish_plug(&plug); | ||
259 | 267 | ||
260 | for (i = 0; i < *batch_count; i++) { | 268 | for (i = 0; i < *batch_count; i++) { |
261 | struct buffer_head *bh = bhs[i]; | 269 | struct buffer_head *bh = bhs[i]; |
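Two independent changes land in this hunk: the writes become WRITE_SYNC (they are latency-sensitive — someone is waiting for checkpoint progress) and they are issued under a block plug so the elevator sees the whole batch at once. The plugging idiom in isolation, as a sketch:

        #include <linux/blkdev.h>
        #include <linux/buffer_head.h>

        static void flush_batch_sketch(struct buffer_head **bhs, int count)
        {
                struct blk_plug plug;
                int i;

                blk_start_plug(&plug);          /* queue I/O per-task */
                for (i = 0; i < count; i++)
                        write_dirty_buffer(bhs[i], WRITE_SYNC);
                blk_finish_plug(&plug);         /* submit as one batch */
        }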
@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
304 | ret = 1; | 312 | ret = 1; |
305 | if (unlikely(buffer_write_io_error(bh))) | 313 | if (unlikely(buffer_write_io_error(bh))) |
306 | ret = -EIO; | 314 | ret = -EIO; |
315 | get_bh(bh); | ||
307 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 316 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
308 | BUFFER_TRACE(bh, "remove from checkpoint"); | 317 | BUFFER_TRACE(bh, "remove from checkpoint"); |
309 | __journal_remove_checkpoint(jh); | 318 | __journal_remove_checkpoint(jh); |
310 | spin_unlock(&journal->j_list_lock); | 319 | spin_unlock(&journal->j_list_lock); |
311 | jbd_unlock_bh_state(bh); | 320 | jbd_unlock_bh_state(bh); |
312 | journal_remove_journal_head(bh); | ||
313 | __brelse(bh); | 321 | __brelse(bh); |
314 | } else { | 322 | } else { |
315 | /* | 323 | /* |
@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal) | |||
358 | * journal straight away. | 366 | * journal straight away. |
359 | */ | 367 | */ |
360 | result = cleanup_journal_tail(journal); | 368 | result = cleanup_journal_tail(journal); |
369 | trace_jbd_checkpoint(journal, result); | ||
361 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 370 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
362 | if (result <= 0) | 371 | if (result <= 0) |
363 | return result; | 372 | return result; |
@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal) | |||
503 | if (blocknr < journal->j_tail) | 512 | if (blocknr < journal->j_tail) |
504 | freed = freed + journal->j_last - journal->j_first; | 513 | freed = freed + journal->j_last - journal->j_first; |
505 | 514 | ||
515 | trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed); | ||
506 | jbd_debug(1, | 516 | jbd_debug(1, |
507 | "Cleaning journal tail from %d to %d (offset %u), " | 517 | "Cleaning journal tail from %d to %d (offset %u), " |
508 | "freeing %u\n", | 518 | "freeing %u\n", |
@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal) | |||
523 | /* | 533 | /* |
524 | * journal_clean_one_cp_list | 534 | * journal_clean_one_cp_list |
525 | * | 535 | * |
526 | * Find all the written-back checkpoint buffers in the given list and release them. | 536 | * Find all the written-back checkpoint buffers in the given list and release |
537 | * them. | ||
527 | * | 538 | * |
528 | * Called with the journal locked. | ||
529 | * Called with j_list_lock held. | 539 | * Called with j_list_lock held. |
530 | Returns number of buffers reaped (for debug) | 540 | Returns number of buffers reaped (for debug) |
531 | */ | 541 | */ |
@@ -632,8 +642,8 @@ out: | |||
632 | * checkpoint lists. | 642 | * checkpoint lists. |
633 | * | 643 | * |
634 | * The function returns 1 if it frees the transaction, 0 otherwise. | 644 | * The function returns 1 if it frees the transaction, 0 otherwise. |
645 | * The function can free jh and bh. | ||
635 | * | 646 | * |
636 | * This function is called with the journal locked. | ||
637 | * This function is called with j_list_lock held. | 647 | * This function is called with j_list_lock held. |
638 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | 648 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) |
639 | */ | 649 | */ |
@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
652 | } | 662 | } |
653 | journal = transaction->t_journal; | 663 | journal = transaction->t_journal; |
654 | 664 | ||
665 | JBUFFER_TRACE(jh, "removing from transaction"); | ||
655 | __buffer_unlink(jh); | 666 | __buffer_unlink(jh); |
656 | jh->b_cp_transaction = NULL; | 667 | jh->b_cp_transaction = NULL; |
668 | journal_put_journal_head(jh); | ||
657 | 669 | ||
658 | if (transaction->t_checkpoint_list != NULL || | 670 | if (transaction->t_checkpoint_list != NULL || |
659 | transaction->t_checkpoint_io_list != NULL) | 671 | transaction->t_checkpoint_io_list != NULL) |
660 | goto out; | 672 | goto out; |
661 | JBUFFER_TRACE(jh, "transaction has no more buffers"); | ||
662 | 673 | ||
663 | /* | 674 | /* |
664 | * There is one special case to worry about: if we have just pulled the | 675 | * There is one special case to worry about: if we have just pulled the |
@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
669 | * The locking here around t_state is a bit sleazy. | 680 | * The locking here around t_state is a bit sleazy. |
670 | * See the comment at the end of journal_commit_transaction(). | 681 | * See the comment at the end of journal_commit_transaction(). |
671 | */ | 682 | */ |
672 | if (transaction->t_state != T_FINISHED) { | 683 | if (transaction->t_state != T_FINISHED) |
673 | JBUFFER_TRACE(jh, "belongs to running/committing transaction"); | ||
674 | goto out; | 684 | goto out; |
675 | } | ||
676 | 685 | ||
677 | /* OK, that was the last buffer for the transaction: we can now | 686 | /* OK, that was the last buffer for the transaction: we can now |
678 | safely remove this transaction from the log */ | 687 | safely remove this transaction from the log */ |
@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
684 | wake_up(&journal->j_wait_logspace); | 693 | wake_up(&journal->j_wait_logspace); |
685 | ret = 1; | 694 | ret = 1; |
686 | out: | 695 | out: |
687 | JBUFFER_TRACE(jh, "exit"); | ||
688 | return ret; | 696 | return ret; |
689 | } | 697 | } |
690 | 698 | ||
@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh, | |||
703 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); | 711 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); |
704 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); | 712 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
705 | 713 | ||
714 | /* Get reference for checkpointing transaction */ | ||
715 | journal_grab_journal_head(jh2bh(jh)); | ||
706 | jh->b_cp_transaction = transaction; | 716 | jh->b_cp_transaction = transaction; |
707 | 717 | ||
708 | if (!transaction->t_checkpoint_list) { | 718 | if (!transaction->t_checkpoint_list) { |
@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) | |||
752 | J_ASSERT(journal->j_committing_transaction != transaction); | 762 | J_ASSERT(journal->j_committing_transaction != transaction); |
753 | J_ASSERT(journal->j_running_transaction != transaction); | 763 | J_ASSERT(journal->j_running_transaction != transaction); |
754 | 764 | ||
765 | trace_jbd_drop_transaction(journal, transaction); | ||
755 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); | 766 | jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); |
756 | kfree(transaction); | 767 | kfree(transaction); |
757 | } | 768 | } |
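Taken together, the checkpoint.c hunks establish that the checkpoint list owns one b_jcount reference: __journal_insert_checkpoint() grabs it, __journal_remove_checkpoint() puts it, and callers pin the bh with get_bh() across removal. Reduced to its two halves (a simplification of the code above, not a drop-in):

        static void cp_link_sketch(struct journal_head *jh, transaction_t *t)
        {
                journal_grab_journal_head(jh2bh(jh));   /* +1 for the list */
                jh->b_cp_transaction = t;
                /* ... splice jh onto t->t_checkpoint_list ... */
        }

        static void cp_unlink_sketch(struct journal_head *jh)
        {
                /* ... unsplice jh from its checkpoint list ... */
                jh->b_cp_transaction = NULL;
                journal_put_journal_head(jh);           /* may free jh */
        }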
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 72ffa974b0b8..8799207df058 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/pagemap.h> | 21 | #include <linux/pagemap.h> |
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <trace/events/jbd.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Default IO end handler for temporary BJ_IO buffer_heads. | 27 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -204,6 +205,8 @@ write_out_data: | |||
204 | if (!trylock_buffer(bh)) { | 205 | if (!trylock_buffer(bh)) { |
205 | BUFFER_TRACE(bh, "needs blocking lock"); | 206 | BUFFER_TRACE(bh, "needs blocking lock"); |
206 | spin_unlock(&journal->j_list_lock); | 207 | spin_unlock(&journal->j_list_lock); |
208 | trace_jbd_do_submit_data(journal, | ||
209 | commit_transaction); | ||
207 | /* Write out all data to prevent deadlocks */ | 210 | /* Write out all data to prevent deadlocks */ |
208 | journal_do_submit_data(wbuf, bufs, write_op); | 211 | journal_do_submit_data(wbuf, bufs, write_op); |
209 | bufs = 0; | 212 | bufs = 0; |
@@ -236,6 +239,8 @@ write_out_data: | |||
236 | jbd_unlock_bh_state(bh); | 239 | jbd_unlock_bh_state(bh); |
237 | if (bufs == journal->j_wbufsize) { | 240 | if (bufs == journal->j_wbufsize) { |
238 | spin_unlock(&journal->j_list_lock); | 241 | spin_unlock(&journal->j_list_lock); |
242 | trace_jbd_do_submit_data(journal, | ||
243 | commit_transaction); | ||
239 | journal_do_submit_data(wbuf, bufs, write_op); | 244 | journal_do_submit_data(wbuf, bufs, write_op); |
240 | bufs = 0; | 245 | bufs = 0; |
241 | goto write_out_data; | 246 | goto write_out_data; |
@@ -253,10 +258,6 @@ write_out_data: | |||
253 | jbd_unlock_bh_state(bh); | 258 | jbd_unlock_bh_state(bh); |
254 | if (locked) | 259 | if (locked) |
255 | unlock_buffer(bh); | 260 | unlock_buffer(bh); |
256 | journal_remove_journal_head(bh); | ||
257 | /* One for our safety reference, other for | ||
258 | * journal_remove_journal_head() */ | ||
259 | put_bh(bh); | ||
260 | release_data_buffer(bh); | 261 | release_data_buffer(bh); |
261 | } | 262 | } |
262 | 263 | ||
@@ -266,6 +267,7 @@ write_out_data: | |||
266 | } | 267 | } |
267 | } | 268 | } |
268 | spin_unlock(&journal->j_list_lock); | 269 | spin_unlock(&journal->j_list_lock); |
270 | trace_jbd_do_submit_data(journal, commit_transaction); | ||
269 | journal_do_submit_data(wbuf, bufs, write_op); | 271 | journal_do_submit_data(wbuf, bufs, write_op); |
270 | 272 | ||
271 | return err; | 273 | return err; |
@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal) | |||
316 | commit_transaction = journal->j_running_transaction; | 318 | commit_transaction = journal->j_running_transaction; |
317 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 319 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
318 | 320 | ||
321 | trace_jbd_start_commit(journal, commit_transaction); | ||
319 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 322 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
320 | commit_transaction->t_tid); | 323 | commit_transaction->t_tid); |
321 | 324 | ||
322 | spin_lock(&journal->j_state_lock); | 325 | spin_lock(&journal->j_state_lock); |
323 | commit_transaction->t_state = T_LOCKED; | 326 | commit_transaction->t_state = T_LOCKED; |
324 | 327 | ||
328 | trace_jbd_commit_locking(journal, commit_transaction); | ||
325 | spin_lock(&commit_transaction->t_handle_lock); | 329 | spin_lock(&commit_transaction->t_handle_lock); |
326 | while (commit_transaction->t_updates) { | 330 | while (commit_transaction->t_updates) { |
327 | DEFINE_WAIT(wait); | 331 | DEFINE_WAIT(wait); |
@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal) | |||
392 | */ | 396 | */ |
393 | journal_switch_revoke_table(journal); | 397 | journal_switch_revoke_table(journal); |
394 | 398 | ||
399 | trace_jbd_commit_flushing(journal, commit_transaction); | ||
395 | commit_transaction->t_state = T_FLUSH; | 400 | commit_transaction->t_state = T_FLUSH; |
396 | journal->j_committing_transaction = commit_transaction; | 401 | journal->j_committing_transaction = commit_transaction; |
397 | journal->j_running_transaction = NULL; | 402 | journal->j_running_transaction = NULL; |
@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal) | |||
446 | } | 451 | } |
447 | if (buffer_jbd(bh) && bh2jh(bh) == jh && | 452 | if (buffer_jbd(bh) && bh2jh(bh) == jh && |
448 | jh->b_transaction == commit_transaction && | 453 | jh->b_transaction == commit_transaction && |
449 | jh->b_jlist == BJ_Locked) { | 454 | jh->b_jlist == BJ_Locked) |
450 | __journal_unfile_buffer(jh); | 455 | __journal_unfile_buffer(jh); |
451 | jbd_unlock_bh_state(bh); | 456 | jbd_unlock_bh_state(bh); |
452 | journal_remove_journal_head(bh); | ||
453 | put_bh(bh); | ||
454 | } else { | ||
455 | jbd_unlock_bh_state(bh); | ||
456 | } | ||
457 | release_data_buffer(bh); | 457 | release_data_buffer(bh); |
458 | cond_resched_lock(&journal->j_list_lock); | 458 | cond_resched_lock(&journal->j_list_lock); |
459 | } | 459 | } |
@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal) | |||
493 | commit_transaction->t_state = T_COMMIT; | 493 | commit_transaction->t_state = T_COMMIT; |
494 | spin_unlock(&journal->j_state_lock); | 494 | spin_unlock(&journal->j_state_lock); |
495 | 495 | ||
496 | trace_jbd_commit_logging(journal, commit_transaction); | ||
496 | J_ASSERT(commit_transaction->t_nr_buffers <= | 497 | J_ASSERT(commit_transaction->t_nr_buffers <= |
497 | commit_transaction->t_outstanding_credits); | 498 | commit_transaction->t_outstanding_credits); |
498 | 499 | ||
@@ -797,10 +798,16 @@ restart_loop: | |||
797 | while (commit_transaction->t_forget) { | 798 | while (commit_transaction->t_forget) { |
798 | transaction_t *cp_transaction; | 799 | transaction_t *cp_transaction; |
799 | struct buffer_head *bh; | 800 | struct buffer_head *bh; |
801 | int try_to_free = 0; | ||
800 | 802 | ||
801 | jh = commit_transaction->t_forget; | 803 | jh = commit_transaction->t_forget; |
802 | spin_unlock(&journal->j_list_lock); | 804 | spin_unlock(&journal->j_list_lock); |
803 | bh = jh2bh(jh); | 805 | bh = jh2bh(jh); |
806 | /* | ||
807 | * Get a reference so that bh cannot be freed before we are | ||
808 | * done with it. | ||
809 | */ | ||
810 | get_bh(bh); | ||
804 | jbd_lock_bh_state(bh); | 811 | jbd_lock_bh_state(bh); |
805 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || | 812 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || |
806 | jh->b_transaction == journal->j_running_transaction); | 813 | jh->b_transaction == journal->j_running_transaction); |
@@ -858,28 +865,27 @@ restart_loop: | |||
858 | __journal_insert_checkpoint(jh, commit_transaction); | 865 | __journal_insert_checkpoint(jh, commit_transaction); |
859 | if (is_journal_aborted(journal)) | 866 | if (is_journal_aborted(journal)) |
860 | clear_buffer_jbddirty(bh); | 867 | clear_buffer_jbddirty(bh); |
861 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | ||
862 | __journal_refile_buffer(jh); | ||
863 | jbd_unlock_bh_state(bh); | ||
864 | } else { | 868 | } else { |
865 | J_ASSERT_BH(bh, !buffer_dirty(bh)); | 869 | J_ASSERT_BH(bh, !buffer_dirty(bh)); |
866 | /* The buffer on BJ_Forget list and not jbddirty means | 870 | /* |
871 | * The buffer on BJ_Forget list and not jbddirty means | ||
867 | * it has been freed by this transaction and hence it | 872 | * it has been freed by this transaction and hence it |
868 | * could not have been reallocated until this | 873 | * could not have been reallocated until this |
869 | * transaction has committed. *BUT* it could be | 874 | * transaction has committed. *BUT* it could be |
870 | * reallocated once we have written all the data to | 875 | * reallocated once we have written all the data to |
871 | * disk and before we process the buffer on BJ_Forget | 876 | * disk and before we process the buffer on BJ_Forget |
872 | * list. */ | 877 | * list. |
873 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | 878 | */ |
874 | __journal_refile_buffer(jh); | 879 | if (!jh->b_next_transaction) |
875 | if (!jh->b_transaction) { | 880 | try_to_free = 1; |
876 | jbd_unlock_bh_state(bh); | ||
877 | /* needs a brelse */ | ||
878 | journal_remove_journal_head(bh); | ||
879 | release_buffer_page(bh); | ||
880 | } else | ||
881 | jbd_unlock_bh_state(bh); | ||
882 | } | 881 | } |
882 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | ||
883 | __journal_refile_buffer(jh); | ||
884 | jbd_unlock_bh_state(bh); | ||
885 | if (try_to_free) | ||
886 | release_buffer_page(bh); | ||
887 | else | ||
888 | __brelse(bh); | ||
883 | cond_resched_lock(&journal->j_list_lock); | 889 | cond_resched_lock(&journal->j_list_lock); |
884 | } | 890 | } |
885 | spin_unlock(&journal->j_list_lock); | 891 | spin_unlock(&journal->j_list_lock); |
@@ -946,6 +952,7 @@ restart_loop: | |||
946 | } | 952 | } |
947 | spin_unlock(&journal->j_list_lock); | 953 | spin_unlock(&journal->j_list_lock); |
948 | 954 | ||
955 | trace_jbd_end_commit(journal, commit_transaction); | ||
949 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 956 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
950 | journal->j_commit_sequence, journal->j_tail_sequence); | 957 | journal->j_commit_sequence, journal->j_tail_sequence); |
951 | 958 | ||
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e2d4285fbe90..9fe061fb8779 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -38,6 +38,9 @@ | |||
38 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
40 | 40 | ||
41 | #define CREATE_TRACE_POINTS | ||
42 | #include <trace/events/jbd.h> | ||
43 | |||
41 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
42 | #include <asm/page.h> | 45 | #include <asm/page.h> |
43 | 46 | ||
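CREATE_TRACE_POINTS must be defined in exactly one compilation unit before including the event header; that is what instantiates the trace_jbd_*() calls used across checkpoint.c and commit.c. As a hedged reconstruction (the real trace/events/jbd.h is added elsewhere in this series, and the usual header boilerplate is omitted), one event might look like:

        TRACE_EVENT(jbd_checkpoint,
                TP_PROTO(journal_t *journal, int result),
                TP_ARGS(journal, result),
                TP_STRUCT__entry(
                        __field(dev_t, dev)
                        __field(int,   result)
                ),
                TP_fast_assign(
                        __entry->dev    = journal->j_fs_dev->bd_dev;
                        __entry->result = result;
                ),
                TP_printk("dev %d,%d result %d",
                          MAJOR(__entry->dev), MINOR(__entry->dev),
                          __entry->result)
        );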
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait) | |||
1065 | } else | 1068 | } else |
1066 | write_dirty_buffer(bh, WRITE); | 1069 | write_dirty_buffer(bh, WRITE); |
1067 | 1070 | ||
1071 | trace_jbd_update_superblock_end(journal, wait); | ||
1068 | out: | 1072 | out: |
1069 | /* If we have just flushed the log (by marking s_start==0), then | 1073 | /* If we have just flushed the log (by marking s_start==0), then |
1070 | * any future commit will have to be careful to update the | 1074 | * any future commit will have to be careful to update the |
@@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1799 | * When a buffer has its BH_JBD bit set it is immune from being released by | 1803 | * When a buffer has its BH_JBD bit set it is immune from being released by |
1800 | * core kernel code, mainly via ->b_count. | 1804 | * core kernel code, mainly via ->b_count. |
1801 | * | 1805 | * |
1802 | * A journal_head may be detached from its buffer_head when the journal_head's | 1806 | * A journal_head is detached from its buffer_head when the journal_head's |
1803 | * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. | 1807 | * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint |
1804 | * Various places in JBD call journal_remove_journal_head() to indicate that the | 1808 | * transaction (b_cp_transaction) hold their references to b_jcount. |
1805 | * journal_head can be dropped if needed. | ||
1806 | * | 1809 | * |
1807 | * Various places in the kernel want to attach a journal_head to a buffer_head | 1810 | * Various places in the kernel want to attach a journal_head to a buffer_head |
1808 | * _before_ attaching the journal_head to a transaction. To protect the | 1811 | * _before_ attaching the journal_head to a transaction. To protect the |
@@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1815 | * (Attach a journal_head if needed. Increments b_jcount) | 1818 | * (Attach a journal_head if needed. Increments b_jcount) |
1816 | * struct journal_head *jh = journal_add_journal_head(bh); | 1819 | * struct journal_head *jh = journal_add_journal_head(bh); |
1817 | * ... | 1820 | * ... |
1818 | * jh->b_transaction = xxx; | 1821 | * (Get another reference for transaction) |
1819 | * journal_put_journal_head(jh); | 1822 | * journal_grab_journal_head(bh); |
1820 | * | 1823 | * jh->b_transaction = xxx; |
1821 | * Now, the journal_head's b_jcount is zero, but it is safe from being released | 1824 | * (Put original reference) |
1822 | * because it has a non-zero b_transaction. | 1825 | * journal_put_journal_head(jh); |
1823 | */ | 1826 | */ |
1824 | 1827 | ||
1825 | /* | 1828 | /* |
1826 | * Give a buffer_head a journal_head. | 1829 | * Give a buffer_head a journal_head. |
1827 | * | 1830 | * |
1828 | * Doesn't need the journal lock. | ||
1829 | * May sleep. | 1831 | * May sleep. |
1830 | */ | 1832 | */ |
1831 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) | 1833 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) |
@@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
1889 | struct journal_head *jh = bh2jh(bh); | 1891 | struct journal_head *jh = bh2jh(bh); |
1890 | 1892 | ||
1891 | J_ASSERT_JH(jh, jh->b_jcount >= 0); | 1893 | J_ASSERT_JH(jh, jh->b_jcount >= 0); |
1892 | 1894 | J_ASSERT_JH(jh, jh->b_transaction == NULL); | |
1893 | get_bh(bh); | 1895 | J_ASSERT_JH(jh, jh->b_next_transaction == NULL); |
1894 | if (jh->b_jcount == 0) { | 1896 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
1895 | if (jh->b_transaction == NULL && | 1897 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); |
1896 | jh->b_next_transaction == NULL && | 1898 | J_ASSERT_BH(bh, buffer_jbd(bh)); |
1897 | jh->b_cp_transaction == NULL) { | 1899 | J_ASSERT_BH(bh, jh2bh(jh) == bh); |
1898 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); | 1900 | BUFFER_TRACE(bh, "remove journal_head"); |
1899 | J_ASSERT_BH(bh, buffer_jbd(bh)); | 1901 | if (jh->b_frozen_data) { |
1900 | J_ASSERT_BH(bh, jh2bh(jh) == bh); | 1902 | printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); |
1901 | BUFFER_TRACE(bh, "remove journal_head"); | 1903 | jbd_free(jh->b_frozen_data, bh->b_size); |
1902 | if (jh->b_frozen_data) { | ||
1903 | printk(KERN_WARNING "%s: freeing " | ||
1904 | "b_frozen_data\n", | ||
1905 | __func__); | ||
1906 | jbd_free(jh->b_frozen_data, bh->b_size); | ||
1907 | } | ||
1908 | if (jh->b_committed_data) { | ||
1909 | printk(KERN_WARNING "%s: freeing " | ||
1910 | "b_committed_data\n", | ||
1911 | __func__); | ||
1912 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1913 | } | ||
1914 | bh->b_private = NULL; | ||
1915 | jh->b_bh = NULL; /* debug, really */ | ||
1916 | clear_buffer_jbd(bh); | ||
1917 | __brelse(bh); | ||
1918 | journal_free_journal_head(jh); | ||
1919 | } else { | ||
1920 | BUFFER_TRACE(bh, "journal_head was locked"); | ||
1921 | } | ||
1922 | } | 1904 | } |
1905 | if (jh->b_committed_data) { | ||
1906 | printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); | ||
1907 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1908 | } | ||
1909 | bh->b_private = NULL; | ||
1910 | jh->b_bh = NULL; /* debug, really */ | ||
1911 | clear_buffer_jbd(bh); | ||
1912 | journal_free_journal_head(jh); | ||
1923 | } | 1913 | } |
1924 | 1914 | ||
1925 | /* | 1915 | /* |
1926 | * journal_remove_journal_head(): if the buffer isn't attached to a transaction | 1916 | * Drop a reference on the passed journal_head. If it fell to zero then |
1927 | * and has a zero b_jcount then remove and release its journal_head. If we did | ||
1928 | * see that the buffer is not used by any transaction we also "logically" | ||
1929 | * decrement ->b_count. | ||
1930 | * | ||
1931 | * We in fact take an additional increment on ->b_count as a convenience, | ||
1932 | * because the caller usually wants to do additional things with the bh | ||
1933 | * after calling here. | ||
1934 | * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some | ||
1935 | * time. Once the caller has run __brelse(), the buffer is eligible for | ||
1936 | * reaping by try_to_free_buffers(). | ||
1937 | */ | ||
1938 | void journal_remove_journal_head(struct buffer_head *bh) | ||
1939 | { | ||
1940 | jbd_lock_bh_journal_head(bh); | ||
1941 | __journal_remove_journal_head(bh); | ||
1942 | jbd_unlock_bh_journal_head(bh); | ||
1943 | } | ||
1944 | |||
1945 | /* | ||
1946 | * Drop a reference on the passed journal_head. If it fell to zero then try to | ||
1947 | * release the journal_head from the buffer_head. | 1917 | * release the journal_head from the buffer_head. |
1948 | */ | 1918 | */ |
1949 | void journal_put_journal_head(struct journal_head *jh) | 1919 | void journal_put_journal_head(struct journal_head *jh) |
@@ -1953,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh) | |||
1953 | jbd_lock_bh_journal_head(bh); | 1923 | jbd_lock_bh_journal_head(bh); |
1954 | J_ASSERT_JH(jh, jh->b_jcount > 0); | 1924 | J_ASSERT_JH(jh, jh->b_jcount > 0); |
1955 | --jh->b_jcount; | 1925 | --jh->b_jcount; |
1956 | if (!jh->b_jcount && !jh->b_transaction) { | 1926 | if (!jh->b_jcount) { |
1957 | __journal_remove_journal_head(bh); | 1927 | __journal_remove_journal_head(bh); |
1928 | jbd_unlock_bh_journal_head(bh); | ||
1958 | __brelse(bh); | 1929 | __brelse(bh); |
1959 | } | 1930 | } else |
1960 | jbd_unlock_bh_journal_head(bh); | 1931 | jbd_unlock_bh_journal_head(bh); |
1961 | } | 1932 | } |
1962 | 1933 | ||
1963 | /* | 1934 | /* |
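The net effect of the journal.c hunks: a journal_head now lives exactly as long as its b_jcount, and every long-lived pointer to it (running transaction, checkpoint transaction) holds its own count. The lifecycle in miniature, as an illustrative sketch:

        static void lifecycle_sketch(struct buffer_head *bh, transaction_t *t)
        {
                struct journal_head *jh = journal_add_journal_head(bh); /* +1 */

                journal_grab_journal_head(bh);  /* +1: the transaction's ref */
                jh->b_transaction = t;
                journal_put_journal_head(jh);   /* -1: ours; jh survives */
                /* ... later, unfiling drops the transaction's reference,
                 * and the final put detaches and frees the journal_head,
                 * no matter what b_transaction used to point at ... */
        }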
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f7ee81a065da..7e59c6e66f9b 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
29 | #include <linux/backing-dev.h> | ||
29 | 30 | ||
30 | static void __journal_temp_unlink_buffer(struct journal_head *jh); | 31 | static void __journal_temp_unlink_buffer(struct journal_head *jh); |
31 | 32 | ||
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle) | |||
99 | 100 | ||
100 | alloc_transaction: | 101 | alloc_transaction: |
101 | if (!journal->j_running_transaction) { | 102 | if (!journal->j_running_transaction) { |
102 | new_transaction = kzalloc(sizeof(*new_transaction), | 103 | new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS); |
103 | GFP_NOFS|__GFP_NOFAIL); | ||
104 | if (!new_transaction) { | 104 | if (!new_transaction) { |
105 | ret = -ENOMEM; | 105 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
106 | goto out; | 106 | goto alloc_transaction; |
107 | } | 107 | } |
108 | } | 108 | } |
109 | 109 | ||
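Dropping __GFP_NOFAIL moves the retry into the caller, where it can throttle on writeback congestion instead of looping inside the page allocator. The pattern, lifted out as a sketch:

        #include <linux/backing-dev.h>
        #include <linux/slab.h>

        static transaction_t *alloc_transaction_sketch(void)
        {
                transaction_t *t;

                /* Throttle briefly on async writeback congestion
                 * between attempts rather than failing outright. */
                while (!(t = kzalloc(sizeof(*t), GFP_NOFS)))
                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                return t;
        }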
@@ -696,7 +696,6 @@ repeat: | |||
696 | if (!jh->b_transaction) { | 696 | if (!jh->b_transaction) { |
697 | JBUFFER_TRACE(jh, "no transaction"); | 697 | JBUFFER_TRACE(jh, "no transaction"); |
698 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 698 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
699 | jh->b_transaction = transaction; | ||
700 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 699 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
701 | spin_lock(&journal->j_list_lock); | 700 | spin_lock(&journal->j_list_lock); |
702 | __journal_file_buffer(jh, transaction, BJ_Reserved); | 701 | __journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
818 | * committed and so it's safe to clear the dirty bit. | 817 | * committed and so it's safe to clear the dirty bit. |
819 | */ | 818 | */ |
820 | clear_buffer_dirty(jh2bh(jh)); | 819 | clear_buffer_dirty(jh2bh(jh)); |
821 | jh->b_transaction = transaction; | ||
822 | 820 | ||
823 | /* first access by this transaction */ | 821 | /* first access by this transaction */ |
824 | jh->b_modified = 0; | 822 | jh->b_modified = 0; |
@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
844 | */ | 842 | */ |
845 | JBUFFER_TRACE(jh, "cancelling revoke"); | 843 | JBUFFER_TRACE(jh, "cancelling revoke"); |
846 | journal_cancel_revoke(handle, jh); | 844 | journal_cancel_revoke(handle, jh); |
847 | journal_put_journal_head(jh); | ||
848 | out: | 845 | out: |
846 | journal_put_journal_head(jh); | ||
849 | return err; | 847 | return err; |
850 | } | 848 | } |
851 | 849 | ||
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1069 | ret = -EIO; | 1067 | ret = -EIO; |
1070 | goto no_journal; | 1068 | goto no_journal; |
1071 | } | 1069 | } |
1072 | 1070 | /* We might have slept so buffer could be refiled now */ | |
1073 | if (jh->b_transaction != NULL) { | 1071 | if (jh->b_transaction != NULL && |
1072 | jh->b_transaction != handle->h_transaction) { | ||
1074 | JBUFFER_TRACE(jh, "unfile from commit"); | 1073 | JBUFFER_TRACE(jh, "unfile from commit"); |
1075 | __journal_temp_unlink_buffer(jh); | 1074 | __journal_temp_unlink_buffer(jh); |
1076 | /* It still points to the committing | 1075 | /* It still points to the committing |
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1091 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | 1090 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { |
1092 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | 1091 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); |
1093 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | 1092 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); |
1094 | __journal_temp_unlink_buffer(jh); | ||
1095 | jh->b_transaction = handle->h_transaction; | ||
1096 | JBUFFER_TRACE(jh, "file as data"); | 1093 | JBUFFER_TRACE(jh, "file as data"); |
1097 | __journal_file_buffer(jh, handle->h_transaction, | 1094 | __journal_file_buffer(jh, handle->h_transaction, |
1098 | BJ_SyncData); | 1095 | BJ_SyncData); |
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1300 | __journal_file_buffer(jh, transaction, BJ_Forget); | 1297 | __journal_file_buffer(jh, transaction, BJ_Forget); |
1301 | } else { | 1298 | } else { |
1302 | __journal_unfile_buffer(jh); | 1299 | __journal_unfile_buffer(jh); |
1303 | journal_remove_journal_head(bh); | ||
1304 | __brelse(bh); | ||
1305 | if (!buffer_jbd(bh)) { | 1300 | if (!buffer_jbd(bh)) { |
1306 | spin_unlock(&journal->j_list_lock); | 1301 | spin_unlock(&journal->j_list_lock); |
1307 | jbd_unlock_bh_state(bh); | 1302 | jbd_unlock_bh_state(bh); |
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) | |||
1622 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1617 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1623 | } | 1618 | } |
1624 | 1619 | ||
1620 | /* | ||
1621 | * Remove buffer from all transactions. | ||
1622 | * | ||
1623 | * Called with bh_state lock and j_list_lock held. | ||
1624 | * | ||
1625 | * jh and bh may already be freed when this function returns. | ||
1626 | */ | ||
1625 | void __journal_unfile_buffer(struct journal_head *jh) | 1627 | void __journal_unfile_buffer(struct journal_head *jh) |
1626 | { | 1628 | { |
1627 | __journal_temp_unlink_buffer(jh); | 1629 | __journal_temp_unlink_buffer(jh); |
1628 | jh->b_transaction = NULL; | 1630 | jh->b_transaction = NULL; |
1631 | journal_put_journal_head(jh); | ||
1629 | } | 1632 | } |
1630 | 1633 | ||
1631 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1634 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1632 | { | 1635 | { |
1633 | jbd_lock_bh_state(jh2bh(jh)); | 1636 | struct buffer_head *bh = jh2bh(jh); |
1637 | |||
1638 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1639 | get_bh(bh); | ||
1640 | jbd_lock_bh_state(bh); | ||
1634 | spin_lock(&journal->j_list_lock); | 1641 | spin_lock(&journal->j_list_lock); |
1635 | __journal_unfile_buffer(jh); | 1642 | __journal_unfile_buffer(jh); |
1636 | spin_unlock(&journal->j_list_lock); | 1643 | spin_unlock(&journal->j_list_lock); |
1637 | jbd_unlock_bh_state(jh2bh(jh)); | 1644 | jbd_unlock_bh_state(bh); |
1645 | __brelse(bh); | ||
1638 | } | 1646 | } |
1639 | 1647 | ||
1640 | /* | 1648 | /* |
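journal_unfile_buffer() above is the template for a guard repeated throughout this patch: pin the buffer_head before any operation that may drop the last journal_head reference, so the bh is still valid to unlock afterwards. The idiom on its own:

        static void unfile_sketch(journal_t *journal, struct journal_head *jh)
        {
                struct buffer_head *bh = jh2bh(jh);

                get_bh(bh);                     /* bh can't vanish under us */
                jbd_lock_bh_state(bh);
                spin_lock(&journal->j_list_lock);
                __journal_unfile_buffer(jh);    /* may free jh (never bh) */
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                __brelse(bh);                   /* drop our pin */
        }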
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1661 | /* A written-back ordered data buffer */ | 1669 | /* A written-back ordered data buffer */ |
1662 | JBUFFER_TRACE(jh, "release data"); | 1670 | JBUFFER_TRACE(jh, "release data"); |
1663 | __journal_unfile_buffer(jh); | 1671 | __journal_unfile_buffer(jh); |
1664 | journal_remove_journal_head(bh); | ||
1665 | __brelse(bh); | ||
1666 | } | 1672 | } |
1667 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1673 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1668 | /* written-back checkpointed metadata buffer */ | 1674 | /* written-back checkpointed metadata buffer */ |
1669 | if (jh->b_jlist == BJ_None) { | 1675 | if (jh->b_jlist == BJ_None) { |
1670 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1676 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1671 | __journal_remove_checkpoint(jh); | 1677 | __journal_remove_checkpoint(jh); |
1672 | journal_remove_journal_head(bh); | ||
1673 | __brelse(bh); | ||
1674 | } | 1678 | } |
1675 | } | 1679 | } |
1676 | spin_unlock(&journal->j_list_lock); | 1680 | spin_unlock(&journal->j_list_lock); |
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal, | |||
1733 | /* | 1737 | /* |
1734 | * We take our own ref against the journal_head here to avoid | 1738 | * We take our own ref against the journal_head here to avoid |
1735 | * having to add tons of locking around each instance of | 1739 | * having to add tons of locking around each instance of |
1736 | * journal_remove_journal_head() and journal_put_journal_head(). | 1740 | * journal_put_journal_head(). |
1737 | */ | 1741 | */ |
1738 | jh = journal_grab_journal_head(bh); | 1742 | jh = journal_grab_journal_head(bh); |
1739 | if (!jh) | 1743 | if (!jh) |
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1770 | int may_free = 1; | 1774 | int may_free = 1; |
1771 | struct buffer_head *bh = jh2bh(jh); | 1775 | struct buffer_head *bh = jh2bh(jh); |
1772 | 1776 | ||
1773 | __journal_unfile_buffer(jh); | ||
1774 | |||
1775 | if (jh->b_cp_transaction) { | 1777 | if (jh->b_cp_transaction) { |
1776 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1778 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1779 | __journal_temp_unlink_buffer(jh); | ||
1777 | /* | 1780 | /* |
1778 | * We don't want to write the buffer anymore, clear the | 1781 | * We don't want to write the buffer anymore, clear the |
1779 | * bit so that we don't confuse checks in | 1782 | * bit so that we don't confuse checks in |
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1784 | may_free = 0; | 1787 | may_free = 0; |
1785 | } else { | 1788 | } else { |
1786 | JBUFFER_TRACE(jh, "on running transaction"); | 1789 | JBUFFER_TRACE(jh, "on running transaction"); |
1787 | journal_remove_journal_head(bh); | 1790 | __journal_unfile_buffer(jh); |
1788 | __brelse(bh); | ||
1789 | } | 1791 | } |
1790 | return may_free; | 1792 | return may_free; |
1791 | } | 1793 | } |
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh, | |||
2070 | 2072 | ||
2071 | if (jh->b_transaction) | 2073 | if (jh->b_transaction) |
2072 | __journal_temp_unlink_buffer(jh); | 2074 | __journal_temp_unlink_buffer(jh); |
2075 | else | ||
2076 | journal_grab_journal_head(bh); | ||
2073 | jh->b_transaction = transaction; | 2077 | jh->b_transaction = transaction; |
2074 | 2078 | ||
2075 | switch (jlist) { | 2079 | switch (jlist) { |
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh, | |||
2127 | * already started to be used by a subsequent transaction, refile the | 2131 | * already started to be used by a subsequent transaction, refile the |
2128 | * buffer on that transaction's metadata list. | 2132 | * buffer on that transaction's metadata list. |
2129 | * | 2133 | * |
2130 | * Called under journal->j_list_lock | 2134 | * Called under j_list_lock |
2131 | * | ||
2132 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2135 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2136 | * | ||
2137 | * jh and bh may already be freed when this function returns |
2133 | */ | 2138 | */ |
2134 | void __journal_refile_buffer(struct journal_head *jh) | 2139 | void __journal_refile_buffer(struct journal_head *jh) |
2135 | { | 2140 | { |
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2153 | 2158 | ||
2154 | was_dirty = test_clear_buffer_jbddirty(bh); | 2159 | was_dirty = test_clear_buffer_jbddirty(bh); |
2155 | __journal_temp_unlink_buffer(jh); | 2160 | __journal_temp_unlink_buffer(jh); |
2161 | /* | ||
2162 | * We set b_transaction here because b_next_transaction will inherit | ||
2163 | * our jh reference and thus __journal_file_buffer() must not take a | ||
2164 | * new one. | ||
2165 | */ | ||
2156 | jh->b_transaction = jh->b_next_transaction; | 2166 | jh->b_transaction = jh->b_next_transaction; |
2157 | jh->b_next_transaction = NULL; | 2167 | jh->b_next_transaction = NULL; |
2158 | if (buffer_freed(bh)) | 2168 | if (buffer_freed(bh)) |
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2169 | } | 2179 | } |
2170 | 2180 | ||
2171 | /* | 2181 | /* |
2172 | * For the unlocked version of this call, also make sure that any | 2182 | * __journal_refile_buffer() with necessary locking added. We take our bh |
2173 | * hanging journal_head is cleaned up if necessary. | 2183 | * reference so that we can safely unlock bh. |
2174 | * | 2184 | * |
2175 | * __journal_refile_buffer is usually called as part of a single locked | 2185 | * The jh and bh may be freed by this call. |
2176 | * operation on a buffer_head, in which the caller is probably going to | ||
2177 | * be hooking the journal_head onto other lists. In that case it is up | ||
2178 | * to the caller to remove the journal_head if necessary. For the | ||
2179 | * unlocked journal_refile_buffer call, the caller isn't going to be | ||
2180 | * doing anything else to the buffer so we need to do the cleanup | ||
2181 | * ourselves to avoid a jh leak. | ||
2182 | * | ||
2183 | * *** The journal_head may be freed by this call! *** | ||
2184 | */ | 2186 | */ |
2185 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2187 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2186 | { | 2188 | { |
2187 | struct buffer_head *bh = jh2bh(jh); | 2189 | struct buffer_head *bh = jh2bh(jh); |
2188 | 2190 | ||
2191 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2192 | get_bh(bh); | ||
2189 | jbd_lock_bh_state(bh); | 2193 | jbd_lock_bh_state(bh); |
2190 | spin_lock(&journal->j_list_lock); | 2194 | spin_lock(&journal->j_list_lock); |
2191 | |||
2192 | __journal_refile_buffer(jh); | 2195 | __journal_refile_buffer(jh); |
2193 | jbd_unlock_bh_state(bh); | 2196 | jbd_unlock_bh_state(bh); |
2194 | journal_remove_journal_head(bh); | ||
2195 | |||
2196 | spin_unlock(&journal->j_list_lock); | 2197 | spin_unlock(&journal->j_list_lock); |
2197 | __brelse(bh); | 2198 | __brelse(bh); |
2198 | } | 2199 | } |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 2c62c5aae82f..16a698bd906d 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -257,9 +257,12 @@ static void | |||
257 | __flush_batch(journal_t *journal, int *batch_count) | 257 | __flush_batch(journal_t *journal, int *batch_count) |
258 | { | 258 | { |
259 | int i; | 259 | int i; |
260 | struct blk_plug plug; | ||
260 | 261 | ||
262 | blk_start_plug(&plug); | ||
261 | for (i = 0; i < *batch_count; i++) | 263 | for (i = 0; i < *batch_count; i++) |
262 | write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); | 264 | write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC); |
265 | blk_finish_plug(&plug); | ||
263 | 266 | ||
264 | for (i = 0; i < *batch_count; i++) { | 267 | for (i = 0; i < *batch_count; i++) { |
265 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; | 268 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0dfa5b598e68..f24df13adc4e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2390,73 +2390,6 @@ static void __exit journal_exit(void) | |||
2390 | jbd2_journal_destroy_caches(); | 2390 | jbd2_journal_destroy_caches(); |
2391 | } | 2391 | } |
2392 | 2392 | ||
2393 | /* | ||
2394 | * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 | ||
2395 | * tracing infrastructure to map a dev_t to a device name. | ||
2396 | * | ||
2397 | * The caller should use rcu_read_lock() in order to make sure the | ||
2398 | * device name stays valid until its done with it. We use | ||
2399 | * rcu_read_lock() as well to make sure we're safe in case the caller | ||
2400 | * gets sloppy, and because rcu_read_lock() is cheap and can be safely | ||
2401 | * nested. | ||
2402 | */ | ||
2403 | struct devname_cache { | ||
2404 | struct rcu_head rcu; | ||
2405 | dev_t device; | ||
2406 | char devname[BDEVNAME_SIZE]; | ||
2407 | }; | ||
2408 | #define CACHE_SIZE_BITS 6 | ||
2409 | static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; | ||
2410 | static DEFINE_SPINLOCK(devname_cache_lock); | ||
2411 | |||
2412 | static void free_devcache(struct rcu_head *rcu) | ||
2413 | { | ||
2414 | kfree(rcu); | ||
2415 | } | ||
2416 | |||
2417 | const char *jbd2_dev_to_name(dev_t device) | ||
2418 | { | ||
2419 | int i = hash_32(device, CACHE_SIZE_BITS); | ||
2420 | char *ret; | ||
2421 | struct block_device *bd; | ||
2422 | static struct devname_cache *new_dev; | ||
2423 | |||
2424 | rcu_read_lock(); | ||
2425 | if (devcache[i] && devcache[i]->device == device) { | ||
2426 | ret = devcache[i]->devname; | ||
2427 | rcu_read_unlock(); | ||
2428 | return ret; | ||
2429 | } | ||
2430 | rcu_read_unlock(); | ||
2431 | |||
2432 | new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); | ||
2433 | if (!new_dev) | ||
2434 | return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ | ||
2435 | bd = bdget(device); | ||
2436 | spin_lock(&devname_cache_lock); | ||
2437 | if (devcache[i]) { | ||
2438 | if (devcache[i]->device == device) { | ||
2439 | kfree(new_dev); | ||
2440 | bdput(bd); | ||
2441 | ret = devcache[i]->devname; | ||
2442 | spin_unlock(&devname_cache_lock); | ||
2443 | return ret; | ||
2444 | } | ||
2445 | call_rcu(&devcache[i]->rcu, free_devcache); | ||
2446 | } | ||
2447 | devcache[i] = new_dev; | ||
2448 | devcache[i]->device = device; | ||
2449 | if (bd) { | ||
2450 | bdevname(bd, devcache[i]->devname); | ||
2451 | bdput(bd); | ||
2452 | } else | ||
2453 | __bdevname(device, devcache[i]->devname); | ||
2454 | ret = devcache[i]->devname; | ||
2455 | spin_unlock(&devname_cache_lock); | ||
2456 | return ret; | ||
2457 | } | ||
2458 | EXPORT_SYMBOL(jbd2_dev_to_name); | ||
2459 | |||
2460 | MODULE_LICENSE("GPL"); | 2393 | MODULE_LICENSE("GPL"); |
2461 | module_init(journal_init); | 2394 | module_init(journal_init); |
2462 | module_exit(journal_exit); | 2395 | module_exit(journal_exit); |
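With the RCU name cache gone, callers presumably record the raw dev_t and decode it at print time, which is both lock-free and allocation-free:

        /* Illustrative replacement for a jbd2_dev_to_name() caller. */
        static void show_journal_dev(journal_t *journal)
        {
                dev_t dev = journal->j_fs_dev->bd_dev;

                printk(KERN_DEBUG "jbd2: dev %d,%d\n",
                       MAJOR(dev), MINOR(dev));
        }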
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 27c511a1cf05..926d02068a14 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -227,7 +227,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
227 | case ACL_TYPE_ACCESS: | 227 | case ACL_TYPE_ACCESS: |
228 | xprefix = JFFS2_XPREFIX_ACL_ACCESS; | 228 | xprefix = JFFS2_XPREFIX_ACL_ACCESS; |
229 | if (acl) { | 229 | if (acl) { |
230 | mode_t mode = inode->i_mode; | 230 | umode_t mode = inode->i_mode; |
231 | rc = posix_acl_equiv_mode(acl, &mode); | 231 | rc = posix_acl_equiv_mode(acl, &mode); |
232 | if (rc < 0) | 232 | if (rc < 0) |
233 | return rc; | 233 | return rc; |
@@ -259,7 +259,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
259 | return rc; | 259 | return rc; |
260 | } | 260 | } |
261 | 261 | ||
262 | int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, mode_t *i_mode) | 262 | int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, umode_t *i_mode) |
263 | { | 263 | { |
264 | struct posix_acl *acl; | 264 | struct posix_acl *acl; |
265 | int rc; | 265 | int rc; |
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index b3421c78d9f8..9b477246f2a6 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header { | |||
28 | 28 | ||
29 | struct posix_acl *jffs2_get_acl(struct inode *inode, int type); | 29 | struct posix_acl *jffs2_get_acl(struct inode *inode, int type); |
30 | extern int jffs2_acl_chmod(struct inode *); | 30 | extern int jffs2_acl_chmod(struct inode *); |
31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, mode_t *); | 31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); |
32 | extern int jffs2_init_acl_post(struct inode *); | 32 | extern int jffs2_init_acl_post(struct inode *); |
33 | 33 | ||
34 | extern const struct xattr_handler jffs2_acl_access_xattr_handler; | 34 | extern const struct xattr_handler jffs2_acl_access_xattr_handler; |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index eeead33d8ef0..bbcb9755dd2b 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -80,7 +80,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) | |||
80 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); | 80 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); |
81 | if (ret) { | 81 | if (ret) { |
82 | jffs2_free_raw_inode(ri); | 82 | jffs2_free_raw_inode(ri); |
83 | if (S_ISLNK(inode->i_mode & S_IFMT)) | 83 | if (S_ISLNK(inode->i_mode)) |
84 | kfree(mdata); | 84 | kfree(mdata); |
85 | return ret; | 85 | return ret; |
86 | } | 86 | } |
@@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) | |||
406 | 406 | ||
407 | /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, | 407 | /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, |
408 | fill in the raw_inode while you're at it. */ | 408 | fill in the raw_inode while you're at it. */ |
409 | struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri) | 409 | struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri) |
410 | { | 410 | { |
411 | struct inode *inode; | 411 | struct inode *inode; |
412 | struct super_block *sb = dir_i->i_sb; | 412 | struct super_block *sb = dir_i->i_sb; |
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 526979c607b6..6c1755c59c0f 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
@@ -173,7 +173,7 @@ int jffs2_do_setattr (struct inode *, struct iattr *); | |||
173 | struct inode *jffs2_iget(struct super_block *, unsigned long); | 173 | struct inode *jffs2_iget(struct super_block *, unsigned long); |
174 | void jffs2_evict_inode (struct inode *); | 174 | void jffs2_evict_inode (struct inode *); |
175 | void jffs2_dirty_inode(struct inode *inode, int flags); | 175 | void jffs2_dirty_inode(struct inode *inode, int flags); |
176 | struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, | 176 | struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, |
177 | struct jffs2_raw_inode *ri); | 177 | struct jffs2_raw_inode *ri); |
178 | int jffs2_statfs (struct dentry *, struct kstatfs *); | 178 | int jffs2_statfs (struct dentry *, struct kstatfs *); |
179 | int jffs2_remount_fs (struct super_block *, int *, char *); | 179 | int jffs2_remount_fs (struct super_block *, int *, char *); |
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index b3a32caf2b45..45559dc3ea2f 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c | |||
@@ -127,16 +127,14 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) | |||
127 | return PTR_ERR(acl); | 127 | return PTR_ERR(acl); |
128 | 128 | ||
129 | if (acl) { | 129 | if (acl) { |
130 | mode_t mode = inode->i_mode; | ||
131 | if (S_ISDIR(inode->i_mode)) { | 130 | if (S_ISDIR(inode->i_mode)) { |
132 | rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); | 131 | rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); |
133 | if (rc) | 132 | if (rc) |
134 | goto cleanup; | 133 | goto cleanup; |
135 | } | 134 | } |
136 | rc = posix_acl_create(&acl, GFP_KERNEL, &mode); | 135 | rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); |
137 | if (rc < 0) | 136 | if (rc < 0) |
138 | goto cleanup; /* posix_acl_release(NULL) is no-op */ | 137 | goto cleanup; /* posix_acl_release(NULL) is no-op */ |
139 | inode->i_mode = mode; | ||
140 | if (rc > 0) | 138 | if (rc > 0) |
141 | rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); | 139 | rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); |
142 | cleanup: | 140 | cleanup: |
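The jfs hunk above can drop its local mode copy because posix_acl_create() now takes a umode_t * and works on &inode->i_mode directly. A sketch of the simplified flow, with example_store_acl() standing in for jfs_set_acl():

#include <linux/fs.h>
#include <linux/posix_acl.h>

/* Hypothetical store hook standing in for a filesystem's set_acl. */
static int example_store_acl(struct inode *inode, struct posix_acl *acl)
{
	return 0;
}

static int example_init_acl(struct inode *inode, struct posix_acl *acl)
{
	/* Applies create-time masking straight into i_mode. On failure the
	 * passed-in ACL has already been released and acl is NULL, which is
	 * why "posix_acl_release(NULL) is no-op" suffices in the hunk. */
	int rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);

	if (rc < 0)
		return rc;
	if (rc > 0)		/* masked ACL still carries extra info */
		rc = example_store_acl(inode, acl);
	posix_acl_release(acl);
	return rc;
}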
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 4496872cf4e7..9cbd11a3f804 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
@@ -3161,7 +3161,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
3161 | { | 3161 | { |
3162 | int rc; | 3162 | int rc; |
3163 | int dbitno, word, rembits, nb, nwords, wbitno, agno; | 3163 | int dbitno, word, rembits, nb, nwords, wbitno, agno; |
3164 | s8 oldroot, *leaf; | 3164 | s8 oldroot; |
3165 | struct dmaptree *tp = (struct dmaptree *) & dp->tree; | 3165 | struct dmaptree *tp = (struct dmaptree *) & dp->tree; |
3166 | 3166 | ||
3167 | /* save the current value of the root (i.e. maximum free string) | 3167 | /* save the current value of the root (i.e. maximum free string) |
@@ -3169,9 +3169,6 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
3169 | */ | 3169 | */ |
3170 | oldroot = tp->stree[ROOT]; | 3170 | oldroot = tp->stree[ROOT]; |
3171 | 3171 | ||
3172 | /* pick up a pointer to the leaves of the dmap tree */ | ||
3173 | leaf = tp->stree + LEAFIND; | ||
3174 | |||
3175 | /* determine the bit number and word within the dmap of the | 3172 | /* determine the bit number and word within the dmap of the |
3176 | * starting block. | 3173 | * starting block. |
3177 | */ | 3174 | */ |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index f6cc0c09ec63..af9606057dde 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -1143,7 +1143,6 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
1143 | struct jfs_log *log; | 1143 | struct jfs_log *log; |
1144 | struct tblock *tblk; | 1144 | struct tblock *tblk; |
1145 | struct lrd *lrd; | 1145 | struct lrd *lrd; |
1146 | int lsn; | ||
1147 | struct inode *ip; | 1146 | struct inode *ip; |
1148 | struct jfs_inode_info *jfs_ip; | 1147 | struct jfs_inode_info *jfs_ip; |
1149 | int k, n; | 1148 | int k, n; |
@@ -1310,7 +1309,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
1310 | */ | 1309 | */ |
1311 | lrd->type = cpu_to_le16(LOG_COMMIT); | 1310 | lrd->type = cpu_to_le16(LOG_COMMIT); |
1312 | lrd->length = 0; | 1311 | lrd->length = 0; |
1313 | lsn = lmLog(log, tblk, lrd, NULL); | 1312 | lmLog(log, tblk, lrd, NULL); |
1314 | 1313 | ||
1315 | lmGroupCommit(log, tblk); | 1314 | lmGroupCommit(log, tblk); |
1316 | 1315 | ||
@@ -2935,7 +2934,6 @@ int jfs_sync(void *arg) | |||
2935 | { | 2934 | { |
2936 | struct inode *ip; | 2935 | struct inode *ip; |
2937 | struct jfs_inode_info *jfs_ip; | 2936 | struct jfs_inode_info *jfs_ip; |
2938 | int rc; | ||
2939 | tid_t tid; | 2937 | tid_t tid; |
2940 | 2938 | ||
2941 | do { | 2939 | do { |
@@ -2961,7 +2959,7 @@ int jfs_sync(void *arg) | |||
2961 | */ | 2959 | */ |
2962 | TXN_UNLOCK(); | 2960 | TXN_UNLOCK(); |
2963 | tid = txBegin(ip->i_sb, COMMIT_INODE); | 2961 | tid = txBegin(ip->i_sb, COMMIT_INODE); |
2964 | rc = txCommit(tid, 1, &ip, 0); | 2962 | txCommit(tid, 1, &ip, 0); |
2965 | txEnd(tid); | 2963 | txEnd(tid); |
2966 | mutex_unlock(&jfs_ip->commit_mutex); | 2964 | mutex_unlock(&jfs_ip->commit_mutex); |
2967 | 2965 | ||
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index adcf92d3b603..7971f37534a3 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c | |||
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) | |||
68 | /* | 68 | /* |
69 | * Wait for outstanding transactions to be written to log: | 69 | * Wait for outstanding transactions to be written to log: |
70 | */ | 70 | */ |
71 | jfs_flush_journal(log, 1); | 71 | jfs_flush_journal(log, 2); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * close fileset inode allocation map (aka fileset inode) | 74 | * close fileset inode allocation map (aka fileset inode) |
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) | |||
146 | * | 146 | * |
147 | * remove file system from log active file system list. | 147 | * remove file system from log active file system list. |
148 | */ | 148 | */ |
149 | jfs_flush_journal(log, 1); | 149 | jfs_flush_journal(log, 2); |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Make sure all metadata makes it to disk | 152 | * Make sure all metadata makes it to disk |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 29b1f1a21142..e17545e15664 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -893,7 +893,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
893 | unchar *i_fastsymlink; | 893 | unchar *i_fastsymlink; |
894 | s64 xlen = 0; | 894 | s64 xlen = 0; |
895 | int bmask = 0, xsize; | 895 | int bmask = 0, xsize; |
896 | s64 extent = 0, xaddr; | 896 | s64 xaddr; |
897 | struct metapage *mp; | 897 | struct metapage *mp; |
898 | struct super_block *sb; | 898 | struct super_block *sb; |
899 | struct tblock *tblk; | 899 | struct tblock *tblk; |
@@ -993,7 +993,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
993 | txAbort(tid, 0); | 993 | txAbort(tid, 0); |
994 | goto out3; | 994 | goto out3; |
995 | } | 995 | } |
996 | extent = xaddr; | ||
997 | ip->i_size = ssize - 1; | 996 | ip->i_size = ssize - 1; |
998 | while (ssize) { | 997 | while (ssize) { |
999 | /* This is kind of silly since PATH_MAX == 4K */ | 998 | /* This is kind of silly since PATH_MAX == 4K */ |
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 24838f1eeee5..e87fedef23db 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
@@ -693,8 +693,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, | |||
693 | return rc; | 693 | return rc; |
694 | } | 694 | } |
695 | if (acl) { | 695 | if (acl) { |
696 | mode_t mode = inode->i_mode; | 696 | rc = posix_acl_equiv_mode(acl, &inode->i_mode); |
697 | rc = posix_acl_equiv_mode(acl, &mode); | ||
698 | posix_acl_release(acl); | 697 | posix_acl_release(acl); |
699 | if (rc < 0) { | 698 | if (rc < 0) { |
700 | printk(KERN_ERR | 699 | printk(KERN_ERR |
@@ -702,7 +701,6 @@ static int can_set_system_xattr(struct inode *inode, const char *name, | |||
702 | rc); | 701 | rc); |
703 | return rc; | 702 | return rc; |
704 | } | 703 | } |
705 | inode->i_mode = mode; | ||
706 | mark_inode_dirty(inode); | 704 | mark_inode_dirty(inode); |
707 | } | 705 | } |
708 | /* | 706 | /* |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index e374050a911c..8392cb85bd54 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc) | |||
302 | /* We appear to be out of the grace period */ | 302 | /* We appear to be out of the grace period */ |
303 | wake_up_all(&host->h_gracewait); | 303 | wake_up_all(&host->h_gracewait); |
304 | } | 304 | } |
305 | dprintk("lockd: server returns status %d\n", resp->status); | 305 | dprintk("lockd: server returns status %d\n", |
306 | ntohl(resp->status)); | ||
306 | return 0; /* Okay, call complete */ | 307 | return 0; /* Okay, call complete */ |
307 | } | 308 | } |
308 | 309 | ||
@@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) | |||
690 | goto out; | 691 | goto out; |
691 | 692 | ||
692 | if (resp->status != nlm_lck_denied_nolocks) | 693 | if (resp->status != nlm_lck_denied_nolocks) |
693 | printk("lockd: unexpected unlock status: %d\n", resp->status); | 694 | printk("lockd: unexpected unlock status: %d\n", |
695 | ntohl(resp->status)); | ||
694 | /* What to do now? I'm out of my depth... */ | 696 | /* What to do now? I'm out of my depth... */ |
695 | status = -ENOLCK; | 697 | status = -ENOLCK; |
696 | out: | 698 | out: |
@@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status) | |||
843 | return -ENOLCK; | 845 | return -ENOLCK; |
844 | #endif | 846 | #endif |
845 | } | 847 | } |
846 | printk(KERN_NOTICE "lockd: unexpected server status %d\n", status); | 848 | printk(KERN_NOTICE "lockd: unexpected server status %d\n", |
849 | ntohl(status)); | ||
847 | return -ENOLCK; | 850 | return -ENOLCK; |
848 | } | 851 | } |
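The lockd hunks are endianness fixes: resp->status is a wire-format __be32, so printing it raw with %d shows a byte-swapped value on little-endian hosts (and trips sparse). A small sketch of the corrected print:

#include <linux/kernel.h>
#include <asm/byteorder.h>

static void example_log_nlm_status(__be32 status)
{
	/* Wrong on little-endian: printk("... %d\n", status) would print
	 * 33554432 (0x02000000) for a status of 2. Convert first. */
	printk(KERN_DEBUG "lockd: server returns status %d\n",
	       ntohl(status));
}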
diff --git a/fs/namei.c b/fs/namei.c index f8c69d373793..f4788365ea22 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -179,19 +179,14 @@ static int check_acl(struct inode *inode, int mask) | |||
179 | #ifdef CONFIG_FS_POSIX_ACL | 179 | #ifdef CONFIG_FS_POSIX_ACL |
180 | struct posix_acl *acl; | 180 | struct posix_acl *acl; |
181 | 181 | ||
182 | /* | ||
183 | * Under RCU walk, we cannot even do a "get_cached_acl()", | ||
184 | * because that involves locking and getting a refcount on | ||
185 | * a cached ACL. | ||
186 | * | ||
187 | * So the only case we handle during RCU walking is the | ||
188 | * case of a cached "no ACL at all", which needs no locks | ||
189 | * or refcounts. | ||
190 | */ | ||
191 | if (mask & MAY_NOT_BLOCK) { | 182 | if (mask & MAY_NOT_BLOCK) { |
192 | if (negative_cached_acl(inode, ACL_TYPE_ACCESS)) | 183 | acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); |
184 | if (!acl) | ||
193 | return -EAGAIN; | 185 | return -EAGAIN; |
194 | return -ECHILD; | 186 | /* no ->get_acl() calls in RCU mode... */ |
187 | if (acl == ACL_NOT_CACHED) | ||
188 | return -ECHILD; | ||
189 | return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); | ||
195 | } | 190 | } |
196 | 191 | ||
197 | acl = get_cached_acl(inode, ACL_TYPE_ACCESS); | 192 | acl = get_cached_acl(inode, ACL_TYPE_ACCESS); |
@@ -313,6 +308,26 @@ int generic_permission(struct inode *inode, int mask) | |||
313 | return -EACCES; | 308 | return -EACCES; |
314 | } | 309 | } |
315 | 310 | ||
311 | /* | ||
312 | * We _really_ want to just do "generic_permission()" without | ||
313 | * even looking at the inode->i_op values. So we keep a cache | ||
314 | * flag in inode->i_opflags, that says "this has no special | ||
315 | * permission function, use the fast case". | ||
316 | */ | ||
317 | static inline int do_inode_permission(struct inode *inode, int mask) | ||
318 | { | ||
319 | if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { | ||
320 | if (likely(inode->i_op->permission)) | ||
321 | return inode->i_op->permission(inode, mask); | ||
322 | |||
323 | /* This gets set once for the inode lifetime */ | ||
324 | spin_lock(&inode->i_lock); | ||
325 | inode->i_opflags |= IOP_FASTPERM; | ||
326 | spin_unlock(&inode->i_lock); | ||
327 | } | ||
328 | return generic_permission(inode, mask); | ||
329 | } | ||
330 | |||
316 | /** | 331 | /** |
317 | * inode_permission - check for access rights to a given inode | 332 | * inode_permission - check for access rights to a given inode |
318 | * @inode: inode to check permission on | 333 | * @inode: inode to check permission on |
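do_inode_permission() above, together with should_follow_link() and can_lookup() added further down in this patch, all use one trick: latch "no method here" as a bit in i_opflags so the hot path never dereferences i_op. Distilled as a sketch (IOP_EXAMPLE and the choice of ->permission as the probed method are stand-ins; the real bits are the IOP_* flags in fs.h):

#include <linux/fs.h>
#include <linux/spinlock.h>

#define IOP_EXAMPLE	0x8000	/* invented bit for illustration */

static inline int example_can_fast_path(struct inode *inode)
{
	if (likely(inode->i_opflags & IOP_EXAMPLE))
		return 1;			/* latched: skip i_op entirely */
	if (inode->i_op->permission)
		return 0;			/* method exists: slow path */

	/* Latch once for the inode's lifetime; i_op never changes after
	 * setup, so a racing second setter just writes the same bit. */
	spin_lock(&inode->i_lock);
	inode->i_opflags |= IOP_EXAMPLE;
	spin_unlock(&inode->i_lock);
	return 1;
}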
@@ -327,7 +342,7 @@ int inode_permission(struct inode *inode, int mask) | |||
327 | { | 342 | { |
328 | int retval; | 343 | int retval; |
329 | 344 | ||
330 | if (mask & MAY_WRITE) { | 345 | if (unlikely(mask & MAY_WRITE)) { |
331 | umode_t mode = inode->i_mode; | 346 | umode_t mode = inode->i_mode; |
332 | 347 | ||
333 | /* | 348 | /* |
@@ -344,11 +359,7 @@ int inode_permission(struct inode *inode, int mask) | |||
344 | return -EACCES; | 359 | return -EACCES; |
345 | } | 360 | } |
346 | 361 | ||
347 | if (inode->i_op->permission) | 362 | retval = do_inode_permission(inode, mask); |
348 | retval = inode->i_op->permission(inode, mask); | ||
349 | else | ||
350 | retval = generic_permission(inode, mask); | ||
351 | |||
352 | if (retval) | 363 | if (retval) |
353 | return retval; | 364 | return retval; |
354 | 365 | ||
@@ -716,17 +727,20 @@ static int follow_automount(struct path *path, unsigned flags, | |||
716 | if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) | 727 | if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) |
717 | return -EISDIR; /* we actually want to stop here */ | 728 | return -EISDIR; /* we actually want to stop here */ |
718 | 729 | ||
719 | /* We want to mount if someone is trying to open/create a file of any | 730 | /* We don't want to mount if someone's just doing a stat - |
720 | * type under the mountpoint, wants to traverse through the mountpoint | 731 | * unless they're stat'ing a directory and appended a '/' to |
721 | * or wants to open the mounted directory. | 732 | * the name. |
722 | * | 733 | * |
723 | * We don't want to mount if someone's just doing a stat and they've | 734 | * We do, however, want to mount if someone wants to open or |
724 | * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and | 735 | * create a file of any type under the mountpoint, wants to |
725 | * appended a '/' to the name. | 736 | * traverse through the mountpoint or wants to open the |
737 | * mounted directory. Also, autofs may mark negative dentries | ||
738 | * as being automount points. These will need the attentions | ||
739 | * of the daemon to instantiate them before they can be used. | ||
726 | */ | 740 | */ |
727 | if (!(flags & LOOKUP_FOLLOW) && | 741 | if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | |
728 | !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | | 742 | LOOKUP_OPEN | LOOKUP_CREATE)) && |
729 | LOOKUP_OPEN | LOOKUP_CREATE))) | 743 | path->dentry->d_inode) |
730 | return -EISDIR; | 744 | return -EISDIR; |
731 | 745 | ||
732 | current->total_link_count++; | 746 | current->total_link_count++; |
@@ -1244,6 +1258,26 @@ static void terminate_walk(struct nameidata *nd) | |||
1244 | } | 1258 | } |
1245 | } | 1259 | } |
1246 | 1260 | ||
1261 | /* | ||
1262 | * Do we need to follow links? We _really_ want to be able | ||
1263 | * to do this check without having to look at inode->i_op, | ||
1264 | * so we keep a cache of "no, this doesn't need follow_link" | ||
1265 | * for the common case. | ||
1266 | */ | ||
1267 | static inline int should_follow_link(struct inode *inode, int follow) | ||
1268 | { | ||
1269 | if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { | ||
1270 | if (likely(inode->i_op->follow_link)) | ||
1271 | return follow; | ||
1272 | |||
1273 | /* This gets set once for the inode lifetime */ | ||
1274 | spin_lock(&inode->i_lock); | ||
1275 | inode->i_opflags |= IOP_NOFOLLOW; | ||
1276 | spin_unlock(&inode->i_lock); | ||
1277 | } | ||
1278 | return 0; | ||
1279 | } | ||
1280 | |||
1247 | static inline int walk_component(struct nameidata *nd, struct path *path, | 1281 | static inline int walk_component(struct nameidata *nd, struct path *path, |
1248 | struct qstr *name, int type, int follow) | 1282 | struct qstr *name, int type, int follow) |
1249 | { | 1283 | { |
@@ -1266,7 +1300,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, | |||
1266 | terminate_walk(nd); | 1300 | terminate_walk(nd); |
1267 | return -ENOENT; | 1301 | return -ENOENT; |
1268 | } | 1302 | } |
1269 | if (unlikely(inode->i_op->follow_link) && follow) { | 1303 | if (should_follow_link(inode, follow)) { |
1270 | if (nd->flags & LOOKUP_RCU) { | 1304 | if (nd->flags & LOOKUP_RCU) { |
1271 | if (unlikely(unlazy_walk(nd, path->dentry))) { | 1305 | if (unlikely(unlazy_walk(nd, path->dentry))) { |
1272 | terminate_walk(nd); | 1306 | terminate_walk(nd); |
@@ -1319,6 +1353,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) | |||
1319 | } | 1353 | } |
1320 | 1354 | ||
1321 | /* | 1355 | /* |
1356 | * We really don't want to look at inode->i_op->lookup | ||
1357 | * when we don't have to. So we keep a cache bit in | ||
1358 | * the inode ->i_opflags field that says "yes, we can | ||
1359 | * do lookup on this inode". | ||
1360 | */ | ||
1361 | static inline int can_lookup(struct inode *inode) | ||
1362 | { | ||
1363 | if (likely(inode->i_opflags & IOP_LOOKUP)) | ||
1364 | return 1; | ||
1365 | if (likely(!inode->i_op->lookup)) | ||
1366 | return 0; | ||
1367 | |||
1368 | /* We do this once for the lifetime of the inode */ | ||
1369 | spin_lock(&inode->i_lock); | ||
1370 | inode->i_opflags |= IOP_LOOKUP; | ||
1371 | spin_unlock(&inode->i_lock); | ||
1372 | return 1; | ||
1373 | } | ||
1374 | |||
1375 | /* | ||
1322 | * Name resolution. | 1376 | * Name resolution. |
1323 | * This is the basic name resolution function, turning a pathname into | 1377 | * This is the basic name resolution function, turning a pathname into |
1324 | * the final dentry. We expect 'base' to be positive and a directory. | 1378 | * the final dentry. We expect 'base' to be positive and a directory. |
@@ -1397,10 +1451,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1397 | if (err) | 1451 | if (err) |
1398 | return err; | 1452 | return err; |
1399 | } | 1453 | } |
1454 | if (can_lookup(nd->inode)) | ||
1455 | continue; | ||
1400 | err = -ENOTDIR; | 1456 | err = -ENOTDIR; |
1401 | if (!nd->inode->i_op->lookup) | 1457 | break; |
1402 | break; | ||
1403 | continue; | ||
1404 | /* here ends the main loop */ | 1458 | /* here ends the main loop */ |
1405 | 1459 | ||
1406 | last_component: | 1460 | last_component: |
@@ -2562,6 +2616,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2562 | if (!dir->i_op->rmdir) | 2616 | if (!dir->i_op->rmdir) |
2563 | return -EPERM; | 2617 | return -EPERM; |
2564 | 2618 | ||
2619 | dget(dentry); | ||
2565 | mutex_lock(&dentry->d_inode->i_mutex); | 2620 | mutex_lock(&dentry->d_inode->i_mutex); |
2566 | 2621 | ||
2567 | error = -EBUSY; | 2622 | error = -EBUSY; |
@@ -2582,6 +2637,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2582 | 2637 | ||
2583 | out: | 2638 | out: |
2584 | mutex_unlock(&dentry->d_inode->i_mutex); | 2639 | mutex_unlock(&dentry->d_inode->i_mutex); |
2640 | dput(dentry); | ||
2585 | if (!error) | 2641 | if (!error) |
2586 | d_delete(dentry); | 2642 | d_delete(dentry); |
2587 | return error; | 2643 | return error; |
@@ -2971,6 +3027,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
2971 | if (error) | 3027 | if (error) |
2972 | return error; | 3028 | return error; |
2973 | 3029 | ||
3030 | dget(new_dentry); | ||
2974 | if (target) | 3031 | if (target) |
2975 | mutex_lock(&target->i_mutex); | 3032 | mutex_lock(&target->i_mutex); |
2976 | 3033 | ||
@@ -2991,6 +3048,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
2991 | out: | 3048 | out: |
2992 | if (target) | 3049 | if (target) |
2993 | mutex_unlock(&target->i_mutex); | 3050 | mutex_unlock(&target->i_mutex); |
3051 | dput(new_dentry); | ||
2994 | if (!error) | 3052 | if (!error) |
2995 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 3053 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
2996 | d_move(old_dentry,new_dentry); | 3054 | d_move(old_dentry,new_dentry); |
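A hedged reading of the new dget()/dput() pairs in vfs_rmdir() and vfs_rename_dir(): if the filesystem drops the last dentry reference inside its ->rmdir() or ->rename() (a clustered filesystem may), the subsequent mutex_unlock(&dentry->d_inode->i_mutex) would touch freed memory. With the extra pin, a d_delete() inside the fs only unhashes the dentry and d_inode stays valid for the unlock. Sketch of the idiom:

#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/mutex.h>

static int example_pinned_rmdir(struct inode *dir, struct dentry *dentry)
{
	int error;

	dget(dentry);			/* pin: survives anything ->rmdir() does */
	mutex_lock(&dentry->d_inode->i_mutex);
	error = dir->i_op->rmdir(dir, dentry);
	mutex_unlock(&dentry->d_inode->i_mutex);	/* safe: still pinned */
	dput(dentry);
	if (!error)
		d_delete(dentry);
	return error;
}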
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 81515545ba75..dbcd82126aed 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -77,6 +77,7 @@ config NFS_V4 | |||
77 | config NFS_V4_1 | 77 | config NFS_V4_1 |
78 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 78 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
79 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL | 79 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL |
80 | select SUNRPC_BACKCHANNEL | ||
80 | select PNFS_FILE_LAYOUT | 81 | select PNFS_FILE_LAYOUT |
81 | help | 82 | help |
82 | This option enables support for minor version 1 of the NFSv4 protocol | 83 | This option enables support for minor version 1 of the NFSv4 protocol |
@@ -87,15 +88,15 @@ config NFS_V4_1 | |||
87 | config PNFS_FILE_LAYOUT | 88 | config PNFS_FILE_LAYOUT |
88 | tristate | 89 | tristate |
89 | 90 | ||
91 | config PNFS_BLOCK | ||
92 | tristate | ||
93 | depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM | ||
94 | default m | ||
95 | |||
90 | config PNFS_OBJLAYOUT | 96 | config PNFS_OBJLAYOUT |
91 | tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" | 97 | tristate |
92 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD | 98 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD |
93 | help | 99 | default m |
94 | Say M here if you want your pNFS client to support the Objects Layout Driver. | ||
95 | Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and | ||
96 | upper level driver (SCSI_OSD_ULD). | ||
97 | |||
98 | If unsure, say N. | ||
99 | 100 | ||
100 | config ROOT_NFS | 101 | config ROOT_NFS |
101 | bool "Root file system on NFS" | 102 | bool "Root file system on NFS" |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 6a34f7dd0e6f..b58613d0abb3 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -23,3 +23,4 @@ obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | |||
23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | 23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o |
24 | 24 | ||
25 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | 25 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ |
26 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ | ||
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile new file mode 100644 index 000000000000..d5815505c020 --- /dev/null +++ b/fs/nfs/blocklayout/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | # | ||
2 | # Makefile for the pNFS block layout driver kernel module | ||
3 | # | ||
4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o | ||
5 | blocklayoutdriver-objs := blocklayout.o extents.o blocklayoutdev.o blocklayoutdm.o | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c new file mode 100644 index 000000000000..9561c8fc8bdb --- /dev/null +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -0,0 +1,1020 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayout.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include <linux/module.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/mount.h> | ||
36 | #include <linux/namei.h> | ||
37 | #include <linux/bio.h> /* struct bio */ | ||
38 | #include <linux/buffer_head.h> /* various write calls */ | ||
39 | #include <linux/prefetch.h> | ||
40 | |||
41 | #include "blocklayout.h" | ||
42 | |||
43 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
44 | |||
45 | MODULE_LICENSE("GPL"); | ||
46 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); | ||
47 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); | ||
48 | |||
49 | struct dentry *bl_device_pipe; | ||
50 | wait_queue_head_t bl_wq; | ||
51 | |||
52 | static void print_page(struct page *page) | ||
53 | { | ||
54 | dprintk("PRINTPAGE page %p\n", page); | ||
55 | dprintk(" PagePrivate %d\n", PagePrivate(page)); | ||
56 | dprintk(" PageUptodate %d\n", PageUptodate(page)); | ||
57 | dprintk(" PageError %d\n", PageError(page)); | ||
58 | dprintk(" PageDirty %d\n", PageDirty(page)); | ||
59 | dprintk(" PageReferenced %d\n", PageReferenced(page)); | ||
60 | dprintk(" PageLocked %d\n", PageLocked(page)); | ||
61 | dprintk(" PageWriteback %d\n", PageWriteback(page)); | ||
62 | dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page)); | ||
63 | dprintk("\n"); | ||
64 | } | ||
65 | |||
66 | /* Given the be associated with isect, determine if page data needs to be | ||
67 | * initialized. | ||
68 | */ | ||
69 | static int is_hole(struct pnfs_block_extent *be, sector_t isect) | ||
70 | { | ||
71 | if (be->be_state == PNFS_BLOCK_NONE_DATA) | ||
72 | return 1; | ||
73 | else if (be->be_state != PNFS_BLOCK_INVALID_DATA) | ||
74 | return 0; | ||
75 | else | ||
76 | return !bl_is_sector_init(be->be_inval, isect); | ||
77 | } | ||
78 | |||
79 | /* Given the be associated with isect, determine if page data can be | ||
80 | * written to disk. | ||
81 | */ | ||
82 | static int is_writable(struct pnfs_block_extent *be, sector_t isect) | ||
83 | { | ||
84 | return (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
85 | be->be_state == PNFS_BLOCK_INVALID_DATA); | ||
86 | } | ||
87 | |||
88 | /* The data we are handed might be spread across several bios. We need | ||
89 | * to track when the last one is finished. | ||
90 | */ | ||
91 | struct parallel_io { | ||
92 | struct kref refcnt; | ||
93 | struct rpc_call_ops call_ops; | ||
94 | void (*pnfs_callback) (void *data); | ||
95 | void *data; | ||
96 | }; | ||
97 | |||
98 | static inline struct parallel_io *alloc_parallel(void *data) | ||
99 | { | ||
100 | struct parallel_io *rv; | ||
101 | |||
102 | rv = kmalloc(sizeof(*rv), GFP_NOFS); | ||
103 | if (rv) { | ||
104 | rv->data = data; | ||
105 | kref_init(&rv->refcnt); | ||
106 | } | ||
107 | return rv; | ||
108 | } | ||
109 | |||
110 | static inline void get_parallel(struct parallel_io *p) | ||
111 | { | ||
112 | kref_get(&p->refcnt); | ||
113 | } | ||
114 | |||
115 | static void destroy_parallel(struct kref *kref) | ||
116 | { | ||
117 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); | ||
118 | |||
119 | dprintk("%s enter\n", __func__); | ||
120 | p->pnfs_callback(p->data); | ||
121 | kfree(p); | ||
122 | } | ||
123 | |||
124 | static inline void put_parallel(struct parallel_io *p) | ||
125 | { | ||
126 | kref_put(&p->refcnt, destroy_parallel); | ||
127 | } | ||
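Reading the parallel_io helpers above back as a lifecycle: alloc_parallel() starts the kref at 1, and that initial reference keeps pnfs_callback from firing while submission is still in progress; each in-flight bio then holds its own reference. A sketch with an invented driver function (the bios are assumed already built with bi_private pointing at par):

static void example_parallel_submit(struct parallel_io *par,
				    struct bio **bios, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		get_parallel(par);		/* one ref per bio in flight */
		submit_bio(READ, bios[i]);	/* completion calls put_parallel() */
	}
	put_parallel(par);	/* drop the initial ref; destroy_parallel()
				 * and thus pnfs_callback run exactly once,
				 * after the last bio completion */
}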
128 | |||
129 | static struct bio * | ||
130 | bl_submit_bio(int rw, struct bio *bio) | ||
131 | { | ||
132 | if (bio) { | ||
133 | get_parallel(bio->bi_private); | ||
134 | dprintk("%s submitting %s bio %u@%llu\n", __func__, | ||
135 | rw == READ ? "read" : "write", | ||
136 | bio->bi_size, (unsigned long long)bio->bi_sector); | ||
137 | submit_bio(rw, bio); | ||
138 | } | ||
139 | return NULL; | ||
140 | } | ||
141 | |||
142 | static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | ||
143 | struct pnfs_block_extent *be, | ||
144 | void (*end_io)(struct bio *, int err), | ||
145 | struct parallel_io *par) | ||
146 | { | ||
147 | struct bio *bio; | ||
148 | |||
149 | bio = bio_alloc(GFP_NOIO, npg); | ||
150 | if (!bio) | ||
151 | return NULL; | ||
152 | |||
153 | bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; | ||
154 | bio->bi_bdev = be->be_mdev; | ||
155 | bio->bi_end_io = end_io; | ||
156 | bio->bi_private = par; | ||
157 | return bio; | ||
158 | } | ||
159 | |||
160 | static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, | ||
161 | sector_t isect, struct page *page, | ||
162 | struct pnfs_block_extent *be, | ||
163 | void (*end_io)(struct bio *, int err), | ||
164 | struct parallel_io *par) | ||
165 | { | ||
166 | retry: | ||
167 | if (!bio) { | ||
168 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); | ||
169 | if (!bio) | ||
170 | return ERR_PTR(-ENOMEM); | ||
171 | } | ||
172 | if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { | ||
173 | bio = bl_submit_bio(rw, bio); | ||
174 | goto retry; | ||
175 | } | ||
176 | return bio; | ||
177 | } | ||
178 | |||
179 | static void bl_set_lo_fail(struct pnfs_layout_segment *lseg) | ||
180 | { | ||
181 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
182 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | ||
183 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
184 | } else { | ||
185 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | ||
186 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | /* This is basically copied from mpage_end_io_read */ | ||
191 | static void bl_end_io_read(struct bio *bio, int err) | ||
192 | { | ||
193 | struct parallel_io *par = bio->bi_private; | ||
194 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
195 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
196 | struct nfs_read_data *rdata = (struct nfs_read_data *)par->data; | ||
197 | |||
198 | do { | ||
199 | struct page *page = bvec->bv_page; | ||
200 | |||
201 | if (--bvec >= bio->bi_io_vec) | ||
202 | prefetchw(&bvec->bv_page->flags); | ||
203 | if (uptodate) | ||
204 | SetPageUptodate(page); | ||
205 | } while (bvec >= bio->bi_io_vec); | ||
206 | if (!uptodate) { | ||
207 | if (!rdata->pnfs_error) | ||
208 | rdata->pnfs_error = -EIO; | ||
209 | bl_set_lo_fail(rdata->lseg); | ||
210 | } | ||
211 | bio_put(bio); | ||
212 | put_parallel(par); | ||
213 | } | ||
214 | |||
215 | static void bl_read_cleanup(struct work_struct *work) | ||
216 | { | ||
217 | struct rpc_task *task; | ||
218 | struct nfs_read_data *rdata; | ||
219 | dprintk("%s enter\n", __func__); | ||
220 | task = container_of(work, struct rpc_task, u.tk_work); | ||
221 | rdata = container_of(task, struct nfs_read_data, task); | ||
222 | pnfs_ld_read_done(rdata); | ||
223 | } | ||
224 | |||
225 | static void | ||
226 | bl_end_par_io_read(void *data) | ||
227 | { | ||
228 | struct nfs_read_data *rdata = data; | ||
229 | |||
230 | INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); | ||
231 | schedule_work(&rdata->task.u.tk_work); | ||
232 | } | ||
233 | |||
234 | /* We don't want normal .rpc_call_done callback used, so we replace it | ||
235 | * with this stub. | ||
236 | */ | ||
237 | static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata) | ||
238 | { | ||
239 | return; | ||
240 | } | ||
241 | |||
242 | static enum pnfs_try_status | ||
243 | bl_read_pagelist(struct nfs_read_data *rdata) | ||
244 | { | ||
245 | int i, hole; | ||
246 | struct bio *bio = NULL; | ||
247 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | ||
248 | sector_t isect, extent_length = 0; | ||
249 | struct parallel_io *par; | ||
250 | loff_t f_offset = rdata->args.offset; | ||
251 | size_t count = rdata->args.count; | ||
252 | struct page **pages = rdata->args.pages; | ||
253 | int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; | ||
254 | |||
255 | dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__, | ||
256 | rdata->npages, f_offset, count); | ||
257 | |||
258 | par = alloc_parallel(rdata); | ||
259 | if (!par) | ||
260 | goto use_mds; | ||
261 | par->call_ops = *rdata->mds_ops; | ||
262 | par->call_ops.rpc_call_done = bl_rpc_do_nothing; | ||
263 | par->pnfs_callback = bl_end_par_io_read; | ||
264 | /* At this point, we can no longer jump to use_mds */ | ||
265 | |||
266 | isect = (sector_t) (f_offset >> SECTOR_SHIFT); | ||
267 | /* Code assumes extents are page-aligned */ | ||
268 | for (i = pg_index; i < rdata->npages; i++) { | ||
269 | if (!extent_length) { | ||
270 | /* We've used up the previous extent */ | ||
271 | bl_put_extent(be); | ||
272 | bl_put_extent(cow_read); | ||
273 | bio = bl_submit_bio(READ, bio); | ||
274 | /* Get the next one */ | ||
275 | be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), | ||
276 | isect, &cow_read); | ||
277 | if (!be) { | ||
278 | rdata->pnfs_error = -EIO; | ||
279 | goto out; | ||
280 | } | ||
281 | extent_length = be->be_length - | ||
282 | (isect - be->be_f_offset); | ||
283 | if (cow_read) { | ||
284 | sector_t cow_length = cow_read->be_length - | ||
285 | (isect - cow_read->be_f_offset); | ||
286 | extent_length = min(extent_length, cow_length); | ||
287 | } | ||
288 | } | ||
289 | hole = is_hole(be, isect); | ||
290 | if (hole && !cow_read) { | ||
291 | bio = bl_submit_bio(READ, bio); | ||
292 | /* Fill hole w/ zeroes w/o accessing device */ | ||
293 | dprintk("%s Zeroing page for hole\n", __func__); | ||
294 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
295 | print_page(pages[i]); | ||
296 | SetPageUptodate(pages[i]); | ||
297 | } else { | ||
298 | struct pnfs_block_extent *be_read; | ||
299 | |||
300 | be_read = (hole && cow_read) ? cow_read : be; | ||
301 | bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, | ||
302 | isect, pages[i], be_read, | ||
303 | bl_end_io_read, par); | ||
304 | if (IS_ERR(bio)) { | ||
305 | rdata->pnfs_error = PTR_ERR(bio); | ||
306 | goto out; | ||
307 | } | ||
308 | } | ||
309 | isect += PAGE_CACHE_SECTORS; | ||
310 | extent_length -= PAGE_CACHE_SECTORS; | ||
311 | } | ||
312 | if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { | ||
313 | rdata->res.eof = 1; | ||
314 | rdata->res.count = rdata->inode->i_size - f_offset; | ||
315 | } else { | ||
316 | rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; | ||
317 | } | ||
318 | out: | ||
319 | bl_put_extent(be); | ||
320 | bl_put_extent(cow_read); | ||
321 | bl_submit_bio(READ, bio); | ||
322 | put_parallel(par); | ||
323 | return PNFS_ATTEMPTED; | ||
324 | |||
325 | use_mds: | ||
326 | dprintk("Giving up and using normal NFS\n"); | ||
327 | return PNFS_NOT_ATTEMPTED; | ||
328 | } | ||
329 | |||
330 | static void mark_extents_written(struct pnfs_block_layout *bl, | ||
331 | __u64 offset, __u32 count) | ||
332 | { | ||
333 | sector_t isect, end; | ||
334 | struct pnfs_block_extent *be; | ||
335 | |||
336 | dprintk("%s(%llu, %u)\n", __func__, offset, count); | ||
337 | if (count == 0) | ||
338 | return; | ||
339 | isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; | ||
340 | end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); | ||
341 | end >>= SECTOR_SHIFT; | ||
342 | while (isect < end) { | ||
343 | sector_t len; | ||
344 | be = bl_find_get_extent(bl, isect, NULL); | ||
345 | BUG_ON(!be); /* FIXME */ | ||
346 | len = min(end, be->be_f_offset + be->be_length) - isect; | ||
347 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
348 | bl_mark_for_commit(be, isect, len); /* What if fails? */ | ||
349 | isect += len; | ||
350 | bl_put_extent(be); | ||
351 | } | ||
352 | } | ||
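The isect/end arithmetic in mark_extents_written() rounds the byte range out to page boundaries and converts both ends to 512-byte sectors. A worked sketch assuming 4K pages (PAGE_CACHE_MASK = ~0xfff, SECTOR_SHIFT = 9); the numbers in the comment are just an example:

#include <linux/types.h>

static void example_round_to_sectors(u64 offset, u32 count,
				     u64 *start, u64 *end)
{
	*start = (offset & ~0xfffULL) >> 9;		     /* round down */
	*end = ((offset + count + 0xfff) & ~0xfffULL) >> 9;  /* round up */
	/* offset = 6000, count = 3000: *start = 4096 >> 9 = 8 and
	 * *end = 12288 >> 9 = 24, i.e. sectors [8, 24) cover pages 1..2 */
}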
353 | |||
354 | static void bl_end_io_write_zero(struct bio *bio, int err) | ||
355 | { | ||
356 | struct parallel_io *par = bio->bi_private; | ||
357 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
358 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
359 | struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; | ||
360 | |||
361 | do { | ||
362 | struct page *page = bvec->bv_page; | ||
363 | |||
364 | if (--bvec >= bio->bi_io_vec) | ||
365 | prefetchw(&bvec->bv_page->flags); | ||
366 | /* This is the zeroing page we added */ | ||
367 | end_page_writeback(page); | ||
368 | page_cache_release(page); | ||
369 | } while (bvec >= bio->bi_io_vec); | ||
370 | if (!uptodate) { | ||
371 | if (!wdata->pnfs_error) | ||
372 | wdata->pnfs_error = -EIO; | ||
373 | bl_set_lo_fail(wdata->lseg); | ||
374 | } | ||
375 | bio_put(bio); | ||
376 | put_parallel(par); | ||
377 | } | ||
378 | |||
379 | /* Write-side completion handler; error handling mirrors bl_end_io_read */ | ||
380 | static void bl_end_io_write(struct bio *bio, int err) | ||
381 | { | ||
382 | struct parallel_io *par = bio->bi_private; | ||
383 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
384 | struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; | ||
385 | |||
386 | if (!uptodate) { | ||
387 | if (!wdata->pnfs_error) | ||
388 | wdata->pnfs_error = -EIO; | ||
389 | bl_set_lo_fail(wdata->lseg); | ||
390 | } | ||
391 | bio_put(bio); | ||
392 | put_parallel(par); | ||
393 | } | ||
394 | |||
395 | /* Function scheduled for call during bl_end_par_io_write, | ||
396 | * it marks sectors as written and extends the commitlist. | ||
397 | */ | ||
398 | static void bl_write_cleanup(struct work_struct *work) | ||
399 | { | ||
400 | struct rpc_task *task; | ||
401 | struct nfs_write_data *wdata; | ||
402 | dprintk("%s enter\n", __func__); | ||
403 | task = container_of(work, struct rpc_task, u.tk_work); | ||
404 | wdata = container_of(task, struct nfs_write_data, task); | ||
405 | if (!wdata->pnfs_error) { | ||
406 | /* Marks for LAYOUTCOMMIT */ | ||
407 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | ||
408 | wdata->args.offset, wdata->args.count); | ||
409 | } | ||
410 | pnfs_ld_write_done(wdata); | ||
411 | } | ||
412 | |||
413 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ | ||
414 | static void bl_end_par_io_write(void *data) | ||
415 | { | ||
416 | struct nfs_write_data *wdata = data; | ||
417 | |||
418 | wdata->task.tk_status = 0; | ||
419 | wdata->verf.committed = NFS_FILE_SYNC; | ||
420 | INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); | ||
421 | schedule_work(&wdata->task.u.tk_work); | ||
422 | } | ||
423 | |||
424 | /* FIXME STUB - mark intersection of layout and page as bad, so is not | ||
425 | * used again. | ||
426 | */ | ||
427 | static void mark_bad_read(void) | ||
428 | { | ||
429 | return; | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * map_block: map a requested I/O block (isect) into an offset in the LVM | ||
434 | * block_device | ||
435 | */ | ||
436 | static void | ||
437 | map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) | ||
438 | { | ||
439 | dprintk("%s enter be=%p\n", __func__, be); | ||
440 | |||
441 | set_buffer_mapped(bh); | ||
442 | bh->b_bdev = be->be_mdev; | ||
443 | bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >> | ||
444 | (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT); | ||
445 | |||
446 | dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n", | ||
447 | __func__, (unsigned long long)isect, (long)bh->b_blocknr, | ||
448 | bh->b_size); | ||
449 | return; | ||
450 | } | ||
451 | |||
452 | /* Given an unmapped page, zero it or read it in for COW; the page is | ||
453 | * locked by the caller. | ||
454 | */ | ||
455 | static int | ||
456 | init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) | ||
457 | { | ||
458 | struct buffer_head *bh = NULL; | ||
459 | int ret = 0; | ||
460 | sector_t isect; | ||
461 | |||
462 | dprintk("%s enter, %p\n", __func__, page); | ||
463 | BUG_ON(PageUptodate(page)); | ||
464 | if (!cow_read) { | ||
465 | zero_user_segment(page, 0, PAGE_SIZE); | ||
466 | SetPageUptodate(page); | ||
467 | goto cleanup; | ||
468 | } | ||
469 | |||
470 | bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0); | ||
471 | if (!bh) { | ||
472 | ret = -ENOMEM; | ||
473 | goto cleanup; | ||
474 | } | ||
475 | |||
476 | isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT; | ||
477 | map_block(bh, isect, cow_read); | ||
478 | if (!bh_uptodate_or_lock(bh)) | ||
479 | ret = bh_submit_read(bh); | ||
480 | if (ret) | ||
481 | goto cleanup; | ||
482 | SetPageUptodate(page); | ||
483 | |||
484 | cleanup: | ||
485 | bl_put_extent(cow_read); | ||
486 | if (bh) | ||
487 | free_buffer_head(bh); | ||
488 | if (ret) { | ||
489 | /* Need to mark layout with bad read...should now | ||
490 | * just use nfs4 for reads and writes. | ||
491 | */ | ||
492 | mark_bad_read(); | ||
493 | } | ||
494 | return ret; | ||
495 | } | ||
496 | |||
497 | static enum pnfs_try_status | ||
498 | bl_write_pagelist(struct nfs_write_data *wdata, int sync) | ||
499 | { | ||
500 | int i, ret, npg_zero, pg_index, last = 0; | ||
501 | struct bio *bio = NULL; | ||
502 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | ||
503 | sector_t isect, last_isect = 0, extent_length = 0; | ||
504 | struct parallel_io *par; | ||
505 | loff_t offset = wdata->args.offset; | ||
506 | size_t count = wdata->args.count; | ||
507 | struct page **pages = wdata->args.pages; | ||
508 | struct page *page; | ||
509 | pgoff_t index; | ||
510 | u64 temp; | ||
511 | int npg_per_block = | ||
512 | NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; | ||
513 | |||
514 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); | ||
515 | /* At this point, wdata->pages is a (sequential) list of nfs_pages. | ||
516 | * We want to write each of them, and if there is an error, set | ||
517 | * pnfs_error so the write is redone through normal NFS. | ||
518 | */ | ||
519 | par = alloc_parallel(wdata); | ||
520 | if (!par) | ||
521 | return PNFS_NOT_ATTEMPTED; | ||
522 | par->call_ops = *wdata->mds_ops; | ||
523 | par->call_ops.rpc_call_done = bl_rpc_do_nothing; | ||
524 | par->pnfs_callback = bl_end_par_io_write; | ||
525 | /* At this point, have to be more careful with error handling */ | ||
526 | |||
527 | isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | ||
528 | be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); | ||
529 | if (!be || !is_writable(be, isect)) { | ||
530 | dprintk("%s no matching extents!\n", __func__); | ||
531 | wdata->pnfs_error = -EINVAL; | ||
532 | goto out; | ||
533 | } | ||
534 | |||
535 | /* First page inside INVALID extent */ | ||
536 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
537 | temp = offset >> PAGE_CACHE_SHIFT; | ||
538 | npg_zero = do_div(temp, npg_per_block); | ||
539 | isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & | ||
540 | (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | ||
541 | extent_length = be->be_length - (isect - be->be_f_offset); | ||
542 | |||
543 | fill_invalid_ext: | ||
544 | dprintk("%s need to zero %d pages\n", __func__, npg_zero); | ||
545 | for (;npg_zero > 0; npg_zero--) { | ||
546 | /* page ref released in bl_end_io_write_zero */ | ||
547 | index = isect >> PAGE_CACHE_SECTOR_SHIFT; | ||
548 | dprintk("%s zero %dth page: index %lu isect %llu\n", | ||
549 | __func__, npg_zero, index, | ||
550 | (unsigned long long)isect); | ||
551 | page = | ||
552 | find_or_create_page(wdata->inode->i_mapping, index, | ||
553 | GFP_NOFS); | ||
554 | if (!page) { | ||
555 | dprintk("%s oom\n", __func__); | ||
556 | wdata->pnfs_error = -ENOMEM; | ||
557 | goto out; | ||
558 | } | ||
559 | |||
560 | /* PageDirty: Other will write this out | ||
561 | * PageWriteback: Other is writing this out | ||
562 | * PageUptodate: It was read before | ||
563 | * sector_initialized: already written out | ||
564 | */ | ||
565 | if (PageDirty(page) || PageWriteback(page) || | ||
566 | bl_is_sector_init(be->be_inval, isect)) { | ||
567 | print_page(page); | ||
568 | unlock_page(page); | ||
569 | page_cache_release(page); | ||
570 | goto next_page; | ||
571 | } | ||
572 | if (!PageUptodate(page)) { | ||
573 | /* New page, readin or zero it */ | ||
574 | init_page_for_write(page, cow_read); | ||
575 | } | ||
576 | set_page_writeback(page); | ||
577 | unlock_page(page); | ||
578 | |||
579 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
580 | PAGE_CACHE_SECTORS, | ||
581 | NULL); | ||
582 | if (unlikely(ret)) { | ||
583 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
584 | __func__, ret); | ||
585 | end_page_writeback(page); | ||
586 | page_cache_release(page); | ||
587 | wdata->pnfs_error = ret; | ||
588 | goto out; | ||
589 | } | ||
590 | bio = bl_add_page_to_bio(bio, npg_zero, WRITE, | ||
591 | isect, page, be, | ||
592 | bl_end_io_write_zero, par); | ||
593 | if (IS_ERR(bio)) { | ||
594 | wdata->pnfs_error = PTR_ERR(bio); | ||
595 | goto out; | ||
596 | } | ||
597 | /* FIXME: This should be done in bi_end_io */ | ||
598 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | ||
599 | page->index << PAGE_CACHE_SHIFT, | ||
600 | PAGE_CACHE_SIZE); | ||
601 | next_page: | ||
602 | isect += PAGE_CACHE_SECTORS; | ||
603 | extent_length -= PAGE_CACHE_SECTORS; | ||
604 | } | ||
605 | if (last) | ||
606 | goto write_done; | ||
607 | } | ||
608 | bio = bl_submit_bio(WRITE, bio); | ||
609 | |||
610 | /* Middle pages */ | ||
611 | pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; | ||
612 | for (i = pg_index; i < wdata->npages; i++) { | ||
613 | if (!extent_length) { | ||
614 | /* We've used up the previous extent */ | ||
615 | bl_put_extent(be); | ||
616 | bio = bl_submit_bio(WRITE, bio); | ||
617 | /* Get the next one */ | ||
618 | be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), | ||
619 | isect, NULL); | ||
620 | if (!be || !is_writable(be, isect)) { | ||
621 | wdata->pnfs_error = -EINVAL; | ||
622 | goto out; | ||
623 | } | ||
624 | extent_length = be->be_length - | ||
625 | (isect - be->be_f_offset); | ||
626 | } | ||
627 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
628 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
629 | PAGE_CACHE_SECTORS, | ||
630 | NULL); | ||
631 | if (unlikely(ret)) { | ||
632 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
633 | __func__, ret); | ||
634 | wdata->pnfs_error = ret; | ||
635 | goto out; | ||
636 | } | ||
637 | } | ||
638 | bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, | ||
639 | isect, pages[i], be, | ||
640 | bl_end_io_write, par); | ||
641 | if (IS_ERR(bio)) { | ||
642 | wdata->pnfs_error = PTR_ERR(bio); | ||
643 | goto out; | ||
644 | } | ||
645 | isect += PAGE_CACHE_SECTORS; | ||
646 | last_isect = isect; | ||
647 | extent_length -= PAGE_CACHE_SECTORS; | ||
648 | } | ||
649 | |||
650 | /* Last page inside INVALID extent */ | ||
651 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
652 | bio = bl_submit_bio(WRITE, bio); | ||
653 | temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT; | ||
654 | npg_zero = npg_per_block - do_div(temp, npg_per_block); | ||
655 | if (npg_zero < npg_per_block) { | ||
656 | last = 1; | ||
657 | goto fill_invalid_ext; | ||
658 | } | ||
659 | } | ||
660 | |||
661 | write_done: | ||
662 | wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); | ||
663 | if (count < wdata->res.count) { | ||
664 | wdata->res.count = count; | ||
665 | } | ||
666 | out: | ||
667 | bl_put_extent(be); | ||
668 | bl_submit_bio(WRITE, bio); | ||
669 | put_parallel(par); | ||
670 | return PNFS_ATTEMPTED; | ||
671 | } | ||
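bl_write_pagelist() leans on do_div() twice to find where the write sits inside a pNFS block, and do_div() is easy to misread: it divides its 64-bit first argument in place and returns the remainder. A sketch, with example numbers assuming npg_per_block = 16:

#include <asm/div64.h>
#include <linux/types.h>

static u32 example_pages_into_block(u64 page_index, u32 npg_per_block)
{
	u64 temp = page_index;
	/* temp becomes page_index / npg_per_block (the block number);
	 * the return value is page_index % npg_per_block. */
	u32 rem = do_div(temp, npg_per_block);
	/* page_index = 70, npg_per_block = 16: temp = 4, rem = 6, so the
	 * write starts 6 pages into its block, as in the zero-fill paths. */
	return rem;
}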
672 | |||
673 | /* FIXME - range ignored */ | ||
674 | static void | ||
675 | release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) | ||
676 | { | ||
677 | int i; | ||
678 | struct pnfs_block_extent *be; | ||
679 | |||
680 | spin_lock(&bl->bl_ext_lock); | ||
681 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
682 | while (!list_empty(&bl->bl_extents[i])) { | ||
683 | be = list_first_entry(&bl->bl_extents[i], | ||
684 | struct pnfs_block_extent, | ||
685 | be_node); | ||
686 | list_del(&be->be_node); | ||
687 | bl_put_extent(be); | ||
688 | } | ||
689 | } | ||
690 | spin_unlock(&bl->bl_ext_lock); | ||
691 | } | ||
692 | |||
693 | static void | ||
694 | release_inval_marks(struct pnfs_inval_markings *marks) | ||
695 | { | ||
696 | struct pnfs_inval_tracking *pos, *temp; | ||
697 | |||
698 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { | ||
699 | list_del(&pos->it_link); | ||
700 | kfree(pos); | ||
701 | } | ||
702 | return; | ||
703 | } | ||
704 | |||
705 | static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
706 | { | ||
707 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
708 | |||
709 | dprintk("%s enter\n", __func__); | ||
710 | release_extents(bl, NULL); | ||
711 | release_inval_marks(&bl->bl_inval); | ||
712 | kfree(bl); | ||
713 | } | ||
714 | |||
715 | static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, | ||
716 | gfp_t gfp_flags) | ||
717 | { | ||
718 | struct pnfs_block_layout *bl; | ||
719 | |||
720 | dprintk("%s enter\n", __func__); | ||
721 | bl = kzalloc(sizeof(*bl), gfp_flags); | ||
722 | if (!bl) | ||
723 | return NULL; | ||
724 | spin_lock_init(&bl->bl_ext_lock); | ||
725 | INIT_LIST_HEAD(&bl->bl_extents[0]); | ||
726 | INIT_LIST_HEAD(&bl->bl_extents[1]); | ||
727 | INIT_LIST_HEAD(&bl->bl_commit); | ||
728 | INIT_LIST_HEAD(&bl->bl_committing); | ||
729 | bl->bl_count = 0; | ||
730 | bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT; | ||
731 | BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize); | ||
732 | return &bl->bl_layout; | ||
733 | } | ||
734 | |||
735 | static void bl_free_lseg(struct pnfs_layout_segment *lseg) | ||
736 | { | ||
737 | dprintk("%s enter\n", __func__); | ||
738 | kfree(lseg); | ||
739 | } | ||
740 | |||
741 | /* We pretty much ignore lseg, and store all data layout-wide, so we | ||
742 | * can correctly merge. | ||
743 | */ | ||
744 | static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo, | ||
745 | struct nfs4_layoutget_res *lgr, | ||
746 | gfp_t gfp_flags) | ||
747 | { | ||
748 | struct pnfs_layout_segment *lseg; | ||
749 | int status; | ||
750 | |||
751 | dprintk("%s enter\n", __func__); | ||
752 | lseg = kzalloc(sizeof(*lseg), gfp_flags); | ||
753 | if (!lseg) | ||
754 | return ERR_PTR(-ENOMEM); | ||
755 | status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags); | ||
756 | if (status) { | ||
757 | /* We don't want to call the full-blown bl_free_lseg, | ||
758 | * since on error extents were not touched. | ||
759 | */ | ||
760 | kfree(lseg); | ||
761 | return ERR_PTR(status); | ||
762 | } | ||
763 | return lseg; | ||
764 | } | ||
765 | |||
766 | static void | ||
767 | bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, | ||
768 | const struct nfs4_layoutcommit_args *arg) | ||
769 | { | ||
770 | dprintk("%s enter\n", __func__); | ||
771 | encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg); | ||
772 | } | ||
773 | |||
774 | static void | ||
775 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | ||
776 | { | ||
777 | struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; | ||
778 | |||
779 | dprintk("%s enter\n", __func__); | ||
780 | clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status); | ||
781 | } | ||
782 | |||
783 | static void free_blk_mountid(struct block_mount_id *mid) | ||
784 | { | ||
785 | if (mid) { | ||
786 | struct pnfs_block_dev *dev; | ||
787 | spin_lock(&mid->bm_lock); | ||
788 | while (!list_empty(&mid->bm_devlist)) { | ||
789 | dev = list_first_entry(&mid->bm_devlist, | ||
790 | struct pnfs_block_dev, | ||
791 | bm_node); | ||
792 | list_del(&dev->bm_node); | ||
793 | bl_free_block_dev(dev); | ||
794 | } | ||
795 | spin_unlock(&mid->bm_lock); | ||
796 | kfree(mid); | ||
797 | } | ||
798 | } | ||
799 | |||
800 | /* This is mostly copied from the filelayout's get_device_info function. | ||
801 | * It seems much of this should be at the generic pnfs level. | ||
802 | */ | ||
803 | static struct pnfs_block_dev * | ||
804 | nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | ||
805 | struct nfs4_deviceid *d_id) | ||
806 | { | ||
807 | struct pnfs_device *dev; | ||
808 | struct pnfs_block_dev *rv = NULL; | ||
809 | u32 max_resp_sz; | ||
810 | int max_pages; | ||
811 | struct page **pages = NULL; | ||
812 | int i, rc; | ||
813 | |||
814 | /* | ||
815 | * Use the session max response size as the basis for setting | ||
816 | * GETDEVICEINFO's maxcount | ||
817 | */ | ||
818 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
819 | max_pages = max_resp_sz >> PAGE_SHIFT; | ||
820 | dprintk("%s max_resp_sz %u max_pages %d\n", | ||
821 | __func__, max_resp_sz, max_pages); | ||
822 | |||
823 | dev = kmalloc(sizeof(*dev), GFP_NOFS); | ||
824 | if (!dev) { | ||
825 | dprintk("%s kmalloc failed\n", __func__); | ||
826 | return NULL; | ||
827 | } | ||
828 | |||
829 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); | ||
830 | if (pages == NULL) { | ||
831 | kfree(dev); | ||
832 | return NULL; | ||
833 | } | ||
834 | for (i = 0; i < max_pages; i++) { | ||
835 | pages[i] = alloc_page(GFP_NOFS); | ||
836 | if (!pages[i]) | ||
837 | goto out_free; | ||
838 | } | ||
839 | |||
840 | memcpy(&dev->dev_id, d_id, sizeof(*d_id)); | ||
841 | dev->layout_type = LAYOUT_BLOCK_VOLUME; | ||
842 | dev->pages = pages; | ||
843 | dev->pgbase = 0; | ||
844 | dev->pglen = PAGE_SIZE * max_pages; | ||
845 | dev->mincount = 0; | ||
846 | |||
847 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | ||
848 | rc = nfs4_proc_getdeviceinfo(server, dev); | ||
849 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
850 | if (rc) | ||
851 | goto out_free; | ||
852 | |||
853 | rv = nfs4_blk_decode_device(server, dev); | ||
854 | out_free: | ||
855 | for (i = 0; i < max_pages; i++) | ||
856 | __free_page(pages[i]); | ||
857 | kfree(pages); | ||
858 | kfree(dev); | ||
859 | return rv; | ||
860 | } | ||
861 | |||
862 | static int | ||
863 | bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | ||
864 | { | ||
865 | struct block_mount_id *b_mt_id = NULL; | ||
866 | struct pnfs_devicelist *dlist = NULL; | ||
867 | struct pnfs_block_dev *bdev; | ||
868 | LIST_HEAD(block_disklist); | ||
869 | int status = 0, i; | ||
870 | |||
871 | dprintk("%s enter\n", __func__); | ||
872 | |||
873 | if (server->pnfs_blksize == 0) { | ||
874 | dprintk("%s Server did not return blksize\n", __func__); | ||
875 | return -EINVAL; | ||
876 | } | ||
877 | b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS); | ||
878 | if (!b_mt_id) { | ||
879 | status = -ENOMEM; | ||
880 | goto out_error; | ||
881 | } | ||
882 | /* Initialize nfs4 block layout mount id */ | ||
883 | spin_lock_init(&b_mt_id->bm_lock); | ||
884 | INIT_LIST_HEAD(&b_mt_id->bm_devlist); | ||
885 | |||
886 | dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); | ||
887 | if (!dlist) { | ||
888 | status = -ENOMEM; | ||
889 | goto out_error; | ||
890 | } | ||
891 | dlist->eof = 0; | ||
892 | while (!dlist->eof) { | ||
893 | status = nfs4_proc_getdevicelist(server, fh, dlist); | ||
894 | if (status) | ||
895 | goto out_error; | ||
896 | dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n", | ||
897 | __func__, dlist->num_devs, dlist->eof); | ||
898 | for (i = 0; i < dlist->num_devs; i++) { | ||
899 | bdev = nfs4_blk_get_deviceinfo(server, fh, | ||
900 | &dlist->dev_id[i]); | ||
901 | if (!bdev) { | ||
902 | status = -ENODEV; | ||
903 | goto out_error; | ||
904 | } | ||
905 | spin_lock(&b_mt_id->bm_lock); | ||
906 | list_add(&bdev->bm_node, &b_mt_id->bm_devlist); | ||
907 | spin_unlock(&b_mt_id->bm_lock); | ||
908 | } | ||
909 | } | ||
910 | dprintk("%s SUCCESS\n", __func__); | ||
911 | server->pnfs_ld_data = b_mt_id; | ||
912 | |||
913 | out_return: | ||
914 | kfree(dlist); | ||
915 | return status; | ||
916 | |||
917 | out_error: | ||
918 | free_blk_mountid(b_mt_id); | ||
919 | goto out_return; | ||
920 | } | ||
921 | |||
922 | static int | ||
923 | bl_clear_layoutdriver(struct nfs_server *server) | ||
924 | { | ||
925 | struct block_mount_id *b_mt_id = server->pnfs_ld_data; | ||
926 | |||
927 | dprintk("%s enter\n", __func__); | ||
928 | free_blk_mountid(b_mt_id); | ||
929 | dprintk("%s RETURNS\n", __func__); | ||
930 | return 0; | ||
931 | } | ||
932 | |||
933 | static const struct nfs_pageio_ops bl_pg_read_ops = { | ||
934 | .pg_init = pnfs_generic_pg_init_read, | ||
935 | .pg_test = pnfs_generic_pg_test, | ||
936 | .pg_doio = pnfs_generic_pg_readpages, | ||
937 | }; | ||
938 | |||
939 | static const struct nfs_pageio_ops bl_pg_write_ops = { | ||
940 | .pg_init = pnfs_generic_pg_init_write, | ||
941 | .pg_test = pnfs_generic_pg_test, | ||
942 | .pg_doio = pnfs_generic_pg_writepages, | ||
943 | }; | ||
944 | |||
945 | static struct pnfs_layoutdriver_type blocklayout_type = { | ||
946 | .id = LAYOUT_BLOCK_VOLUME, | ||
947 | .name = "LAYOUT_BLOCK_VOLUME", | ||
948 | .read_pagelist = bl_read_pagelist, | ||
949 | .write_pagelist = bl_write_pagelist, | ||
950 | .alloc_layout_hdr = bl_alloc_layout_hdr, | ||
951 | .free_layout_hdr = bl_free_layout_hdr, | ||
952 | .alloc_lseg = bl_alloc_lseg, | ||
953 | .free_lseg = bl_free_lseg, | ||
954 | .encode_layoutcommit = bl_encode_layoutcommit, | ||
955 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, | ||
956 | .set_layoutdriver = bl_set_layoutdriver, | ||
957 | .clear_layoutdriver = bl_clear_layoutdriver, | ||
958 | .pg_read_ops = &bl_pg_read_ops, | ||
959 | .pg_write_ops = &bl_pg_write_ops, | ||
960 | }; | ||
961 | |||
962 | static const struct rpc_pipe_ops bl_upcall_ops = { | ||
963 | .upcall = bl_pipe_upcall, | ||
964 | .downcall = bl_pipe_downcall, | ||
965 | .destroy_msg = bl_pipe_destroy_msg, | ||
966 | }; | ||
967 | |||
968 | static int __init nfs4blocklayout_init(void) | ||
969 | { | ||
970 | struct vfsmount *mnt; | ||
971 | struct path path; | ||
972 | int ret; | ||
973 | |||
974 | dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); | ||
975 | |||
976 | ret = pnfs_register_layoutdriver(&blocklayout_type); | ||
977 | if (ret) | ||
978 | goto out; | ||
979 | |||
980 | init_waitqueue_head(&bl_wq); | ||
981 | |||
982 | mnt = rpc_get_mount(); | ||
983 | if (IS_ERR(mnt)) { | ||
984 | ret = PTR_ERR(mnt); | ||
985 | goto out_remove; | ||
986 | } | ||
987 | |||
988 | ret = vfs_path_lookup(mnt->mnt_root, | ||
989 | mnt, | ||
990 | NFS_PIPE_DIRNAME, 0, &path); | ||
991 | if (ret) | ||
992 | goto out_remove; | ||
993 | |||
994 | bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, | ||
995 | &bl_upcall_ops, 0); | ||
996 | if (IS_ERR(bl_device_pipe)) { | ||
997 | ret = PTR_ERR(bl_device_pipe); | ||
998 | goto out_remove; | ||
999 | } | ||
1000 | out: | ||
1001 | return ret; | ||
1002 | |||
1003 | out_remove: | ||
1004 | pnfs_unregister_layoutdriver(&blocklayout_type); | ||
1005 | return ret; | ||
1006 | } | ||
1007 | |||
1008 | static void __exit nfs4blocklayout_exit(void) | ||
1009 | { | ||
1010 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", | ||
1011 | __func__); | ||
1012 | |||
1013 | pnfs_unregister_layoutdriver(&blocklayout_type); | ||
1014 | rpc_unlink(bl_device_pipe); | ||
1015 | } | ||
1016 | |||
1017 | MODULE_ALIAS("nfs-layouttype4-3"); | ||
1018 | |||
1019 | module_init(nfs4blocklayout_init); | ||
1020 | module_exit(nfs4blocklayout_exit); | ||
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h new file mode 100644 index 000000000000..f27d827960a3 --- /dev/null +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -0,0 +1,207 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayout.h | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | #ifndef FS_NFS_NFS4BLOCKLAYOUT_H | ||
33 | #define FS_NFS_NFS4BLOCKLAYOUT_H | ||
34 | |||
35 | #include <linux/device-mapper.h> | ||
36 | #include <linux/nfs_fs.h> | ||
37 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
38 | |||
39 | #include "../pnfs.h" | ||
40 | |||
41 | #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) | ||
42 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | ||
43 | |||
44 | struct block_mount_id { | ||
45 | spinlock_t bm_lock; /* protects list */ | ||
46 | struct list_head bm_devlist; /* holds pnfs_block_dev */ | ||
47 | }; | ||
48 | |||
49 | struct pnfs_block_dev { | ||
50 | struct list_head bm_node; | ||
51 | struct nfs4_deviceid bm_mdevid; /* associated devid */ | ||
52 | struct block_device *bm_mdev; /* meta device itself */ | ||
53 | }; | ||
54 | |||
55 | enum exstate4 { | ||
56 | PNFS_BLOCK_READWRITE_DATA = 0, | ||
57 | PNFS_BLOCK_READ_DATA = 1, | ||
58 | PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */ | ||
59 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | ||
60 | }; | ||
61 | |||
62 | #define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */ | ||
63 | |||
64 | struct my_tree { | ||
65 | sector_t mtt_step_size; /* Internal sector alignment */ | ||
66 | struct list_head mtt_stub; /* Should be a radix tree */ | ||
67 | }; | ||
68 | |||
69 | struct pnfs_inval_markings { | ||
70 | spinlock_t im_lock; | ||
71 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ | ||
72 | sector_t im_block_size; /* Server blocksize in sectors */ | ||
73 | }; | ||
74 | |||
75 | struct pnfs_inval_tracking { | ||
76 | struct list_head it_link; | ||
77 | int it_sector; | ||
78 | int it_tags; | ||
79 | }; | ||
80 | |||
81 | /* sector_t fields are all in 512-byte sectors */ | ||
82 | struct pnfs_block_extent { | ||
83 | struct kref be_refcnt; | ||
84 | struct list_head be_node; /* link into lseg list */ | ||
85 | struct nfs4_deviceid be_devid; /* FIXME: could use device cache instead */ | ||
86 | struct block_device *be_mdev; | ||
87 | sector_t be_f_offset; /* the starting offset in the file */ | ||
88 | sector_t be_length; /* the size of the extent */ | ||
89 | sector_t be_v_offset; /* the starting offset in the volume */ | ||
90 | enum exstate4 be_state; /* the state of this extent */ | ||
91 | struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */ | ||
92 | }; | ||
93 | |||
94 | /* Shortened extent used by LAYOUTCOMMIT */ | ||
95 | struct pnfs_block_short_extent { | ||
96 | struct list_head bse_node; | ||
97 | struct nfs4_deviceid bse_devid; | ||
98 | struct block_device *bse_mdev; | ||
99 | sector_t bse_f_offset; /* the starting offset in the file */ | ||
100 | sector_t bse_length; /* the size of the extent */ | ||
101 | }; | ||
102 | |||
103 | static inline void | ||
104 | BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) | ||
105 | { | ||
106 | spin_lock_init(&marks->im_lock); | ||
107 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); | ||
108 | marks->im_block_size = blocksize; | ||
109 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, | ||
110 | blocksize); | ||
111 | } | ||
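The step size chosen here is min(page size, server block size), expressed in 512-byte sectors. A small standalone sketch of that choice, with assumed page and block sizes:

    /* Sketch of the step-size choice in BL_INIT_INVAL_MARKS(): the
     * invalid-data tree tracks state at min(page, block) granularity,
     * in 512-byte sectors. All values assumed. */
    #include <stdio.h>

    #define SECTOR_SHIFT 9
    #define PAGE_CACHE_SECTORS (4096 >> SECTOR_SHIFT) /* 8, assuming 4 KB pages */

    int main(void)
    {
            unsigned long long blocksize[] = { 4, 8, 16 }; /* sectors */
            int i;

            for (i = 0; i < 3; i++) {
                    unsigned long long step =
                            blocksize[i] < PAGE_CACHE_SECTORS ?
                            blocksize[i] : PAGE_CACHE_SECTORS;
                    printf("blocksize %llu -> step %llu\n", blocksize[i], step);
            }
            return 0;
    }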
112 | |||
113 | enum extentclass4 { | ||
114 | RW_EXTENT = 0, /* READWRITE and INVAL */ | ||
115 | RO_EXTENT = 1, /* READ and NONE */ | ||
116 | EXTENT_LISTS = 2, | ||
117 | }; | ||
118 | |||
119 | static inline int bl_choose_list(enum exstate4 state) | ||
120 | { | ||
121 | if (state == PNFS_BLOCK_READ_DATA || state == PNFS_BLOCK_NONE_DATA) | ||
122 | return RO_EXTENT; | ||
123 | else | ||
124 | return RW_EXTENT; | ||
125 | } | ||
126 | |||
127 | struct pnfs_block_layout { | ||
128 | struct pnfs_layout_hdr bl_layout; | ||
129 | struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */ | ||
130 | spinlock_t bl_ext_lock; /* Protects list manipulation */ | ||
131 | struct list_head bl_extents[EXTENT_LISTS]; /* R and RW extents */ | ||
132 | struct list_head bl_commit; /* Needs layout commit */ | ||
133 | struct list_head bl_committing; /* Layout committing */ | ||
134 | unsigned int bl_count; /* entries in bl_commit */ | ||
135 | sector_t bl_blocksize; /* Server blocksize in sectors */ | ||
136 | }; | ||
137 | |||
138 | #define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data)) | ||
139 | |||
140 | static inline struct pnfs_block_layout * | ||
141 | BLK_LO2EXT(struct pnfs_layout_hdr *lo) | ||
142 | { | ||
143 | return container_of(lo, struct pnfs_block_layout, bl_layout); | ||
144 | } | ||
145 | |||
146 | static inline struct pnfs_block_layout * | ||
147 | BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) | ||
148 | { | ||
149 | return BLK_LO2EXT(lseg->pls_layout); | ||
150 | } | ||
151 | |||
152 | struct bl_dev_msg { | ||
153 | int status; | ||
154 | uint32_t major, minor; | ||
155 | }; | ||
156 | |||
157 | struct bl_msg_hdr { | ||
158 | u8 type; | ||
159 | u16 totallen; /* length of entire message, including hdr itself */ | ||
160 | }; | ||
161 | |||
162 | extern struct dentry *bl_device_pipe; | ||
163 | extern wait_queue_head_t bl_wq; | ||
164 | |||
165 | #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ | ||
166 | #define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ | ||
167 | #define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ | ||
168 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ | ||
169 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ | ||
170 | |||
171 | /* blocklayoutdev.c */ | ||
172 | ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *, | ||
173 | char __user *, size_t); | ||
174 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); | ||
175 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); | ||
176 | struct block_device *nfs4_blkdev_get(dev_t dev); | ||
177 | int nfs4_blkdev_put(struct block_device *bdev); | ||
178 | struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, | ||
179 | struct pnfs_device *dev); | ||
180 | int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | ||
181 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | ||
182 | |||
183 | /* blocklayoutdm.c */ | ||
184 | void bl_free_block_dev(struct pnfs_block_dev *bdev); | ||
185 | |||
186 | /* extents.c */ | ||
187 | struct pnfs_block_extent * | ||
188 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | ||
189 | struct pnfs_block_extent **cow_read); | ||
190 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
191 | sector_t offset, sector_t length, | ||
192 | sector_t **pages); | ||
193 | void bl_put_extent(struct pnfs_block_extent *be); | ||
194 | struct pnfs_block_extent *bl_alloc_extent(void); | ||
195 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); | ||
196 | int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
197 | struct xdr_stream *xdr, | ||
198 | const struct nfs4_layoutcommit_args *arg); | ||
199 | void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
200 | const struct nfs4_layoutcommit_args *arg, | ||
201 | int status); | ||
202 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
203 | struct pnfs_block_extent *new); | ||
204 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
205 | sector_t offset, sector_t length); | ||
206 | |||
207 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | ||
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c new file mode 100644 index 000000000000..a83b393fb01c --- /dev/null +++ b/fs/nfs/blocklayout/blocklayoutdev.c | |||
@@ -0,0 +1,410 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdev.c | ||
3 | * | ||
4 | * Device operations for the pnfs nfs4 block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/buffer_head.h> /* __bread */ | ||
34 | |||
35 | #include <linux/genhd.h> | ||
36 | #include <linux/blkdev.h> | ||
37 | #include <linux/hash.h> | ||
38 | |||
39 | #include "blocklayout.h" | ||
40 | |||
41 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
42 | |||
43 | static int decode_sector_number(__be32 **rp, sector_t *sp) | ||
44 | { | ||
45 | uint64_t s; | ||
46 | |||
47 | *rp = xdr_decode_hyper(*rp, &s); | ||
48 | if (s & 0x1ff) { | ||
49 | printk(KERN_WARNING "%s: sector not aligned\n", __func__); | ||
50 | return -1; | ||
51 | } | ||
52 | *sp = s >> SECTOR_SHIFT; | ||
53 | return 0; | ||
54 | } | ||
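decode_sector_number() rejects any byte value that is not 512-byte aligned (low nine bits set) and stores aligned values as sector counts. A standalone illustration, with made-up inputs:

    /* Standalone illustration of decode_sector_number(): byte values
     * that are not 512-byte aligned are rejected; aligned values are
     * stored as sector counts. Inputs are made up. */
    #include <stdio.h>
    #include <stdint.h>

    #define SECTOR_SHIFT 9

    static int to_sector(uint64_t bytes, uint64_t *sect)
    {
            if (bytes & 0x1ff)      /* any low nine bits set: unaligned */
                    return -1;
            *sect = bytes >> SECTOR_SHIFT;
            return 0;
    }

    int main(void)
    {
            uint64_t s = 0;

            printf("%d\n", to_sector(8192, &s));            /* 0 */
            printf("%llu\n", (unsigned long long)s);        /* 16 sectors */
            printf("%d\n", to_sector(1000, &s));            /* -1 */
            return 0;
    }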
55 | |||
56 | /* Open a block_device by device number. */ | ||
57 | struct block_device *nfs4_blkdev_get(dev_t dev) | ||
58 | { | ||
59 | struct block_device *bd; | ||
60 | |||
61 | dprintk("%s enter\n", __func__); | ||
62 | bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); | ||
63 | if (IS_ERR(bd)) | ||
64 | goto fail; | ||
65 | return bd; | ||
66 | fail: | ||
67 | dprintk("%s failed to open device : %ld\n", | ||
68 | __func__, PTR_ERR(bd)); | ||
69 | return NULL; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Release the block device | ||
74 | */ | ||
75 | int nfs4_blkdev_put(struct block_device *bdev) | ||
76 | { | ||
77 | dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), | ||
78 | MINOR(bdev->bd_dev)); | ||
79 | return blkdev_put(bdev, FMODE_READ); | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Shouldn't there be an rpc_generic_upcall() to do this for us? | ||
84 | */ | ||
85 | ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, | ||
86 | char __user *dst, size_t buflen) | ||
87 | { | ||
88 | char *data = (char *)msg->data + msg->copied; | ||
89 | size_t mlen = min(msg->len - msg->copied, buflen); | ||
90 | unsigned long left; | ||
91 | |||
92 | left = copy_to_user(dst, data, mlen); | ||
93 | if (left == mlen) { | ||
94 | msg->errno = -EFAULT; | ||
95 | return -EFAULT; | ||
96 | } | ||
97 | |||
98 | mlen -= left; | ||
99 | msg->copied += mlen; | ||
100 | msg->errno = 0; | ||
101 | return mlen; | ||
102 | } | ||
103 | |||
104 | static struct bl_dev_msg bl_mount_reply; | ||
105 | |||
106 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
107 | size_t mlen) | ||
108 | { | ||
109 | if (mlen != sizeof (struct bl_dev_msg)) | ||
110 | return -EINVAL; | ||
111 | |||
112 | if (copy_from_user(&bl_mount_reply, src, mlen) != 0) | ||
113 | return -EFAULT; | ||
114 | |||
115 | wake_up(&bl_wq); | ||
116 | |||
117 | return mlen; | ||
118 | } | ||
119 | |||
120 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
121 | { | ||
122 | if (msg->errno >= 0) | ||
123 | return; | ||
124 | wake_up(&bl_wq); | ||
125 | } | ||
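bl_pipe_downcall() only accepts a write of exactly sizeof(struct bl_dev_msg), so the userspace helper answering a BL_DEVICE_MOUNT upcall ends up shaped roughly like the sketch below. The rpc_pipefs path and the device numbers are assumptions for illustration, not defined by this commit:

    /* Hedged sketch of the userspace half of this pipe protocol: read
     * the upcall, resolve the volume, write back exactly one
     * struct bl_dev_msg (bl_pipe_downcall() rejects any other size).
     * The rpc_pipefs path and the device numbers are assumptions. */
    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <fcntl.h>

    struct bl_dev_msg {             /* must match the kernel layout */
            int status;
            uint32_t major, minor;
    };

    int main(void)
    {
            char buf[4096];
            struct bl_dev_msg reply = {
                    .status = 0x1,  /* BL_DEVICE_REQUEST_PROC: success */
                    .major  = 253,  /* assumed resolved device numbers */
                    .minor  = 0,
            };
            int fd = open("/var/lib/nfs/rpc_pipefs/nfs/blocklayout", O_RDWR);

            if (fd < 0)
                    return 1;
            if (read(fd, buf, sizeof(buf)) < 0)     /* consume the upcall */
                    return 1;
            /* ... decode the volume topology, set up the device ... */
            if (write(fd, &reply, sizeof(reply)) != sizeof(reply))
                    return 1;
            close(fd);
            return 0;
    }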
126 | |||
127 | /* | ||
128 | * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. | ||
129 | */ | ||
130 | struct pnfs_block_dev * | ||
131 | nfs4_blk_decode_device(struct nfs_server *server, | ||
132 | struct pnfs_device *dev) | ||
133 | { | ||
134 | struct pnfs_block_dev *rv = NULL; | ||
135 | struct block_device *bd = NULL; | ||
136 | struct rpc_pipe_msg msg; | ||
137 | struct bl_msg_hdr bl_msg = { | ||
138 | .type = BL_DEVICE_MOUNT, | ||
139 | .totallen = dev->mincount, | ||
140 | }; | ||
141 | uint8_t *dataptr; | ||
142 | DECLARE_WAITQUEUE(wq, current); | ||
143 | struct bl_dev_msg *reply = &bl_mount_reply; | ||
144 | int offset, len, i; | ||
145 | |||
146 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
147 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | ||
148 | dev->mincount); | ||
149 | |||
150 | memset(&msg, 0, sizeof(msg)); | ||
151 | msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); | ||
152 | if (!msg.data) { | ||
153 | rv = ERR_PTR(-ENOMEM); | ||
154 | goto out; | ||
155 | } | ||
156 | |||
157 | memcpy(msg.data, &bl_msg, sizeof(bl_msg)); | ||
158 | dataptr = (uint8_t *) msg.data; | ||
159 | len = dev->mincount; | ||
160 | offset = sizeof(bl_msg); | ||
161 | for (i = 0; len > 0; i++) { | ||
162 | memcpy(&dataptr[offset], page_address(dev->pages[i]), | ||
163 | len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE); | ||
164 | len -= PAGE_CACHE_SIZE; | ||
165 | offset += PAGE_CACHE_SIZE; | ||
166 | } | ||
167 | msg.len = sizeof(bl_msg) + dev->mincount; | ||
168 | |||
169 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
170 | add_wait_queue(&bl_wq, &wq); | ||
171 | if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { | ||
172 | remove_wait_queue(&bl_wq, &wq); | ||
173 | goto out; | ||
174 | } | ||
175 | |||
176 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
177 | schedule(); | ||
178 | __set_current_state(TASK_RUNNING); | ||
179 | remove_wait_queue(&bl_wq, &wq); | ||
180 | |||
181 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
182 | dprintk("%s failed to open device: %d\n", | ||
183 | __func__, reply->status); | ||
184 | rv = ERR_PTR(-EINVAL); | ||
185 | goto out; | ||
186 | } | ||
187 | |||
188 | bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); | ||
189 | if (IS_ERR(bd)) { | ||
190 | dprintk("%s failed to open device: %ld\n", | ||
191 | __func__, PTR_ERR(bd)); | ||
192 | goto out; | ||
193 | } | ||
194 | |||
195 | rv = kzalloc(sizeof(*rv), GFP_NOFS); | ||
196 | if (!rv) { | ||
197 | rv = ERR_PTR(-ENOMEM); | ||
198 | goto out; | ||
199 | } | ||
200 | |||
201 | rv->bm_mdev = bd; | ||
202 | memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); | ||
203 | dprintk("%s Created device %s with bd_block_size %u\n", | ||
204 | __func__, | ||
205 | bd->bd_disk->disk_name, | ||
206 | bd->bd_block_size); | ||
207 | |||
208 | out: | ||
209 | kfree(msg.data); | ||
210 | return rv; | ||
211 | } | ||
212 | |||
213 | /* Map deviceid returned by the server to constructed block_device */ | ||
214 | static struct block_device *translate_devid(struct pnfs_layout_hdr *lo, | ||
215 | struct nfs4_deviceid *id) | ||
216 | { | ||
217 | struct block_device *rv = NULL; | ||
218 | struct block_mount_id *mid; | ||
219 | struct pnfs_block_dev *dev; | ||
220 | |||
221 | dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id); | ||
222 | mid = BLK_ID(lo); | ||
223 | spin_lock(&mid->bm_lock); | ||
224 | list_for_each_entry(dev, &mid->bm_devlist, bm_node) { | ||
225 | if (memcmp(id->data, dev->bm_mdevid.data, | ||
226 | NFS4_DEVICEID4_SIZE) == 0) { | ||
227 | rv = dev->bm_mdev; | ||
228 | goto out; | ||
229 | } | ||
230 | } | ||
231 | out: | ||
232 | spin_unlock(&mid->bm_lock); | ||
233 | dprintk("%s returning %p\n", __func__, rv); | ||
234 | return rv; | ||
235 | } | ||
236 | |||
237 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ | ||
238 | struct layout_verification { | ||
239 | u32 mode; /* R or RW */ | ||
240 | u64 start; /* Expected start of next non-COW extent */ | ||
241 | u64 inval; /* Start of INVAL coverage */ | ||
242 | u64 cowread; /* End of COW read coverage */ | ||
243 | }; | ||
244 | |||
245 | /* Verify the extent meets the layout requirements of the pnfs-block draft, | ||
246 | * section 2.3.1. | ||
247 | */ | ||
248 | static int verify_extent(struct pnfs_block_extent *be, | ||
249 | struct layout_verification *lv) | ||
250 | { | ||
251 | if (lv->mode == IOMODE_READ) { | ||
252 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
253 | be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
254 | return -EIO; | ||
255 | if (be->be_f_offset != lv->start) | ||
256 | return -EIO; | ||
257 | lv->start += be->be_length; | ||
258 | return 0; | ||
259 | } | ||
260 | /* lv->mode == IOMODE_RW */ | ||
261 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
262 | if (be->be_f_offset != lv->start) | ||
263 | return -EIO; | ||
264 | if (lv->cowread > lv->start) | ||
265 | return -EIO; | ||
266 | lv->start += be->be_length; | ||
267 | lv->inval = lv->start; | ||
268 | return 0; | ||
269 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
270 | if (be->be_f_offset != lv->start) | ||
271 | return -EIO; | ||
272 | lv->start += be->be_length; | ||
273 | return 0; | ||
274 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
275 | if (be->be_f_offset > lv->start) | ||
276 | return -EIO; | ||
277 | if (be->be_f_offset < lv->inval) | ||
278 | return -EIO; | ||
279 | if (be->be_f_offset < lv->cowread) | ||
280 | return -EIO; | ||
281 | /* It looks like you might want to min this with lv->start, | ||
282 | * but you really don't. | ||
283 | */ | ||
284 | lv->inval = lv->inval + be->be_length; | ||
285 | lv->cowread = be->be_f_offset + be->be_length; | ||
286 | return 0; | ||
287 | } else | ||
288 | return -EIO; | ||
289 | } | ||
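For IOMODE_READ the rule reduces to: extents must tile the requested range contiguously. A minimal standalone sketch of that invariant, with illustrative extents:

    /* Minimal sketch of the IOMODE_READ branch above: extents must be
     * READ_DATA or NONE_DATA and must tile the range with no gaps or
     * overlaps. Extent values are illustrative. */
    #include <stdio.h>
    #include <stdint.h>

    struct ext { uint64_t f_offset, length; };

    int main(void)
    {
            struct ext layout[] = { { 0, 8 }, { 8, 16 }, { 24, 8 } };
            uint64_t start = 0;     /* expected start of the next extent */
            int i;

            for (i = 0; i < 3; i++) {
                    if (layout[i].f_offset != start) {
                            puts("-EIO: gap or overlap");
                            return 1;
                    }
                    start += layout[i].length;
            }
            printf("ok, covered through sector %llu\n",
                   (unsigned long long)start);
            return 0;
    }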
290 | |||
291 | /* XDR decode pnfs_block_layout4 structure */ | ||
292 | int | ||
293 | nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | ||
294 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) | ||
295 | { | ||
296 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
297 | int i, status = -EIO; | ||
298 | uint32_t count; | ||
299 | struct pnfs_block_extent *be = NULL, *save; | ||
300 | struct xdr_stream stream; | ||
301 | struct xdr_buf buf; | ||
302 | struct page *scratch; | ||
303 | __be32 *p; | ||
304 | struct layout_verification lv = { | ||
305 | .mode = lgr->range.iomode, | ||
306 | .start = lgr->range.offset >> SECTOR_SHIFT, | ||
307 | .inval = lgr->range.offset >> SECTOR_SHIFT, | ||
308 | .cowread = lgr->range.offset >> SECTOR_SHIFT, | ||
309 | }; | ||
310 | LIST_HEAD(extents); | ||
311 | |||
312 | dprintk("---> %s\n", __func__); | ||
313 | |||
314 | scratch = alloc_page(gfp_flags); | ||
315 | if (!scratch) | ||
316 | return -ENOMEM; | ||
317 | |||
318 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); | ||
319 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
320 | |||
321 | p = xdr_inline_decode(&stream, 4); | ||
322 | if (unlikely(!p)) | ||
323 | goto out_err; | ||
324 | |||
325 | count = be32_to_cpup(p++); | ||
326 | |||
327 | dprintk("%s enter, number of extents %i\n", __func__, count); | ||
328 | p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count); | ||
329 | if (unlikely(!p)) | ||
330 | goto out_err; | ||
331 | |||
332 | /* Decode individual extents, putting them in a temporary | ||
333 | * staging area until the whole layout is decoded, to make | ||
334 | * error recovery easier. | ||
335 | */ | ||
336 | for (i = 0; i < count; i++) { | ||
337 | be = bl_alloc_extent(); | ||
338 | if (!be) { | ||
339 | status = -ENOMEM; | ||
340 | goto out_err; | ||
341 | } | ||
342 | memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE); | ||
343 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
344 | be->be_mdev = translate_devid(lo, &be->be_devid); | ||
345 | if (!be->be_mdev) | ||
346 | goto out_err; | ||
347 | |||
348 | /* The next three values are read in as bytes, | ||
349 | * but stored as 512-byte sector lengths | ||
350 | */ | ||
351 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
352 | goto out_err; | ||
353 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
354 | goto out_err; | ||
355 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
356 | goto out_err; | ||
357 | be->be_state = be32_to_cpup(p++); | ||
358 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
359 | be->be_inval = &bl->bl_inval; | ||
360 | if (verify_extent(be, &lv)) { | ||
361 | dprintk("%s verify failed\n", __func__); | ||
362 | goto out_err; | ||
363 | } | ||
364 | list_add_tail(&be->be_node, &extents); | ||
365 | } | ||
366 | if (lgr->range.offset + lgr->range.length != | ||
367 | lv.start << SECTOR_SHIFT) { | ||
368 | dprintk("%s Final length mismatch\n", __func__); | ||
369 | be = NULL; | ||
370 | goto out_err; | ||
371 | } | ||
372 | if (lv.start < lv.cowread) { | ||
373 | dprintk("%s Final uncovered COW extent\n", __func__); | ||
374 | be = NULL; | ||
375 | goto out_err; | ||
376 | } | ||
377 | /* Extents decoded properly, now try to merge them in to | ||
378 | * existing layout extents. | ||
379 | */ | ||
380 | spin_lock(&bl->bl_ext_lock); | ||
381 | list_for_each_entry_safe(be, save, &extents, be_node) { | ||
382 | list_del(&be->be_node); | ||
383 | status = bl_add_merge_extent(bl, be); | ||
384 | if (status) { | ||
385 | spin_unlock(&bl->bl_ext_lock); | ||
386 | /* This is a fairly catastrophic error, as the | ||
387 | * entire layout extent lists are now corrupted. | ||
388 | * We should have some way to distinguish this. | ||
389 | */ | ||
390 | be = NULL; | ||
391 | goto out_err; | ||
392 | } | ||
393 | } | ||
394 | spin_unlock(&bl->bl_ext_lock); | ||
395 | status = 0; | ||
396 | out: | ||
397 | __free_page(scratch); | ||
398 | dprintk("%s returns %i\n", __func__, status); | ||
399 | return status; | ||
400 | |||
401 | out_err: | ||
402 | bl_put_extent(be); | ||
403 | while (!list_empty(&extents)) { | ||
404 | be = list_first_entry(&extents, struct pnfs_block_extent, | ||
405 | be_node); | ||
406 | list_del(&be->be_node); | ||
407 | bl_put_extent(be); | ||
408 | } | ||
409 | goto out; | ||
410 | } | ||
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c new file mode 100644 index 000000000000..d055c7558073 --- /dev/null +++ b/fs/nfs/blocklayout/blocklayoutdm.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdm.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2007 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Fred Isaman <iisaman@umich.edu> | ||
10 | * Andy Adamson <andros@citi.umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include <linux/genhd.h> /* gendisk - used in a dprintk*/ | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/hash.h> | ||
36 | |||
37 | #include "blocklayout.h" | ||
38 | |||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
40 | |||
41 | static void dev_remove(dev_t dev) | ||
42 | { | ||
43 | struct rpc_pipe_msg msg; | ||
44 | struct bl_dev_msg bl_umount_request; | ||
45 | struct bl_msg_hdr bl_msg = { | ||
46 | .type = BL_DEVICE_UMOUNT, | ||
47 | .totallen = sizeof(bl_umount_request), | ||
48 | }; | ||
49 | uint8_t *dataptr; | ||
50 | DECLARE_WAITQUEUE(wq, current); | ||
51 | |||
52 | dprintk("Entering %s\n", __func__); | ||
53 | |||
54 | memset(&msg, 0, sizeof(msg)); | ||
55 | msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); | ||
56 | if (!msg.data) | ||
57 | goto out; | ||
58 | |||
59 | memset(&bl_umount_request, 0, sizeof(bl_umount_request)); | ||
60 | bl_umount_request.major = MAJOR(dev); | ||
61 | bl_umount_request.minor = MINOR(dev); | ||
62 | |||
63 | memcpy(msg.data, &bl_msg, sizeof(bl_msg)); | ||
64 | dataptr = (uint8_t *) msg.data; | ||
65 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); | ||
66 | msg.len = sizeof(bl_msg) + bl_msg.totallen; | ||
67 | |||
68 | add_wait_queue(&bl_wq, &wq); | ||
69 | if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { | ||
70 | remove_wait_queue(&bl_wq, &wq); | ||
71 | goto out; | ||
72 | } | ||
73 | |||
74 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
75 | schedule(); | ||
76 | __set_current_state(TASK_RUNNING); | ||
77 | remove_wait_queue(&bl_wq, &wq); | ||
78 | |||
79 | out: | ||
80 | kfree(msg.data); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Release meta device | ||
85 | */ | ||
86 | static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) | ||
87 | { | ||
88 | int rv; | ||
89 | |||
90 | dprintk("%s Releasing\n", __func__); | ||
91 | rv = nfs4_blkdev_put(bdev->bm_mdev); | ||
92 | if (rv) | ||
93 | printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", | ||
94 | __func__, rv); | ||
95 | |||
96 | dev_remove(bdev->bm_mdev->bd_dev); | ||
97 | } | ||
98 | |||
99 | void bl_free_block_dev(struct pnfs_block_dev *bdev) | ||
100 | { | ||
101 | if (bdev) { | ||
102 | if (bdev->bm_mdev) { | ||
103 | dprintk("%s Removing DM device: %d:%d\n", | ||
104 | __func__, | ||
105 | MAJOR(bdev->bm_mdev->bd_dev), | ||
106 | MINOR(bdev->bm_mdev->bd_dev)); | ||
107 | nfs4_blk_metadev_release(bdev); | ||
108 | } | ||
109 | kfree(bdev); | ||
110 | } | ||
111 | } | ||
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c new file mode 100644 index 000000000000..19fa7b0b8c00 --- /dev/null +++ b/fs/nfs/blocklayout/extents.c | |||
@@ -0,0 +1,935 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/extents.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
35 | |||
36 | /* Bit numbers */ | ||
37 | #define EXTENT_INITIALIZED 0 | ||
38 | #define EXTENT_WRITTEN 1 | ||
39 | #define EXTENT_IN_COMMIT 2 | ||
40 | #define INTERNAL_EXISTS MY_MAX_TAGS | ||
41 | #define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1) | ||
42 | |||
43 | /* Returns largest t<=s s.t. t%base==0 */ | ||
44 | static inline sector_t normalize(sector_t s, int base) | ||
45 | { | ||
46 | sector_t tmp = s; /* Since do_div modifies its argument */ | ||
47 | return s - do_div(tmp, base); | ||
48 | } | ||
49 | |||
50 | static inline sector_t normalize_up(sector_t s, int base) | ||
51 | { | ||
52 | return normalize(s + base - 1, base); | ||
53 | } | ||
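do_div() modifies its argument and returns the remainder, hence the tmp copy. In plain userspace arithmetic the two helpers are just round-down/round-up to a multiple of base, as this sketch with assumed values shows:

    /* Userspace analogue of normalize()/normalize_up(): round a sector
     * value down or up to a multiple of base. (The kernel version copies
     * s into tmp first because do_div() modifies its argument.) */
    #include <stdio.h>
    #include <stdint.h>

    static uint64_t normalize(uint64_t s, int base)
    {
            return s - s % base;
    }

    static uint64_t normalize_up(uint64_t s, int base)
    {
            return normalize(s + base - 1, base);
    }

    int main(void)
    {
            /* base 8 sectors ~ a 4 KB block */
            printf("%llu\n", (unsigned long long)normalize(13, 8));    /* 8  */
            printf("%llu\n", (unsigned long long)normalize_up(13, 8)); /* 16 */
            printf("%llu\n", (unsigned long long)normalize_up(16, 8)); /* 16 */
            return 0;
    }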
54 | |||
55 | /* Complete stub using a list while determining the API wanted */ | ||
56 | |||
57 | /* Returns tags, or negative */ | ||
58 | static int32_t _find_entry(struct my_tree *tree, u64 s) | ||
59 | { | ||
60 | struct pnfs_inval_tracking *pos; | ||
61 | |||
62 | dprintk("%s(%llu) enter\n", __func__, s); | ||
63 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
64 | if (pos->it_sector > s) | ||
65 | continue; | ||
66 | else if (pos->it_sector == s) | ||
67 | return pos->it_tags & INTERNAL_MASK; | ||
68 | else | ||
69 | break; | ||
70 | } | ||
71 | return -ENOENT; | ||
72 | } | ||
73 | |||
74 | static inline | ||
75 | int _has_tag(struct my_tree *tree, u64 s, int32_t tag) | ||
76 | { | ||
77 | int32_t tags; | ||
78 | |||
79 | dprintk("%s(%llu, %i) enter\n", __func__, s, tag); | ||
80 | s = normalize(s, tree->mtt_step_size); | ||
81 | tags = _find_entry(tree, s); | ||
82 | if ((tags < 0) || !(tags & (1 << tag))) | ||
83 | return 0; | ||
84 | else | ||
85 | return 1; | ||
86 | } | ||
87 | |||
88 | /* Creates entry with tag, or if entry already exists, unions tag to it. | ||
89 | * If storage is not NULL, newly created entry will use it. | ||
90 | * Returns number of entries added, or negative on error. | ||
91 | */ | ||
92 | static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, | ||
93 | struct pnfs_inval_tracking *storage) | ||
94 | { | ||
95 | int found = 0; | ||
96 | struct pnfs_inval_tracking *pos; | ||
97 | |||
98 | dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); | ||
99 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
100 | if (pos->it_sector > s) | ||
101 | continue; | ||
102 | else if (pos->it_sector == s) { | ||
103 | found = 1; | ||
104 | break; | ||
105 | } else | ||
106 | break; | ||
107 | } | ||
108 | if (found) { | ||
109 | pos->it_tags |= (1 << tag); | ||
110 | return 0; | ||
111 | } else { | ||
112 | struct pnfs_inval_tracking *new; | ||
113 | if (storage) | ||
114 | new = storage; | ||
115 | else { | ||
116 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
117 | if (!new) | ||
118 | return -ENOMEM; | ||
119 | } | ||
120 | new->it_sector = s; | ||
121 | new->it_tags = (1 << tag); | ||
122 | list_add(&new->it_link, &pos->it_link); | ||
123 | return 1; | ||
124 | } | ||
125 | } | ||
126 | |||
127 | /* XXXX Really want option to not create */ | ||
128 | /* Over range, unions tag with existing entries, else creates entry with tag */ | ||
129 | static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) | ||
130 | { | ||
131 | u64 i; | ||
132 | |||
133 | dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length); | ||
134 | for (i = normalize(s, tree->mtt_step_size); i < s + length; | ||
135 | i += tree->mtt_step_size) | ||
136 | if (_add_entry(tree, i, tag, NULL)) | ||
137 | return -ENOMEM; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* Ensure that future operations on the given range of the tree will not malloc */ | ||
142 | static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | ||
143 | { | ||
144 | u64 start, end, s; | ||
145 | int count, i, used = 0, status = -ENOMEM; | ||
146 | struct pnfs_inval_tracking **storage; | ||
147 | |||
148 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); | ||
149 | start = normalize(offset, tree->mtt_step_size); | ||
150 | end = normalize_up(offset + length, tree->mtt_step_size); | ||
151 | count = (int)(end - start) / (int)tree->mtt_step_size; | ||
152 | |||
153 | /* Pre-malloc what memory we might need */ | ||
154 | storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); | ||
155 | if (!storage) | ||
156 | return -ENOMEM; | ||
157 | for (i = 0; i < count; i++) { | ||
158 | storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking), | ||
159 | GFP_NOFS); | ||
160 | if (!storage[i]) | ||
161 | goto out_cleanup; | ||
162 | } | ||
163 | |||
164 | /* Now need lock - HOW??? */ | ||
165 | |||
166 | for (s = start; s < end; s += tree->mtt_step_size) | ||
167 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); | ||
168 | |||
169 | /* Unlock - HOW??? */ | ||
170 | status = 0; | ||
171 | |||
172 | out_cleanup: | ||
173 | for (i = used; i < count; i++) { | ||
174 | if (!storage[i]) | ||
175 | break; | ||
176 | kfree(storage[i]); | ||
177 | } | ||
178 | kfree(storage); | ||
179 | return status; | ||
180 | } | ||
181 | |||
182 | static void set_needs_init(sector_t *array, sector_t offset) | ||
183 | { | ||
184 | sector_t *p = array; | ||
185 | |||
186 | dprintk("%s enter\n", __func__); | ||
187 | if (!p) | ||
188 | return; | ||
189 | while (*p < offset) | ||
190 | p++; | ||
191 | if (*p == offset) | ||
192 | return; | ||
193 | else if (*p == ~0) { | ||
194 | *p++ = offset; | ||
195 | *p = ~0; | ||
196 | return; | ||
197 | } else { | ||
198 | sector_t *save = p; | ||
199 | dprintk("%s Adding %llu\n", __func__, (u64)offset); | ||
200 | while (*p != ~0) | ||
201 | p++; | ||
202 | p++; | ||
203 | memmove(save + 1, save, (char *)p - (char *)save); | ||
204 | *save = offset; | ||
205 | return; | ||
206 | } | ||
207 | } | ||
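The array is kept sorted and terminated by ~0; insertion shifts the tail (sentinel included) right with memmove(). A self-contained demo of the same convention, with illustrative sector values:

    /* Self-contained demo of the set_needs_init() convention: a sorted
     * array of sectors terminated by ~0, with insertion shifting the
     * tail (sentinel included) right via memmove(). Values are
     * illustrative. */
    #include <stdio.h>
    #include <string.h>

    typedef unsigned long long sector_t;

    static void set_needs_init(sector_t *p, sector_t offset)
    {
            while (*p < offset)
                    p++;
            if (*p == offset)
                    return;
            if (*p == ~0ULL) {
                    *p++ = offset;
                    *p = ~0ULL;
            } else {
                    sector_t *save = p;

                    while (*p != ~0ULL)
                            p++;
                    p++;
                    memmove(save + 1, save, (char *)p - (char *)save);
                    *save = offset;
            }
    }

    int main(void)
    {
            sector_t a[8] = { 8, 24, ~0ULL };
            sector_t *p;

            set_needs_init(a, 16);  /* insert in the middle */
            set_needs_init(a, 32);  /* append before the sentinel */
            for (p = a; *p != ~0ULL; p++)
                    printf("%llu ", *p);    /* 8 16 24 32 */
            printf("\n");
            return 0;
    }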
208 | |||
209 | /* We are relying on the page lock to serialize this */ | ||
210 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | ||
211 | { | ||
212 | int rv; | ||
213 | |||
214 | spin_lock(&marks->im_lock); | ||
215 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | ||
216 | spin_unlock(&marks->im_lock); | ||
217 | return rv; | ||
218 | } | ||
219 | |||
220 | /* Assume start, end already sector aligned */ | ||
221 | static int | ||
222 | _range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag) | ||
223 | { | ||
224 | struct pnfs_inval_tracking *pos; | ||
225 | u64 expect = 0; | ||
226 | |||
227 | dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); | ||
228 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
229 | if (pos->it_sector >= end) | ||
230 | continue; | ||
231 | if (!expect) { | ||
232 | if ((pos->it_sector == end - tree->mtt_step_size) && | ||
233 | (pos->it_tags & (1 << tag))) { | ||
234 | expect = pos->it_sector - tree->mtt_step_size; | ||
235 | if (pos->it_sector < tree->mtt_step_size || expect < start) | ||
236 | return 1; | ||
237 | continue; | ||
238 | } else { | ||
239 | return 0; | ||
240 | } | ||
241 | } | ||
242 | if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) | ||
243 | return 0; | ||
244 | expect -= tree->mtt_step_size; | ||
245 | if (expect < start) | ||
246 | return 1; | ||
247 | } | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | static int is_range_written(struct pnfs_inval_markings *marks, | ||
252 | sector_t start, sector_t end) | ||
253 | { | ||
254 | int rv; | ||
255 | |||
256 | spin_lock(&marks->im_lock); | ||
257 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | ||
258 | spin_unlock(&marks->im_lock); | ||
259 | return rv; | ||
260 | } | ||
261 | |||
262 | /* Marks sectors in [offset, offset+length) as having been initialized. | ||
263 | * All lengths are step-aligned, where step is min(pagesize, blocksize). | ||
264 | * Notes where partial block is initialized, and helps prepare it for | ||
265 | * complete initialization later. | ||
266 | */ | ||
267 | /* Currently assumes offset is page-aligned */ | ||
268 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
269 | sector_t offset, sector_t length, | ||
270 | sector_t **pages) | ||
271 | { | ||
272 | sector_t s, start, end; | ||
273 | sector_t *array = NULL; /* Pages to mark */ | ||
274 | |||
275 | dprintk("%s(offset=%llu,len=%llu) enter\n", | ||
276 | __func__, (u64)offset, (u64)length); | ||
277 | s = max((sector_t) 3, | ||
278 | 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS))); | ||
279 | dprintk("%s set max=%llu\n", __func__, (u64)s); | ||
280 | if (pages) { | ||
281 | array = kmalloc(s * sizeof(sector_t), GFP_NOFS); | ||
282 | if (!array) | ||
283 | goto outerr; | ||
284 | array[0] = ~0; | ||
285 | } | ||
286 | |||
287 | start = normalize(offset, marks->im_block_size); | ||
288 | end = normalize_up(offset + length, marks->im_block_size); | ||
289 | if (_preload_range(&marks->im_tree, start, end - start)) | ||
290 | goto outerr; | ||
291 | |||
292 | spin_lock(&marks->im_lock); | ||
293 | |||
294 | for (s = normalize_up(start, PAGE_CACHE_SECTORS); | ||
295 | s < offset; s += PAGE_CACHE_SECTORS) { | ||
296 | dprintk("%s pre-area pages\n", __func__); | ||
297 | /* Portion of used block is not initialized */ | ||
298 | if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | ||
299 | set_needs_init(array, s); | ||
300 | } | ||
301 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) | ||
302 | goto out_unlock; | ||
303 | for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS); | ||
304 | s < end; s += PAGE_CACHE_SECTORS) { | ||
305 | dprintk("%s post-area pages\n", __func__); | ||
306 | if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | ||
307 | set_needs_init(array, s); | ||
308 | } | ||
309 | |||
310 | spin_unlock(&marks->im_lock); | ||
311 | |||
312 | if (pages) { | ||
313 | if (array[0] == ~0) { | ||
314 | kfree(array); | ||
315 | *pages = NULL; | ||
316 | } else | ||
317 | *pages = array; | ||
318 | } | ||
319 | return 0; | ||
320 | |||
321 | out_unlock: | ||
322 | spin_unlock(&marks->im_lock); | ||
323 | outerr: | ||
324 | if (pages) { | ||
325 | kfree(array); | ||
326 | *pages = NULL; | ||
327 | } | ||
328 | return -ENOMEM; | ||
329 | } | ||
330 | |||
331 | /* Marks sectors in [offset, offset+length) as having been written to disk. | ||
332 | * All lengths should be block aligned. | ||
333 | */ | ||
334 | static int mark_written_sectors(struct pnfs_inval_markings *marks, | ||
335 | sector_t offset, sector_t length) | ||
336 | { | ||
337 | int status; | ||
338 | |||
339 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, | ||
340 | (u64)offset, (u64)length); | ||
341 | spin_lock(&marks->im_lock); | ||
342 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); | ||
343 | spin_unlock(&marks->im_lock); | ||
344 | return status; | ||
345 | } | ||
346 | |||
347 | static void print_short_extent(struct pnfs_block_short_extent *be) | ||
348 | { | ||
349 | dprintk("PRINT SHORT EXTENT extent %p\n", be); | ||
350 | if (be) { | ||
351 | dprintk(" be_f_offset %llu\n", (u64)be->bse_f_offset); | ||
352 | dprintk(" be_length %llu\n", (u64)be->bse_length); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | static void print_clist(struct list_head *list, unsigned int count) | ||
357 | { | ||
358 | struct pnfs_block_short_extent *be; | ||
359 | unsigned int i = 0; | ||
360 | |||
361 | ifdebug(FACILITY) { | ||
362 | printk(KERN_DEBUG "****************\n"); | ||
363 | printk(KERN_DEBUG "Extent list looks like:\n"); | ||
364 | list_for_each_entry(be, list, bse_node) { | ||
365 | i++; | ||
366 | print_short_extent(be); | ||
367 | } | ||
368 | if (i != count) | ||
369 | printk(KERN_DEBUG "\n\nExpected %u entries\n\n\n", count); | ||
370 | printk(KERN_DEBUG "****************\n"); | ||
371 | } | ||
372 | } | ||
373 | |||
374 | /* Note: In theory, we should do more checking that devids match between | ||
375 | * old and new, but if they don't, the lists are too corrupt to salvage anyway. | ||
376 | */ | ||
377 | /* Note this is very similar to bl_add_merge_extent */ | ||
378 | static void add_to_commitlist(struct pnfs_block_layout *bl, | ||
379 | struct pnfs_block_short_extent *new) | ||
380 | { | ||
381 | struct list_head *clist = &bl->bl_commit; | ||
382 | struct pnfs_block_short_extent *old, *save; | ||
383 | sector_t end = new->bse_f_offset + new->bse_length; | ||
384 | |||
385 | dprintk("%s enter\n", __func__); | ||
386 | print_short_extent(new); | ||
387 | print_clist(clist, bl->bl_count); | ||
388 | bl->bl_count++; | ||
389 | /* Scan for proper place to insert, extending new to the left | ||
390 | * as much as possible. | ||
391 | */ | ||
392 | list_for_each_entry_safe(old, save, clist, bse_node) { | ||
393 | if (new->bse_f_offset < old->bse_f_offset) | ||
394 | break; | ||
395 | if (end <= old->bse_f_offset + old->bse_length) { | ||
396 | /* Range is already in list */ | ||
397 | bl->bl_count--; | ||
398 | kfree(new); | ||
399 | return; | ||
400 | } else if (new->bse_f_offset <= | ||
401 | old->bse_f_offset + old->bse_length) { | ||
402 | /* new overlaps or abuts existing be */ | ||
403 | if (new->bse_mdev == old->bse_mdev) { | ||
404 | /* extend new to fully replace old */ | ||
405 | new->bse_length += new->bse_f_offset - | ||
406 | old->bse_f_offset; | ||
407 | new->bse_f_offset = old->bse_f_offset; | ||
408 | list_del(&old->bse_node); | ||
409 | bl->bl_count--; | ||
410 | kfree(old); | ||
411 | } | ||
412 | } | ||
413 | } | ||
414 | /* Note that if we never hit the above break, old will not point to a | ||
415 | * valid extent. However, in that case &old->bse_node==list. | ||
416 | */ | ||
417 | list_add_tail(&new->bse_node, &old->bse_node); | ||
418 | /* Scan forward for overlaps. If we find any, extend new and | ||
419 | * remove the overlapped extent. | ||
420 | */ | ||
421 | old = list_prepare_entry(new, clist, bse_node); | ||
422 | list_for_each_entry_safe_continue(old, save, clist, bse_node) { | ||
423 | if (end < old->bse_f_offset) | ||
424 | break; | ||
425 | /* new overlaps or abuts old */ | ||
426 | if (new->bse_mdev == old->bse_mdev) { | ||
427 | if (end < old->bse_f_offset + old->bse_length) { | ||
428 | /* extend new to fully cover old */ | ||
429 | end = old->bse_f_offset + old->bse_length; | ||
430 | new->bse_length = end - new->bse_f_offset; | ||
431 | } | ||
432 | list_del(&old->bse_node); | ||
433 | bl->bl_count--; | ||
434 | kfree(old); | ||
435 | } | ||
436 | } | ||
437 | dprintk("%s: after merging\n", __func__); | ||
438 | print_clist(clist, bl->bl_count); | ||
439 | } | ||
440 | |||
441 | /* Note the range described by offset, length is guaranteed to be contained | ||
442 | * within be. | ||
443 | */ | ||
444 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
445 | sector_t offset, sector_t length) | ||
446 | { | ||
447 | sector_t new_end, end = offset + length; | ||
448 | struct pnfs_block_short_extent *new; | ||
449 | struct pnfs_block_layout *bl = container_of(be->be_inval, | ||
450 | struct pnfs_block_layout, | ||
451 | bl_inval); | ||
452 | |||
453 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
454 | if (!new) | ||
455 | return -ENOMEM; | ||
456 | |||
457 | mark_written_sectors(be->be_inval, offset, length); | ||
458 | /* We want to add the range to commit list, but it must be | ||
459 | * block-normalized, and verified that the normalized range has | ||
460 | * been entirely written to disk. | ||
461 | */ | ||
462 | new->bse_f_offset = offset; | ||
463 | offset = normalize(offset, bl->bl_blocksize); | ||
464 | if (offset < new->bse_f_offset) { | ||
465 | if (is_range_written(be->be_inval, offset, new->bse_f_offset)) | ||
466 | new->bse_f_offset = offset; | ||
467 | else | ||
468 | new->bse_f_offset = offset + bl->bl_blocksize; | ||
469 | } | ||
470 | new_end = normalize_up(end, bl->bl_blocksize); | ||
471 | if (end < new_end) { | ||
472 | if (is_range_written(be->be_inval, end, new_end)) | ||
473 | end = new_end; | ||
474 | else | ||
475 | end = new_end - bl->bl_blocksize; | ||
476 | } | ||
477 | if (end <= new->bse_f_offset) { | ||
478 | kfree(new); | ||
479 | return 0; | ||
480 | } | ||
481 | new->bse_length = end - new->bse_f_offset; | ||
482 | new->bse_devid = be->be_devid; | ||
483 | new->bse_mdev = be->be_mdev; | ||
484 | |||
485 | spin_lock(&bl->bl_ext_lock); | ||
486 | /* new will be freed, either by add_to_commitlist if it decides not | ||
487 | * to use it, or after LAYOUTCOMMIT uses it in the commitlist. | ||
488 | */ | ||
489 | add_to_commitlist(bl, new); | ||
490 | spin_unlock(&bl->bl_ext_lock); | ||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | static void print_bl_extent(struct pnfs_block_extent *be) | ||
495 | { | ||
496 | dprintk("PRINT EXTENT extent %p\n", be); | ||
497 | if (be) { | ||
498 | dprintk(" be_f_offset %llu\n", (u64)be->be_f_offset); | ||
499 | dprintk(" be_length %llu\n", (u64)be->be_length); | ||
500 | dprintk(" be_v_offset %llu\n", (u64)be->be_v_offset); | ||
501 | dprintk(" be_state %d\n", be->be_state); | ||
502 | } | ||
503 | } | ||
504 | |||
505 | static void | ||
506 | destroy_extent(struct kref *kref) | ||
507 | { | ||
508 | struct pnfs_block_extent *be; | ||
509 | |||
510 | be = container_of(kref, struct pnfs_block_extent, be_refcnt); | ||
511 | dprintk("%s be=%p\n", __func__, be); | ||
512 | kfree(be); | ||
513 | } | ||
514 | |||
515 | void | ||
516 | bl_put_extent(struct pnfs_block_extent *be) | ||
517 | { | ||
518 | if (be) { | ||
519 | dprintk("%s enter %p (%i)\n", __func__, be, | ||
520 | atomic_read(&be->be_refcnt.refcount)); | ||
521 | kref_put(&be->be_refcnt, destroy_extent); | ||
522 | } | ||
523 | } | ||
524 | |||
525 | struct pnfs_block_extent *bl_alloc_extent(void) | ||
526 | { | ||
527 | struct pnfs_block_extent *be; | ||
528 | |||
529 | be = kmalloc(sizeof(struct pnfs_block_extent), GFP_NOFS); | ||
530 | if (!be) | ||
531 | return NULL; | ||
532 | INIT_LIST_HEAD(&be->be_node); | ||
533 | kref_init(&be->be_refcnt); | ||
534 | be->be_inval = NULL; | ||
535 | return be; | ||
536 | } | ||
537 | |||
538 | static void print_elist(struct list_head *list) | ||
539 | { | ||
540 | struct pnfs_block_extent *be; | ||
541 | dprintk("****************\n"); | ||
542 | dprintk("Extent list looks like:\n"); | ||
543 | list_for_each_entry(be, list, be_node) { | ||
544 | print_bl_extent(be); | ||
545 | } | ||
546 | dprintk("****************\n"); | ||
547 | } | ||
548 | |||
549 | static inline int | ||
550 | extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new) | ||
551 | { | ||
552 | /* Note this assumes new->be_f_offset >= old->be_f_offset */ | ||
553 | return (new->be_state == old->be_state) && | ||
554 | ((new->be_state == PNFS_BLOCK_NONE_DATA) || | ||
555 | ((new->be_v_offset - old->be_v_offset == | ||
556 | new->be_f_offset - old->be_f_offset) && | ||
557 | new->be_mdev == old->be_mdev)); | ||
558 | } | ||
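Two extents merge only if they map file offsets onto the volume with the same shift, on the same device, and in the same state. A standalone sketch of that rule (the PNFS_BLOCK_NONE_DATA special case, which ignores the volume mapping, is omitted; all values are illustrative):

    /* Standalone sketch of the extents_consistent() rule: two data
     * extents may merge only when they are in the same state, on the
     * same device, and map file offsets to the volume with the same
     * shift. */
    #include <stdio.h>
    #include <stdint.h>

    struct ext { uint64_t f_offset, v_offset; int state, mdev; };

    static int consistent(const struct ext *old, const struct ext *new)
    {
            /* assumes new->f_offset >= old->f_offset, as in the caller */
            return new->state == old->state &&
                   new->mdev == old->mdev &&
                   new->v_offset - old->v_offset ==
                   new->f_offset - old->f_offset;
    }

    int main(void)
    {
            struct ext a = { .f_offset = 0,  .v_offset = 100, .state = 0, .mdev = 1 };
            struct ext b = { .f_offset = 16, .v_offset = 116, .state = 0, .mdev = 1 };
            struct ext c = { .f_offset = 16, .v_offset = 200, .state = 0, .mdev = 1 };

            printf("%d\n", consistent(&a, &b));     /* 1: same shift */
            printf("%d\n", consistent(&a, &c));     /* 0: different shift */
            return 0;
    }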
559 | |||
560 | /* Adds new to appropriate list in bl, modifying new and removing existing | ||
561 | * extents as appropriate to deal with overlaps. | ||
562 | * | ||
563 | * See bl_find_get_extent for list constraints. | ||
564 | * | ||
565 | * Refcount on new is already set. If end up not using it, or error out, | ||
566 | * need to put the reference. | ||
567 | * | ||
568 | * bl->bl_ext_lock is held by caller. | ||
569 | */ | ||
570 | int | ||
571 | bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
572 | struct pnfs_block_extent *new) | ||
573 | { | ||
574 | struct pnfs_block_extent *be, *tmp; | ||
575 | sector_t end = new->be_f_offset + new->be_length; | ||
576 | struct list_head *list; | ||
577 | |||
578 | dprintk("%s enter with be=%p\n", __func__, new); | ||
579 | print_bl_extent(new); | ||
580 | list = &bl->bl_extents[bl_choose_list(new->be_state)]; | ||
581 | print_elist(list); | ||
582 | |||
583 | /* Scan for proper place to insert, extending new to the left | ||
584 | * as much as possible. | ||
585 | */ | ||
586 | list_for_each_entry_safe_reverse(be, tmp, list, be_node) { | ||
587 | if (new->be_f_offset >= be->be_f_offset + be->be_length) | ||
588 | break; | ||
589 | if (new->be_f_offset >= be->be_f_offset) { | ||
590 | if (end <= be->be_f_offset + be->be_length) { | ||
591 | /* new is a subset of existing be */ | ||
592 | if (extents_consistent(be, new)) { | ||
593 | dprintk("%s: new is subset, ignoring\n", | ||
594 | __func__); | ||
595 | bl_put_extent(new); | ||
596 | return 0; | ||
597 | } else { | ||
598 | goto out_err; | ||
599 | } | ||
600 | } else { | ||
601 | /* |<-- be -->| | ||
602 | * |<-- new -->| */ | ||
603 | if (extents_consistent(be, new)) { | ||
604 | /* extend new to fully replace be */ | ||
605 | new->be_length += new->be_f_offset - | ||
606 | be->be_f_offset; | ||
607 | new->be_f_offset = be->be_f_offset; | ||
608 | new->be_v_offset = be->be_v_offset; | ||
609 | dprintk("%s: removing %p\n", __func__, be); | ||
610 | list_del(&be->be_node); | ||
611 | bl_put_extent(be); | ||
612 | } else { | ||
613 | goto out_err; | ||
614 | } | ||
615 | } | ||
616 | } else if (end >= be->be_f_offset + be->be_length) { | ||
617 | /* new extent overlaps existing be */ | ||
618 | if (extents_consistent(be, new)) { | ||
619 | /* extend new to fully replace be */ | ||
620 | dprintk("%s: removing %p\n", __func__, be); | ||
621 | list_del(&be->be_node); | ||
622 | bl_put_extent(be); | ||
623 | } else { | ||
624 | goto out_err; | ||
625 | } | ||
626 | } else if (end > be->be_f_offset) { | ||
627 | /* |<-- be -->| | ||
628 | *|<-- new -->| */ | ||
629 | if (extents_consistent(new, be)) { | ||
630 | /* extend new to fully replace be */ | ||
631 | new->be_length += be->be_f_offset + be->be_length - | ||
632 | new->be_f_offset - new->be_length; | ||
633 | dprintk("%s: removing %p\n", __func__, be); | ||
634 | list_del(&be->be_node); | ||
635 | bl_put_extent(be); | ||
636 | } else { | ||
637 | goto out_err; | ||
638 | } | ||
639 | } | ||
640 | } | ||
641 | /* Note that if we never hit the above break, be will not point to a | ||
642 | * valid extent. However, in that case &be->be_node==list. | ||
643 | */ | ||
644 | list_add(&new->be_node, &be->be_node); | ||
645 | dprintk("%s: inserting new\n", __func__); | ||
646 | print_elist(list); | ||
647 | /* FIXME - The per-list consistency checks have all been done, | ||
648 | * should now check cross-list consistency. | ||
649 | */ | ||
650 | return 0; | ||
651 | |||
652 | out_err: | ||
653 | bl_put_extent(new); | ||
654 | return -EIO; | ||
655 | } | ||
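The reverse scan above boils down to four overlap cases; a worked enumeration with hypothetical file-sector ranges ('be' is the extent already on the list, 'new' the incoming one):

/*  be [100,200)  new [120,180)  new is a subset: drop new, keep be
 *  be [100,200)  new [150,300)  extend new left to 100, drop be
 *  be [100,200)  new [ 50,300)  new swallows be: just drop be
 *  be [100,200)  new [ 50,150)  extend new right to 200, drop be
 *
 * Every case first requires extents_consistent(); if the mappings
 * disagree, the function refuses to guess and fails with -EIO. */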
656 | |||
657 | /* Returns extent, or NULL. If a second READ extent exists, it is returned | ||
658 | * in cow_read, if given. | ||
659 | * | ||
660 | * The extents are kept in two separate ordered lists, one for READ and NONE, | ||
661 | * one for READWRITE and INVALID. Within each list, we assume: | ||
662 | * 1. Extents are ordered by file offset. | ||
663 | * 2. For any given isect, there is at most one extent that matches. | ||
664 | */ | ||
665 | struct pnfs_block_extent * | ||
666 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | ||
667 | struct pnfs_block_extent **cow_read) | ||
668 | { | ||
669 | struct pnfs_block_extent *be, *cow, *ret; | ||
670 | int i; | ||
671 | |||
672 | dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | ||
673 | cow = ret = NULL; | ||
674 | spin_lock(&bl->bl_ext_lock); | ||
675 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
676 | list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | ||
677 | if (isect >= be->be_f_offset + be->be_length) | ||
678 | break; | ||
679 | if (isect >= be->be_f_offset) { | ||
680 | /* We have found an extent */ | ||
681 | dprintk("%s Get %p (%i)\n", __func__, be, | ||
682 | atomic_read(&be->be_refcnt.refcount)); | ||
683 | kref_get(&be->be_refcnt); | ||
684 | if (!ret) | ||
685 | ret = be; | ||
686 | else if (be->be_state != PNFS_BLOCK_READ_DATA) | ||
687 | bl_put_extent(be); | ||
688 | else | ||
689 | cow = be; | ||
690 | break; | ||
691 | } | ||
692 | } | ||
693 | if (ret && | ||
694 | (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA)) | ||
695 | break; | ||
696 | } | ||
697 | spin_unlock(&bl->bl_ext_lock); | ||
698 | if (cow_read) | ||
699 | *cow_read = cow; | ||
700 | print_bl_extent(ret); | ||
701 | return ret; | ||
702 | } | ||
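A hedged usage sketch (not from the kernel tree) of how a read path might resolve one sector with this helper; the function and constant names are the ones above, the flow and error handling are illustrative:

static int resolve_sector(struct pnfs_block_layout *bl, sector_t isect)
{
        struct pnfs_block_extent *be, *cow = NULL;

        be = bl_find_get_extent(bl, isect, &cow);
        if (!be)
                return -EIO;    /* no extent covers this sector */
        if (be->be_state == PNFS_BLOCK_INVALID_DATA && cow) {
                /* unwritten space: old data must be read from the
                 * READ extent 'cow'; new writes target 'be' */
        }
        bl_put_extent(be);      /* drop the references taken above */
        if (cow)
                bl_put_extent(cow);
        return 0;
}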
703 | |||
704 | /* Similar to bl_find_get_extent, but called with lock held, and ignores cow */ | ||
705 | static struct pnfs_block_extent * | ||
706 | bl_find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect) | ||
707 | { | ||
708 | struct pnfs_block_extent *be, *ret = NULL; | ||
709 | int i; | ||
710 | |||
711 | dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | ||
712 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
713 | if (ret) | ||
714 | break; | ||
715 | list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | ||
716 | if (isect >= be->be_f_offset + be->be_length) | ||
717 | break; | ||
718 | if (isect >= be->be_f_offset) { | ||
719 | /* We have found an extent */ | ||
720 | dprintk("%s Get %p (%i)\n", __func__, be, | ||
721 | atomic_read(&be->be_refcnt.refcount)); | ||
722 | kref_get(&be->be_refcnt); | ||
723 | ret = be; | ||
724 | break; | ||
725 | } | ||
726 | } | ||
727 | } | ||
728 | print_bl_extent(ret); | ||
729 | return ret; | ||
730 | } | ||
731 | |||
732 | int | ||
733 | encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
734 | struct xdr_stream *xdr, | ||
735 | const struct nfs4_layoutcommit_args *arg) | ||
736 | { | ||
737 | struct pnfs_block_short_extent *lce, *save; | ||
738 | unsigned int count = 0; | ||
739 | __be32 *p, *xdr_start; | ||
740 | |||
741 | dprintk("%s enter\n", __func__); | ||
742 | /* BUG - creation of bl_commit is buggy - need to wait for | ||
743 | * entire block to be marked WRITTEN before it can be added. | ||
744 | */ | ||
745 | spin_lock(&bl->bl_ext_lock); | ||
746 | /* Want to adjust for possible truncate */ | ||
747 | /* We now want to adjust argument range */ | ||
748 | |||
749 | /* XDR encode the ranges found */ | ||
750 | xdr_start = xdr_reserve_space(xdr, 8); | ||
751 | if (!xdr_start) | ||
752 | goto out; | ||
753 | list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) { | ||
754 | p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data)); | ||
755 | if (!p) | ||
756 | break; | ||
757 | p = xdr_encode_opaque_fixed(p, lce->bse_devid.data, NFS4_DEVICEID4_SIZE); | ||
758 | p = xdr_encode_hyper(p, lce->bse_f_offset << SECTOR_SHIFT); | ||
759 | p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); | ||
760 | p = xdr_encode_hyper(p, 0LL); | ||
761 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | ||
762 | list_del(&lce->bse_node); | ||
763 | list_add_tail(&lce->bse_node, &bl->bl_committing); | ||
764 | bl->bl_count--; | ||
765 | count++; | ||
766 | } | ||
767 | xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4); | ||
768 | xdr_start[1] = cpu_to_be32(count); | ||
769 | out: | ||
770 | spin_unlock(&bl->bl_ext_lock); | ||
771 | dprintk("%s found %i ranges\n", __func__, count); | ||
772 | return 0; | ||
773 | } | ||
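The function uses a reserve-then-backfill XDR pattern: two 32-bit words are reserved before the entries, and once the final entry count is known both are patched in place. The expression (xdr->p - xdr_start - 1) * 4 is the byte length of everything after the opaque-length word, i.e. the count word plus the encoded entries. A condensed sketch of just that pattern (xdr_reserve_space() and cpu_to_be32() are the real APIs; 'count' is whatever the loop accumulated):

        __be32 *start = xdr_reserve_space(xdr, 8);
        if (!start)
                goto out;                       /* as in the function above */
        /* ... encode 'count' variable-length entries into 'xdr' ... */
        start[0] = cpu_to_be32((xdr->p - start - 1) * 4); /* opaque length */
        start[1] = cpu_to_be32(count);                    /* entry count */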
774 | |||
775 | /* Helper function for set_to_rw that initializes a new extent */ | ||
776 | static void | ||
777 | _prep_new_extent(struct pnfs_block_extent *new, | ||
778 | struct pnfs_block_extent *orig, | ||
779 | sector_t offset, sector_t length, int state) | ||
780 | { | ||
781 | kref_init(&new->be_refcnt); | ||
782 | /* don't need to INIT_LIST_HEAD(&new->be_node) */ | ||
783 | memcpy(&new->be_devid, &orig->be_devid, sizeof(struct nfs4_deviceid)); | ||
784 | new->be_mdev = orig->be_mdev; | ||
785 | new->be_f_offset = offset; | ||
786 | new->be_length = length; | ||
787 | new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset; | ||
788 | new->be_state = state; | ||
789 | new->be_inval = orig->be_inval; | ||
790 | } | ||
791 | |||
792 | /* Tries to merge be with extent in front of it in list. | ||
793 | * Frees storage if not used. | ||
794 | */ | ||
795 | static struct pnfs_block_extent * | ||
796 | _front_merge(struct pnfs_block_extent *be, struct list_head *head, | ||
797 | struct pnfs_block_extent *storage) | ||
798 | { | ||
799 | struct pnfs_block_extent *prev; | ||
800 | |||
801 | if (!storage) | ||
802 | goto no_merge; | ||
803 | if (&be->be_node == head || be->be_node.prev == head) | ||
804 | goto no_merge; | ||
805 | prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node); | ||
806 | if ((prev->be_f_offset + prev->be_length != be->be_f_offset) || | ||
807 | !extents_consistent(prev, be)) | ||
808 | goto no_merge; | ||
809 | _prep_new_extent(storage, prev, prev->be_f_offset, | ||
810 | prev->be_length + be->be_length, prev->be_state); | ||
811 | list_replace(&prev->be_node, &storage->be_node); | ||
812 | bl_put_extent(prev); | ||
813 | list_del(&be->be_node); | ||
814 | bl_put_extent(be); | ||
815 | return storage; | ||
816 | |||
817 | no_merge: | ||
818 | kfree(storage); | ||
819 | return be; | ||
820 | } | ||
821 | |||
822 | static u64 | ||
823 | set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length) | ||
824 | { | ||
825 | u64 rv = offset + length; | ||
826 | struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old; | ||
827 | struct pnfs_block_extent *children[3]; | ||
828 | struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL; | ||
829 | int i = 0, j; | ||
830 | |||
831 | dprintk("%s(%llu, %llu)\n", __func__, offset, length); | ||
832 | /* Create storage for up to three new extents e1, e2, e3 */ | ||
833 | e1 = kmalloc(sizeof(*e1), GFP_ATOMIC); | ||
834 | e2 = kmalloc(sizeof(*e2), GFP_ATOMIC); | ||
835 | e3 = kmalloc(sizeof(*e3), GFP_ATOMIC); | ||
836 | /* BUG - we are ignoring any failure */ | ||
837 | if (!e1 || !e2 || !e3) | ||
838 | goto out_nosplit; | ||
839 | |||
840 | spin_lock(&bl->bl_ext_lock); | ||
841 | be = bl_find_get_extent_locked(bl, offset); | ||
842 | rv = be->be_f_offset + be->be_length; | ||
843 | if (be->be_state != PNFS_BLOCK_INVALID_DATA) { | ||
844 | spin_unlock(&bl->bl_ext_lock); | ||
845 | goto out_nosplit; | ||
846 | } | ||
847 | /* Add e* to children, bumping e*'s krefs */ | ||
848 | if (be->be_f_offset != offset) { | ||
849 | _prep_new_extent(e1, be, be->be_f_offset, | ||
850 | offset - be->be_f_offset, | ||
851 | PNFS_BLOCK_INVALID_DATA); | ||
852 | children[i++] = e1; | ||
853 | print_bl_extent(e1); | ||
854 | } else | ||
855 | merge1 = e1; | ||
856 | _prep_new_extent(e2, be, offset, | ||
857 | min(length, be->be_f_offset + be->be_length - offset), | ||
858 | PNFS_BLOCK_READWRITE_DATA); | ||
859 | children[i++] = e2; | ||
860 | print_bl_extent(e2); | ||
861 | if (offset + length < be->be_f_offset + be->be_length) { | ||
862 | _prep_new_extent(e3, be, e2->be_f_offset + e2->be_length, | ||
863 | be->be_f_offset + be->be_length - | ||
864 | offset - length, | ||
865 | PNFS_BLOCK_INVALID_DATA); | ||
866 | children[i++] = e3; | ||
867 | print_bl_extent(e3); | ||
868 | } else | ||
869 | merge2 = e3; | ||
870 | |||
871 | /* Remove be from list, and insert the e* */ | ||
872 | /* We don't get refs on e*, since this list is the base reference | ||
873 | * set when init'ed. | ||
874 | */ | ||
875 | if (i < 3) | ||
876 | children[i] = NULL; | ||
877 | new = children[0]; | ||
878 | list_replace(&be->be_node, &new->be_node); | ||
879 | bl_put_extent(be); | ||
880 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1); | ||
881 | for (j = 1; j < i; j++) { | ||
882 | old = new; | ||
883 | new = children[j]; | ||
884 | list_add(&new->be_node, &old->be_node); | ||
885 | } | ||
886 | if (merge2) { | ||
887 | /* This is a HACK, should just create a _back_merge function */ | ||
888 | new = list_entry(new->be_node.next, | ||
889 | struct pnfs_block_extent, be_node); | ||
890 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2); | ||
891 | } | ||
892 | spin_unlock(&bl->bl_ext_lock); | ||
893 | |||
894 | /* Since we removed the base reference above, be is now scheduled for | ||
895 | * destruction. | ||
896 | */ | ||
897 | bl_put_extent(be); | ||
898 | dprintk("%s returns %llu after split\n", __func__, rv); | ||
899 | return rv; | ||
900 | |||
901 | out_nosplit: | ||
902 | kfree(e1); | ||
903 | kfree(e2); | ||
904 | kfree(e3); | ||
905 | dprintk("%s returns %llu without splitting\n", __func__, rv); | ||
906 | return rv; | ||
907 | } | ||
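set_to_rw() carves the committed range out of one INVALID extent, producing up to three children: an INVALID prefix, the new READWRITE middle, and an INVALID suffix; when the range is flush with either edge, the unused child becomes merge storage instead. A runnable userspace sketch of the range arithmetic with hypothetical sector numbers:

#include <stdio.h>

int main(void)
{
        unsigned long long be_f = 0, be_len = 100;   /* INVALID extent  */
        unsigned long long off = 30, len = 40;       /* range set to RW */

        if (off > be_f)                               /* e1: prefix */
                printf("e1 INVALID [%llu,%llu)\n", be_f, off);
        printf("e2 RW      [%llu,%llu)\n", off, off + len);
        if (off + len < be_f + be_len)                /* e3: suffix */
                printf("e3 INVALID [%llu,%llu)\n", off + len, be_f + be_len);
        return 0;
}

With these values it prints e1 INVALID [0,30), e2 RW [30,70), e3 INVALID [70,100); with off = 0 the e1 branch is skipped and that storage goes to _front_merge() as merge1.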
908 | |||
909 | void | ||
910 | clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
911 | const struct nfs4_layoutcommit_args *arg, | ||
912 | int status) | ||
913 | { | ||
914 | struct pnfs_block_short_extent *lce, *save; | ||
915 | |||
916 | dprintk("%s status %d\n", __func__, status); | ||
917 | list_for_each_entry_safe(lce, save, &bl->bl_committing, bse_node) { | ||
918 | if (likely(!status)) { | ||
919 | u64 offset = lce->bse_f_offset; | ||
920 | u64 end = offset + lce->bse_length; | ||
921 | |||
922 | do { | ||
923 | offset = set_to_rw(bl, offset, end - offset); | ||
924 | } while (offset < end); | ||
925 | list_del(&lce->bse_node); | ||
926 | |||
927 | kfree(lce); | ||
928 | } else { | ||
929 | list_del(&lce->bse_node); | ||
930 | spin_lock(&bl->bl_ext_lock); | ||
931 | add_to_commitlist(bl, lce); | ||
932 | spin_unlock(&bl->bl_ext_lock); | ||
933 | } | ||
934 | } | ||
935 | } | ||
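The do/while loop above works because set_to_rw() converts at most one extent per call and returns the sector where it stopped. A worked example with two INVALID extents [0,50) and [50,100) and a committed range [10,90) (hypothetical numbers):

        offset = set_to_rw(bl, 10, 80);  /* splits [0,50), returns 50    */
        offset = set_to_rw(bl, 50, 40);  /* splits [50,100), returns 100 */
        /* 100 >= 90, so the loop terminates */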
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 76f856e284e4..7cf6cafcc007 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/completion.h> | 7 | #include <linux/completion.h> |
8 | #include <linux/sunrpc/cache.h> | 8 | #include <linux/sunrpc/cache.h> |
9 | #include <asm/atomic.h> | 9 | #include <linux/atomic.h> |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * Deferred request handling | 12 | * Deferred request handling |
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b257383bb565..07df5f1d85e5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum { | |||
38 | struct cb_process_state { | 38 | struct cb_process_state { |
39 | __be32 drc_status; | 39 | __be32 drc_status; |
40 | struct nfs_client *clp; | 40 | struct nfs_client *clp; |
41 | int slotid; | ||
41 | }; | 42 | }; |
42 | 43 | ||
43 | struct cb_compound_hdr_arg { | 44 | struct cb_compound_hdr_arg { |
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( | |||
166 | void *dummy, struct cb_process_state *cps); | 167 | void *dummy, struct cb_process_state *cps); |
167 | 168 | ||
168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); | 169 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); |
169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); | ||
170 | 170 | ||
171 | struct cb_devicenotifyitem { | 171 | struct cb_devicenotifyitem { |
172 | uint32_t cbd_notify_type; | 172 | uint32_t cbd_notify_type; |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954e9bb9..43926add945b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf | |||
111 | static u32 initiate_file_draining(struct nfs_client *clp, | 111 | static u32 initiate_file_draining(struct nfs_client *clp, |
112 | struct cb_layoutrecallargs *args) | 112 | struct cb_layoutrecallargs *args) |
113 | { | 113 | { |
114 | struct nfs_server *server; | ||
114 | struct pnfs_layout_hdr *lo; | 115 | struct pnfs_layout_hdr *lo; |
115 | struct inode *ino; | 116 | struct inode *ino; |
116 | bool found = false; | 117 | bool found = false; |
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
118 | LIST_HEAD(free_me_list); | 119 | LIST_HEAD(free_me_list); |
119 | 120 | ||
120 | spin_lock(&clp->cl_lock); | 121 | spin_lock(&clp->cl_lock); |
121 | list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { | 122 | rcu_read_lock(); |
122 | if (nfs_compare_fh(&args->cbl_fh, | 123 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
123 | &NFS_I(lo->plh_inode)->fh)) | 124 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
124 | continue; | 125 | if (nfs_compare_fh(&args->cbl_fh, |
125 | ino = igrab(lo->plh_inode); | 126 | &NFS_I(lo->plh_inode)->fh)) |
126 | if (!ino) | 127 | continue; |
127 | continue; | 128 | ino = igrab(lo->plh_inode); |
128 | found = true; | 129 | if (!ino) |
129 | /* Without this, layout can be freed as soon | 130 | continue; |
130 | * as we release cl_lock. | 131 | found = true; |
131 | */ | 132 | /* Without this, layout can be freed as soon |
132 | get_layout_hdr(lo); | 133 | * as we release cl_lock. |
133 | break; | 134 | */ |
135 | get_layout_hdr(lo); | ||
136 | break; | ||
137 | } | ||
138 | if (found) | ||
139 | break; | ||
134 | } | 140 | } |
141 | rcu_read_unlock(); | ||
135 | spin_unlock(&clp->cl_lock); | 142 | spin_unlock(&clp->cl_lock); |
143 | |||
136 | if (!found) | 144 | if (!found) |
137 | return NFS4ERR_NOMATCHING_LAYOUT; | 145 | return NFS4ERR_NOMATCHING_LAYOUT; |
138 | 146 | ||
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
154 | static u32 initiate_bulk_draining(struct nfs_client *clp, | 162 | static u32 initiate_bulk_draining(struct nfs_client *clp, |
155 | struct cb_layoutrecallargs *args) | 163 | struct cb_layoutrecallargs *args) |
156 | { | 164 | { |
165 | struct nfs_server *server; | ||
157 | struct pnfs_layout_hdr *lo; | 166 | struct pnfs_layout_hdr *lo; |
158 | struct inode *ino; | 167 | struct inode *ino; |
159 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; | 168 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; |
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, | |||
167 | }; | 176 | }; |
168 | 177 | ||
169 | spin_lock(&clp->cl_lock); | 178 | spin_lock(&clp->cl_lock); |
170 | list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { | 179 | rcu_read_lock(); |
180 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | ||
171 | if ((args->cbl_recall_type == RETURN_FSID) && | 181 | if ((args->cbl_recall_type == RETURN_FSID) && |
172 | memcmp(&NFS_SERVER(lo->plh_inode)->fsid, | 182 | memcmp(&server->fsid, &args->cbl_fsid, |
173 | &args->cbl_fsid, sizeof(struct nfs_fsid))) | 183 | sizeof(struct nfs_fsid))) |
174 | continue; | ||
175 | if (!igrab(lo->plh_inode)) | ||
176 | continue; | 184 | continue; |
177 | get_layout_hdr(lo); | 185 | |
178 | BUG_ON(!list_empty(&lo->plh_bulk_recall)); | 186 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
179 | list_add(&lo->plh_bulk_recall, &recall_list); | 187 | if (!igrab(lo->plh_inode)) |
188 | continue; | ||
189 | get_layout_hdr(lo); | ||
190 | BUG_ON(!list_empty(&lo->plh_bulk_recall)); | ||
191 | list_add(&lo->plh_bulk_recall, &recall_list); | ||
192 | } | ||
180 | } | 193 | } |
194 | rcu_read_unlock(); | ||
181 | spin_unlock(&clp->cl_lock); | 195 | spin_unlock(&clp->cl_lock); |
196 | |||
182 | list_for_each_entry_safe(lo, tmp, | 197 | list_for_each_entry_safe(lo, tmp, |
183 | &recall_list, plh_bulk_recall) { | 198 | &recall_list, plh_bulk_recall) { |
184 | ino = lo->plh_inode; | 199 | ino = lo->plh_inode; |
@@ -333,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
333 | /* Normal */ | 348 | /* Normal */ |
334 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { | 349 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { |
335 | slot->seq_nr++; | 350 | slot->seq_nr++; |
336 | return htonl(NFS4_OK); | 351 | goto out_ok; |
337 | } | 352 | } |
338 | 353 | ||
339 | /* Replay */ | 354 | /* Replay */ |
@@ -352,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
352 | /* Wraparound */ | 367 | /* Wraparound */ |
353 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { | 368 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { |
354 | slot->seq_nr = 1; | 369 | slot->seq_nr = 1; |
355 | return htonl(NFS4_OK); | 370 | goto out_ok; |
356 | } | 371 | } |
357 | 372 | ||
358 | /* Misordered request */ | 373 | /* Misordered request */ |
359 | return htonl(NFS4ERR_SEQ_MISORDERED); | 374 | return htonl(NFS4ERR_SEQ_MISORDERED); |
375 | out_ok: | ||
376 | tbl->highest_used_slotid = args->csa_slotid; | ||
377 | return htonl(NFS4_OK); | ||
360 | } | 378 | } |
361 | 379 | ||
362 | /* | 380 | /* |
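Pulling the scattered hunks together: validate_seqid() now claims the backchannel slot on both success paths by recording highest_used_slotid under the caller's slot_tbl_lock, replacing the separate nfs4_cb_take_slot() call. A consolidated sketch (replay handling between the hunks and locking are elided; this is a simplified rendering, not a drop-in function):

static __be32 validate_and_claim(struct nfs4_slot_table *tbl,
                                 struct nfs4_slot *slot,
                                 u32 seqid, u32 slotid)
{
        if (seqid == slot->seq_nr + 1) {            /* normal case */
                slot->seq_nr++;
                goto out_ok;
        }
        if (seqid == 1 && slot->seq_nr + 1 == 0) {  /* u32 wraparound */
                slot->seq_nr = 1;
                goto out_ok;
        }
        return htonl(NFS4ERR_SEQ_MISORDERED);       /* misordered */
out_ok:
        tbl->highest_used_slotid = slotid;          /* claim the slot */
        return htonl(NFS4_OK);
}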
@@ -418,26 +436,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
418 | struct cb_sequenceres *res, | 436 | struct cb_sequenceres *res, |
419 | struct cb_process_state *cps) | 437 | struct cb_process_state *cps) |
420 | { | 438 | { |
439 | struct nfs4_slot_table *tbl; | ||
421 | struct nfs_client *clp; | 440 | struct nfs_client *clp; |
422 | int i; | 441 | int i; |
423 | __be32 status = htonl(NFS4ERR_BADSESSION); | 442 | __be32 status = htonl(NFS4ERR_BADSESSION); |
424 | 443 | ||
425 | cps->clp = NULL; | ||
426 | |||
427 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); | 444 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); |
428 | if (clp == NULL) | 445 | if (clp == NULL) |
429 | goto out; | 446 | goto out; |
430 | 447 | ||
448 | tbl = &clp->cl_session->bc_slot_table; | ||
449 | |||
450 | spin_lock(&tbl->slot_tbl_lock); | ||
431 | /* state manager is resetting the session */ | 451 | /* state manager is resetting the session */ |
432 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { | 452 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { |
433 | status = NFS4ERR_DELAY; | 453 | spin_unlock(&tbl->slot_tbl_lock); |
454 | status = htonl(NFS4ERR_DELAY); | ||
455 | /* Return NFS4ERR_BADSESSION if we're draining the session | ||
456 | * in order to reset it. | ||
457 | */ | ||
458 | if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
459 | status = htonl(NFS4ERR_BADSESSION); | ||
434 | goto out; | 460 | goto out; |
435 | } | 461 | } |
436 | 462 | ||
437 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); | 463 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); |
464 | spin_unlock(&tbl->slot_tbl_lock); | ||
438 | if (status) | 465 | if (status) |
439 | goto out; | 466 | goto out; |
440 | 467 | ||
468 | cps->slotid = args->csa_slotid; | ||
469 | |||
441 | /* | 470 | /* |
442 | * Check for pending referring calls. If a match is found, a | 471 | * Check for pending referring calls. If a match is found, a |
443 | * related callback was received before the response to the original | 472 | * related callback was received before the response to the original |
@@ -454,7 +483,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
454 | res->csr_slotid = args->csa_slotid; | 483 | res->csr_slotid = args->csa_slotid; |
455 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 484 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
456 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 485 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
457 | nfs4_cb_take_slot(clp); | ||
458 | 486 | ||
459 | out: | 487 | out: |
460 | cps->clp = clp; /* put in nfs4_callback_compound */ | 488 | cps->clp = clp; /* put in nfs4_callback_compound */ |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c6c86a77e043..918ad647afea 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) | |||
754 | * Let the state manager know callback processing done. | 754 | * Let the state manager know callback processing done. |
755 | * A single slot, so highest used slotid is either 0 or -1 | 755 | * A single slot, so highest used slotid is either 0 or -1 |
756 | */ | 756 | */ |
757 | tbl->highest_used_slotid--; | 757 | tbl->highest_used_slotid = -1; |
758 | nfs4_check_drain_bc_complete(session); | 758 | nfs4_check_drain_bc_complete(session); |
759 | spin_unlock(&tbl->slot_tbl_lock); | 759 | spin_unlock(&tbl->slot_tbl_lock); |
760 | } | 760 | } |
761 | 761 | ||
762 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 762 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
763 | { | 763 | { |
764 | if (clp && clp->cl_session) | 764 | if (cps->slotid != -1) |
765 | nfs4_callback_free_slot(clp->cl_session); | 765 | nfs4_callback_free_slot(cps->clp->cl_session); |
766 | } | ||
767 | |||
768 | /* A single slot, so highest used slotid is either 0 or -1 */ | ||
769 | void nfs4_cb_take_slot(struct nfs_client *clp) | ||
770 | { | ||
771 | struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; | ||
772 | |||
773 | spin_lock(&tbl->slot_tbl_lock); | ||
774 | tbl->highest_used_slotid++; | ||
775 | BUG_ON(tbl->highest_used_slotid != 0); | ||
776 | spin_unlock(&tbl->slot_tbl_lock); | ||
777 | } | 766 | } |
778 | 767 | ||
779 | #else /* CONFIG_NFS_V4_1 */ | 768 | #else /* CONFIG_NFS_V4_1 */ |
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) | |||
784 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); | 773 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); |
785 | } | 774 | } |
786 | 775 | ||
787 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 776 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
788 | { | 777 | { |
789 | } | 778 | } |
790 | #endif /* CONFIG_NFS_V4_1 */ | 779 | #endif /* CONFIG_NFS_V4_1 */ |
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
866 | struct cb_process_state cps = { | 855 | struct cb_process_state cps = { |
867 | .drc_status = 0, | 856 | .drc_status = 0, |
868 | .clp = NULL, | 857 | .clp = NULL, |
858 | .slotid = -1, | ||
869 | }; | 859 | }; |
870 | unsigned int nops = 0; | 860 | unsigned int nops = 0; |
871 | 861 | ||
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
906 | 896 | ||
907 | *hdr_res.status = status; | 897 | *hdr_res.status = status; |
908 | *hdr_res.nops = htonl(nops); | 898 | *hdr_res.nops = htonl(nops); |
909 | nfs4_cb_free_slot(cps.clp); | 899 | nfs4_cb_free_slot(&cps); |
910 | nfs_put_client(cps.clp); | 900 | nfs_put_client(cps.clp); |
911 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); | 901 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); |
912 | return rpc_success; | 902 | return rpc_success; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b88b65b..5833fbbf59b0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -105,7 +105,7 @@ struct rpc_program nfs_program = { | |||
105 | .nrvers = ARRAY_SIZE(nfs_version), | 105 | .nrvers = ARRAY_SIZE(nfs_version), |
106 | .version = nfs_version, | 106 | .version = nfs_version, |
107 | .stats = &nfs_rpcstat, | 107 | .stats = &nfs_rpcstat, |
108 | .pipe_dir_name = "/nfs", | 108 | .pipe_dir_name = NFS_PIPE_DIRNAME, |
109 | }; | 109 | }; |
110 | 110 | ||
111 | struct rpc_stat nfs_rpcstat = { | 111 | struct rpc_stat nfs_rpcstat = { |
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
188 | cred = rpc_lookup_machine_cred(); | 188 | cred = rpc_lookup_machine_cred(); |
189 | if (!IS_ERR(cred)) | 189 | if (!IS_ERR(cred)) |
190 | clp->cl_machine_cred = cred; | 190 | clp->cl_machine_cred = cred; |
191 | #if defined(CONFIG_NFS_V4_1) | ||
192 | INIT_LIST_HEAD(&clp->cl_layouts); | ||
193 | #endif | ||
194 | nfs_fscache_get_client_cookie(clp); | 191 | nfs_fscache_get_client_cookie(clp); |
195 | 192 | ||
196 | return clp; | 193 | return clp; |
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp) | |||
293 | nfs4_deviceid_purge_client(clp); | 290 | nfs4_deviceid_purge_client(clp); |
294 | 291 | ||
295 | kfree(clp->cl_hostname); | 292 | kfree(clp->cl_hostname); |
293 | kfree(clp->server_scope); | ||
296 | kfree(clp); | 294 | kfree(clp); |
297 | 295 | ||
298 | dprintk("<-- nfs_free_client()\n"); | 296 | dprintk("<-- nfs_free_client()\n"); |
@@ -906,7 +904,9 @@ error: | |||
906 | /* | 904 | /* |
907 | * Load up the server record from information gained in an fsinfo record | 905 | * Load up the server record from information gained in an fsinfo record |
908 | */ | 906 | */ |
909 | static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) | 907 | static void nfs_server_set_fsinfo(struct nfs_server *server, |
908 | struct nfs_fh *mntfh, | ||
909 | struct nfs_fsinfo *fsinfo) | ||
910 | { | 910 | { |
911 | unsigned long max_rpc_payload; | 911 | unsigned long max_rpc_payload; |
912 | 912 | ||
@@ -936,7 +936,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * | |||
936 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) | 936 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) |
937 | server->wsize = NFS_MAX_FILE_IO_SIZE; | 937 | server->wsize = NFS_MAX_FILE_IO_SIZE; |
938 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 938 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
939 | set_pnfs_layoutdriver(server, fsinfo->layouttype); | 939 | server->pnfs_blksize = fsinfo->blksize; |
940 | set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); | ||
940 | 941 | ||
941 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); | 942 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); |
942 | 943 | ||
@@ -982,7 +983,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str | |||
982 | if (error < 0) | 983 | if (error < 0) |
983 | goto out_error; | 984 | goto out_error; |
984 | 985 | ||
985 | nfs_server_set_fsinfo(server, &fsinfo); | 986 | nfs_server_set_fsinfo(server, mntfh, &fsinfo); |
986 | 987 | ||
987 | /* Get some general file system info */ | 988 | /* Get some general file system info */ |
988 | if (server->namelen == 0) { | 989 | if (server->namelen == 0) { |
@@ -1062,6 +1063,7 @@ static struct nfs_server *nfs_alloc_server(void) | |||
1062 | INIT_LIST_HEAD(&server->client_link); | 1063 | INIT_LIST_HEAD(&server->client_link); |
1063 | INIT_LIST_HEAD(&server->master_link); | 1064 | INIT_LIST_HEAD(&server->master_link); |
1064 | INIT_LIST_HEAD(&server->delegations); | 1065 | INIT_LIST_HEAD(&server->delegations); |
1066 | INIT_LIST_HEAD(&server->layouts); | ||
1065 | 1067 | ||
1066 | atomic_set(&server->active, 0); | 1068 | atomic_set(&server->active, 0); |
1067 | 1069 | ||
@@ -1464,7 +1466,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | |||
1464 | dprintk("<-- %s %p\n", __func__, clp); | 1466 | dprintk("<-- %s %p\n", __func__, clp); |
1465 | return clp; | 1467 | return clp; |
1466 | } | 1468 | } |
1467 | EXPORT_SYMBOL(nfs4_set_ds_client); | 1469 | EXPORT_SYMBOL_GPL(nfs4_set_ds_client); |
1468 | 1470 | ||
1469 | /* | 1471 | /* |
1470 | * Session has been established, and the client marked ready. | 1472 | * Session has been established, and the client marked ready. |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dd25c2aec375..321a66bc3846 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode) | |||
398 | return err; | 398 | return err; |
399 | } | 399 | } |
400 | 400 | ||
401 | static void nfs_mark_return_delegation(struct nfs_delegation *delegation) | 401 | static void nfs_mark_return_delegation(struct nfs_server *server, |
402 | struct nfs_delegation *delegation) | ||
402 | { | 403 | { |
403 | struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; | ||
404 | |||
405 | set_bit(NFS_DELEGATION_RETURN, &delegation->flags); | 404 | set_bit(NFS_DELEGATION_RETURN, &delegation->flags); |
406 | set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); | 405 | set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); |
407 | } | 406 | } |
408 | 407 | ||
409 | /** | 408 | /** |
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, | |||
441 | if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) | 440 | if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) |
442 | continue; | 441 | continue; |
443 | if (delegation->type & flags) | 442 | if (delegation->type & flags) |
444 | nfs_mark_return_delegation(delegation); | 443 | nfs_mark_return_delegation(server, delegation); |
445 | } | 444 | } |
446 | } | 445 | } |
447 | 446 | ||
@@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) | |||
508 | list_for_each_entry_rcu(delegation, &server->delegations, super_list) { | 507 | list_for_each_entry_rcu(delegation, &server->delegations, super_list) { |
509 | if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) | 508 | if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) |
510 | continue; | 509 | continue; |
511 | nfs_mark_return_delegation(delegation); | 510 | nfs_mark_return_delegation(server, delegation); |
512 | } | 511 | } |
513 | } | 512 | } |
514 | 513 | ||
@@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) | |||
539 | int nfs_async_inode_return_delegation(struct inode *inode, | 538 | int nfs_async_inode_return_delegation(struct inode *inode, |
540 | const nfs4_stateid *stateid) | 539 | const nfs4_stateid *stateid) |
541 | { | 540 | { |
542 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 541 | struct nfs_server *server = NFS_SERVER(inode); |
542 | struct nfs_client *clp = server->nfs_client; | ||
543 | struct nfs_delegation *delegation; | 543 | struct nfs_delegation *delegation; |
544 | 544 | ||
545 | rcu_read_lock(); | 545 | rcu_read_lock(); |
@@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, | |||
549 | rcu_read_unlock(); | 549 | rcu_read_unlock(); |
550 | return -ENOENT; | 550 | return -ENOENT; |
551 | } | 551 | } |
552 | nfs_mark_return_delegation(delegation); | 552 | nfs_mark_return_delegation(server, delegation); |
553 | rcu_read_unlock(); | 553 | rcu_read_unlock(); |
554 | 554 | ||
555 | nfs_delegation_run_state_manager(clp); | 555 | nfs_delegation_run_state_manager(clp); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 57f578e2560a..b238d95ac48c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -134,18 +134,19 @@ const struct inode_operations nfs4_dir_inode_operations = { | |||
134 | 134 | ||
135 | #endif /* CONFIG_NFS_V4 */ | 135 | #endif /* CONFIG_NFS_V4 */ |
136 | 136 | ||
137 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred) | 137 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) |
138 | { | 138 | { |
139 | struct nfs_open_dir_context *ctx; | 139 | struct nfs_open_dir_context *ctx; |
140 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 140 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
141 | if (ctx != NULL) { | 141 | if (ctx != NULL) { |
142 | ctx->duped = 0; | 142 | ctx->duped = 0; |
143 | ctx->attr_gencount = NFS_I(dir)->attr_gencount; | ||
143 | ctx->dir_cookie = 0; | 144 | ctx->dir_cookie = 0; |
144 | ctx->dup_cookie = 0; | 145 | ctx->dup_cookie = 0; |
145 | ctx->cred = get_rpccred(cred); | 146 | ctx->cred = get_rpccred(cred); |
146 | } else | 147 | return ctx; |
147 | ctx = ERR_PTR(-ENOMEM); | 148 | } |
148 | return ctx; | 149 | return ERR_PTR(-ENOMEM); |
149 | } | 150 | } |
150 | 151 | ||
151 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) | 152 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) |
@@ -173,7 +174,7 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
173 | cred = rpc_lookup_cred(); | 174 | cred = rpc_lookup_cred(); |
174 | if (IS_ERR(cred)) | 175 | if (IS_ERR(cred)) |
175 | return PTR_ERR(cred); | 176 | return PTR_ERR(cred); |
176 | ctx = alloc_nfs_open_dir_context(cred); | 177 | ctx = alloc_nfs_open_dir_context(inode, cred); |
177 | if (IS_ERR(ctx)) { | 178 | if (IS_ERR(ctx)) { |
178 | res = PTR_ERR(ctx); | 179 | res = PTR_ERR(ctx); |
179 | goto out; | 180 | goto out; |
@@ -323,7 +324,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
323 | { | 324 | { |
324 | loff_t diff = desc->file->f_pos - desc->current_index; | 325 | loff_t diff = desc->file->f_pos - desc->current_index; |
325 | unsigned int index; | 326 | unsigned int index; |
326 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
327 | 327 | ||
328 | if (diff < 0) | 328 | if (diff < 0) |
329 | goto out_eof; | 329 | goto out_eof; |
@@ -336,7 +336,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
336 | index = (unsigned int)diff; | 336 | index = (unsigned int)diff; |
337 | *desc->dir_cookie = array->array[index].cookie; | 337 | *desc->dir_cookie = array->array[index].cookie; |
338 | desc->cache_entry_index = index; | 338 | desc->cache_entry_index = index; |
339 | ctx->duped = 0; | ||
340 | return 0; | 339 | return 0; |
341 | out_eof: | 340 | out_eof: |
342 | desc->eof = 1; | 341 | desc->eof = 1; |
@@ -349,14 +348,34 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
349 | int i; | 348 | int i; |
350 | loff_t new_pos; | 349 | loff_t new_pos; |
351 | int status = -EAGAIN; | 350 | int status = -EAGAIN; |
352 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
353 | 351 | ||
354 | for (i = 0; i < array->size; i++) { | 352 | for (i = 0; i < array->size; i++) { |
355 | if (array->array[i].cookie == *desc->dir_cookie) { | 353 | if (array->array[i].cookie == *desc->dir_cookie) { |
354 | struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode); | ||
355 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
356 | |||
356 | new_pos = desc->current_index + i; | 357 | new_pos = desc->current_index + i; |
357 | if (new_pos < desc->file->f_pos) { | 358 | if (ctx->attr_gencount != nfsi->attr_gencount |
359 | || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) { | ||
360 | ctx->duped = 0; | ||
361 | ctx->attr_gencount = nfsi->attr_gencount; | ||
362 | } else if (new_pos < desc->file->f_pos) { | ||
363 | if (ctx->duped > 0 | ||
364 | && ctx->dup_cookie == *desc->dir_cookie) { | ||
365 | if (printk_ratelimit()) { | ||
366 | pr_notice("NFS: directory %s/%s contains a readdir loop. " | ||
367 | "Please contact your server vendor. " | ||
368 | "The file: %s has duplicate cookie %llu\n", | ||
369 | desc->file->f_dentry->d_parent->d_name.name, | ||
370 | desc->file->f_dentry->d_name.name, | ||
371 | array->array[i].string.name, | ||
372 | *desc->dir_cookie); | ||
373 | } | ||
374 | status = -ELOOP; | ||
375 | goto out; | ||
376 | } | ||
358 | ctx->dup_cookie = *desc->dir_cookie; | 377 | ctx->dup_cookie = *desc->dir_cookie; |
359 | ctx->duped = 1; | 378 | ctx->duped = -1; |
360 | } | 379 | } |
361 | desc->file->f_pos = new_pos; | 380 | desc->file->f_pos = new_pos; |
362 | desc->cache_entry_index = i; | 381 | desc->cache_entry_index = i; |
@@ -368,6 +387,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
368 | if (*desc->dir_cookie == array->last_cookie) | 387 | if (*desc->dir_cookie == array->last_cookie) |
369 | desc->eof = 1; | 388 | desc->eof = 1; |
370 | } | 389 | } |
390 | out: | ||
371 | return status; | 391 | return status; |
372 | } | 392 | } |
373 | 393 | ||
@@ -740,19 +760,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
740 | struct nfs_cache_array *array = NULL; | 760 | struct nfs_cache_array *array = NULL; |
741 | struct nfs_open_dir_context *ctx = file->private_data; | 761 | struct nfs_open_dir_context *ctx = file->private_data; |
742 | 762 | ||
743 | if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) { | ||
744 | if (printk_ratelimit()) { | ||
745 | pr_notice("NFS: directory %s/%s contains a readdir loop. " | ||
746 | "Please contact your server vendor. " | ||
747 | "Offending cookie: %llu\n", | ||
748 | file->f_dentry->d_parent->d_name.name, | ||
749 | file->f_dentry->d_name.name, | ||
750 | *desc->dir_cookie); | ||
751 | } | ||
752 | res = -ELOOP; | ||
753 | goto out; | ||
754 | } | ||
755 | |||
756 | array = nfs_readdir_get_array(desc->page); | 763 | array = nfs_readdir_get_array(desc->page); |
757 | if (IS_ERR(array)) { | 764 | if (IS_ERR(array)) { |
758 | res = PTR_ERR(array); | 765 | res = PTR_ERR(array); |
@@ -774,6 +781,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
774 | *desc->dir_cookie = array->array[i+1].cookie; | 781 | *desc->dir_cookie = array->array[i+1].cookie; |
775 | else | 782 | else |
776 | *desc->dir_cookie = array->last_cookie; | 783 | *desc->dir_cookie = array->last_cookie; |
784 | if (ctx->duped != 0) | ||
785 | ctx->duped = 1; | ||
777 | } | 786 | } |
778 | if (array->eof_index >= 0) | 787 | if (array->eof_index >= 0) |
779 | desc->eof = 1; | 788 | desc->eof = 1; |
@@ -805,6 +814,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
805 | struct page *page = NULL; | 814 | struct page *page = NULL; |
806 | int status; | 815 | int status; |
807 | struct inode *inode = desc->file->f_path.dentry->d_inode; | 816 | struct inode *inode = desc->file->f_path.dentry->d_inode; |
817 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
808 | 818 | ||
809 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", | 819 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", |
810 | (unsigned long long)*desc->dir_cookie); | 820 | (unsigned long long)*desc->dir_cookie); |
@@ -818,6 +828,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
818 | desc->page_index = 0; | 828 | desc->page_index = 0; |
819 | desc->last_cookie = *desc->dir_cookie; | 829 | desc->last_cookie = *desc->dir_cookie; |
820 | desc->page = page; | 830 | desc->page = page; |
831 | ctx->duped = 0; | ||
821 | 832 | ||
822 | status = nfs_readdir_xdr_to_array(desc, page, inode); | 833 | status = nfs_readdir_xdr_to_array(desc, page, inode); |
823 | if (status < 0) | 834 | if (status < 0) |
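The readdir changes turn ctx->duped into a tri-state: 0 means no rewind has been seen since the directory's attributes changed, -1 marks a candidate duplicate (f_pos moved backwards onto dup_cookie but nothing has been emitted yet), and 1 means entries were emitted after the rewind, so hitting the same cookie again is a genuine server loop and returns -ELOOP. A runnable userspace sketch of that check (types and values are hypothetical; the kernel keeps this state in nfs_open_dir_context):

#include <stdio.h>

struct dir_ctx { long long dup_cookie; int duped; };

static int check_cookie(struct dir_ctx *ctx, long long cookie,
                        long long new_pos, long long f_pos)
{
        if (new_pos < f_pos) {                  /* position rewound */
                if (ctx->duped > 0 && ctx->dup_cookie == cookie)
                        return -1;              /* same cookie twice: -ELOOP */
                ctx->dup_cookie = cookie;
                ctx->duped = -1;                /* candidate duplicate */
        }
        return 0;
}

int main(void)
{
        struct dir_ctx ctx = { 0, 0 };

        check_cookie(&ctx, 42, 5, 10);  /* first rewind: duped = -1 */
        ctx.duped = 1;                  /* nfs_do_filldir emitted entries */
        printf("%d\n", check_cookie(&ctx, 42, 5, 10));  /* -1: loop */
        return 0;
}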
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b35d25b98da6..1940f1a56a5f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -53,7 +53,7 @@ | |||
53 | 53 | ||
54 | #include <asm/system.h> | 54 | #include <asm/system.h> |
55 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
56 | #include <asm/atomic.h> | 56 | #include <linux/atomic.h> |
57 | 57 | ||
58 | #include "internal.h" | 58 | #include "internal.h" |
59 | #include "iostat.h" | 59 | #include "iostat.h" |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347a2daa..ab12913dd473 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb); | |||
277 | extern char *nfs_path(char **p, struct dentry *dentry, | 277 | extern char *nfs_path(char **p, struct dentry *dentry, |
278 | char *buffer, ssize_t buflen); | 278 | char *buffer, ssize_t buflen); |
279 | extern struct vfsmount *nfs_d_automount(struct path *path); | 279 | extern struct vfsmount *nfs_d_automount(struct path *path); |
280 | #ifdef CONFIG_NFS_V4 | ||
281 | rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); | ||
282 | #endif | ||
280 | 283 | ||
281 | /* getroot.c */ | 284 | /* getroot.c */ |
282 | extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, | 285 | extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, |
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, | |||
288 | extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); | 291 | extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); |
289 | #endif | 292 | #endif |
290 | 293 | ||
294 | struct nfs_pageio_descriptor; | ||
291 | /* read.c */ | 295 | /* read.c */ |
292 | extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, | 296 | extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, |
293 | const struct rpc_call_ops *call_ops); | 297 | const struct rpc_call_ops *call_ops); |
294 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); | 298 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); |
299 | extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, | ||
300 | struct list_head *head); | ||
301 | |||
302 | extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); | ||
303 | extern void nfs_readdata_release(struct nfs_read_data *rdata); | ||
295 | 304 | ||
296 | /* write.c */ | 305 | /* write.c */ |
306 | extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, | ||
307 | struct list_head *head); | ||
308 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); | ||
309 | extern void nfs_writedata_release(struct nfs_write_data *wdata); | ||
297 | extern void nfs_commit_free(struct nfs_write_data *p); | 310 | extern void nfs_commit_free(struct nfs_write_data *p); |
298 | extern int nfs_initiate_write(struct nfs_write_data *data, | 311 | extern int nfs_initiate_write(struct nfs_write_data *data, |
299 | struct rpc_clnt *clnt, | 312 | struct rpc_clnt *clnt, |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1f063bacd285..8102391bb374 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -119,7 +119,7 @@ Elong: | |||
119 | } | 119 | } |
120 | 120 | ||
121 | #ifdef CONFIG_NFS_V4 | 121 | #ifdef CONFIG_NFS_V4 |
122 | static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) | 122 | rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) |
123 | { | 123 | { |
124 | struct gss_api_mech *mech; | 124 | struct gss_api_mech *mech; |
125 | struct xdr_netobj oid; | 125 | struct xdr_netobj oid; |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index e49e73107e62..7ef23979896d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -415,7 +415,7 @@ fail: | |||
415 | } | 415 | } |
416 | 416 | ||
417 | int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, | 417 | int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, |
418 | mode_t mode) | 418 | umode_t mode) |
419 | { | 419 | { |
420 | struct posix_acl *dfacl, *acl; | 420 | struct posix_acl *dfacl, *acl; |
421 | int error = 0; | 421 | int error = 0; |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 38053d823eb0..85f1690ca08c 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -316,7 +316,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
316 | int flags, struct nfs_open_context *ctx) | 316 | int flags, struct nfs_open_context *ctx) |
317 | { | 317 | { |
318 | struct nfs3_createdata *data; | 318 | struct nfs3_createdata *data; |
319 | mode_t mode = sattr->ia_mode; | 319 | umode_t mode = sattr->ia_mode; |
320 | int status = -ENOMEM; | 320 | int status = -ENOMEM; |
321 | 321 | ||
322 | dprintk("NFS call create %s\n", dentry->d_name.name); | 322 | dprintk("NFS call create %s\n", dentry->d_name.name); |
@@ -562,7 +562,7 @@ static int | |||
562 | nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) | 562 | nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) |
563 | { | 563 | { |
564 | struct nfs3_createdata *data; | 564 | struct nfs3_createdata *data; |
565 | int mode = sattr->ia_mode; | 565 | umode_t mode = sattr->ia_mode; |
566 | int status = -ENOMEM; | 566 | int status = -ENOMEM; |
567 | 567 | ||
568 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); | 568 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); |
@@ -681,7 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
681 | dev_t rdev) | 681 | dev_t rdev) |
682 | { | 682 | { |
683 | struct nfs3_createdata *data; | 683 | struct nfs3_createdata *data; |
684 | mode_t mode = sattr->ia_mode; | 684 | umode_t mode = sattr->ia_mode; |
685 | int status = -ENOMEM; | 685 | int status = -ENOMEM; |
686 | 686 | ||
687 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, | 687 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b788f2eb1ba0..1ec1a85fa71c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -48,6 +48,7 @@ enum nfs4_client_state { | |||
48 | NFS4CLNT_SESSION_RESET, | 48 | NFS4CLNT_SESSION_RESET, |
49 | NFS4CLNT_RECALL_SLOT, | 49 | NFS4CLNT_RECALL_SLOT, |
50 | NFS4CLNT_LEASE_CONFIRM, | 50 | NFS4CLNT_LEASE_CONFIRM, |
51 | NFS4CLNT_SERVER_SCOPE_MISMATCH, | ||
51 | }; | 52 | }; |
52 | 53 | ||
53 | enum nfs4_session_state { | 54 | enum nfs4_session_state { |
@@ -66,6 +67,8 @@ struct nfs4_minor_version_ops { | |||
66 | int cache_reply); | 67 | int cache_reply); |
67 | int (*validate_stateid)(struct nfs_delegation *, | 68 | int (*validate_stateid)(struct nfs_delegation *, |
68 | const nfs4_stateid *); | 69 | const nfs4_stateid *); |
70 | int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, | ||
71 | struct nfs_fsinfo *); | ||
69 | const struct nfs4_state_recovery_ops *reboot_recovery_ops; | 72 | const struct nfs4_state_recovery_ops *reboot_recovery_ops; |
70 | const struct nfs4_state_recovery_ops *nograce_recovery_ops; | 73 | const struct nfs4_state_recovery_ops *nograce_recovery_ops; |
71 | const struct nfs4_state_maintenance_ops *state_renewal_ops; | 74 | const struct nfs4_state_maintenance_ops *state_renewal_ops; |
@@ -315,7 +318,7 @@ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; | |||
315 | extern const u32 nfs4_fattr_bitmap[2]; | 318 | extern const u32 nfs4_fattr_bitmap[2]; |
316 | extern const u32 nfs4_statfs_bitmap[2]; | 319 | extern const u32 nfs4_statfs_bitmap[2]; |
317 | extern const u32 nfs4_pathconf_bitmap[2]; | 320 | extern const u32 nfs4_pathconf_bitmap[2]; |
318 | extern const u32 nfs4_fsinfo_bitmap[2]; | 321 | extern const u32 nfs4_fsinfo_bitmap[3]; |
319 | extern const u32 nfs4_fs_locations_bitmap[2]; | 322 | extern const u32 nfs4_fs_locations_bitmap[2]; |
320 | 323 | ||
321 | /* nfs4renewd.c */ | 324 | /* nfs4renewd.c */ |
@@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); | |||
349 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); | 352 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); |
350 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); | 353 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); |
351 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); | 354 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); |
355 | extern void nfs41_handle_server_scope(struct nfs_client *, | ||
356 | struct server_scope **); | ||
352 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); | 357 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); |
353 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); | 358 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); |
354 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); | 359 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f9d03abcd04c..e8915d4840ad 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -170,7 +170,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) | |||
170 | 170 | ||
171 | pnfs_set_layoutcommit(wdata); | 171 | pnfs_set_layoutcommit(wdata); |
172 | dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, | 172 | dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, |
173 | (unsigned long) wdata->lseg->pls_end_pos); | 173 | (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); |
174 | } | 174 | } |
175 | 175 | ||
176 | /* | 176 | /* |
@@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) | |||
334 | __func__, data->inode->i_ino, | 334 | __func__, data->inode->i_ino, |
335 | data->args.pgbase, (size_t)data->args.count, offset); | 335 | data->args.pgbase, (size_t)data->args.count, offset); |
336 | 336 | ||
337 | if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) | ||
338 | return PNFS_NOT_ATTEMPTED; | ||
339 | |||
337 | /* Retrieve the correct rpc_client for the byte range */ | 340 | /* Retrieve the correct rpc_client for the byte range */ |
338 | j = nfs4_fl_calc_j_index(lseg, offset); | 341 | j = nfs4_fl_calc_j_index(lseg, offset); |
339 | idx = nfs4_fl_calc_ds_index(lseg, j); | 342 | idx = nfs4_fl_calc_ds_index(lseg, j); |
@@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) | |||
344 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | 347 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); |
345 | return PNFS_NOT_ATTEMPTED; | 348 | return PNFS_NOT_ATTEMPTED; |
346 | } | 349 | } |
347 | dprintk("%s USE DS:ip %x %hu\n", __func__, | 350 | dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); |
348 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | ||
349 | 351 | ||
350 | /* No multipath support. Use first DS */ | 352 | /* No multipath support. Use first DS */ |
351 | data->ds_clp = ds->ds_clp; | 353 | data->ds_clp = ds->ds_clp; |
@@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
374 | struct nfs_fh *fh; | 376 | struct nfs_fh *fh; |
375 | int status; | 377 | int status; |
376 | 378 | ||
379 | if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) | ||
380 | return PNFS_NOT_ATTEMPTED; | ||
381 | |||
377 | /* Retrieve the correct rpc_client for the byte range */ | 382 | /* Retrieve the correct rpc_client for the byte range */ |
378 | j = nfs4_fl_calc_j_index(lseg, offset); | 383 | j = nfs4_fl_calc_j_index(lseg, offset); |
379 | idx = nfs4_fl_calc_ds_index(lseg, j); | 384 | idx = nfs4_fl_calc_ds_index(lseg, j); |
@@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
384 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | 389 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); |
385 | return PNFS_NOT_ATTEMPTED; | 390 | return PNFS_NOT_ATTEMPTED; |
386 | } | 391 | } |
387 | dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, | 392 | dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, |
388 | data->inode->i_ino, sync, (size_t) data->args.count, offset, | 393 | data->inode->i_ino, sync, (size_t) data->args.count, offset, |
389 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | 394 | ds->ds_remotestr); |
390 | 395 | ||
391 | data->write_done_cb = filelayout_write_done_cb; | 396 | data->write_done_cb = filelayout_write_done_cb; |
392 | data->ds_clp = ds->ds_clp; | 397 | data->ds_clp = ds->ds_clp; |
@@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
428 | 433 | ||
429 | dprintk("--> %s\n", __func__); | 434 | dprintk("--> %s\n", __func__); |
430 | 435 | ||
436 | /* FIXME: remove this check when layout segment support is added */ | ||
437 | if (lgr->range.offset != 0 || | ||
438 | lgr->range.length != NFS4_MAX_UINT64) { | ||
439 | dprintk("%s Only whole file layouts supported. Use MDS i/o\n", | ||
440 | __func__); | ||
441 | goto out; | ||
442 | } | ||
443 | |||
431 | if (fl->pattern_offset > lgr->range.offset) { | 444 | if (fl->pattern_offset > lgr->range.offset) { |
432 | dprintk("%s pattern_offset %lld too large\n", | 445 | dprintk("%s pattern_offset %lld too large\n", |
433 | __func__, fl->pattern_offset); | 446 | __func__, fl->pattern_offset); |
@@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
449 | goto out; | 462 | goto out; |
450 | } else | 463 | } else |
451 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | 464 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
465 | /* Found deviceid is being reaped */ | ||
466 | if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) | ||
467 | goto out_put; | ||
468 | |||
452 | fl->dsaddr = dsaddr; | 469 | fl->dsaddr = dsaddr; |
453 | 470 | ||
454 | if (fl->first_stripe_index < 0 || | 471 | if (fl->first_stripe_index < 0 || |
@@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
659 | * return true : coalesce page | 676 | * return true : coalesce page |
660 | * return false : don't coalesce page | 677 | * return false : don't coalesce page |
661 | */ | 678 | */ |
662 | bool | 679 | static bool |
663 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 680 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
664 | struct nfs_page *req) | 681 | struct nfs_page *req) |
665 | { | 682 | { |
@@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
670 | !nfs_generic_pg_test(pgio, prev, req)) | 687 | !nfs_generic_pg_test(pgio, prev, req)) |
671 | return false; | 688 | return false; |
672 | 689 | ||
673 | if (!pgio->pg_lseg) | ||
674 | return 1; | ||
675 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; | 690 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; |
676 | r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; | 691 | r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; |
677 | stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; | 692 | stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; |
@@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
682 | return (p_stripe == r_stripe); | 697 | return (p_stripe == r_stripe); |
683 | } | 698 | } |
684 | 699 | ||
700 | void | ||
701 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, | ||
702 | struct nfs_page *req) | ||
703 | { | ||
704 | BUG_ON(pgio->pg_lseg != NULL); | ||
705 | |||
706 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
707 | req->wb_context, | ||
708 | 0, | ||
709 | NFS4_MAX_UINT64, | ||
710 | IOMODE_READ, | ||
711 | GFP_KERNEL); | ||
712 | /* If no lseg, fall back to read through mds */ | ||
713 | if (pgio->pg_lseg == NULL) | ||
714 | nfs_pageio_reset_read_mds(pgio); | ||
715 | } | ||
716 | |||
717 | void | ||
718 | filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, | ||
719 | struct nfs_page *req) | ||
720 | { | ||
721 | BUG_ON(pgio->pg_lseg != NULL); | ||
722 | |||
723 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
724 | req->wb_context, | ||
725 | 0, | ||
726 | NFS4_MAX_UINT64, | ||
727 | IOMODE_RW, | ||
728 | GFP_NOFS); | ||
729 | /* If no lseg, fall back to write through mds */ | ||
730 | if (pgio->pg_lseg == NULL) | ||
731 | nfs_pageio_reset_write_mds(pgio); | ||
732 | } | ||
733 | |||
734 | static const struct nfs_pageio_ops filelayout_pg_read_ops = { | ||
735 | .pg_init = filelayout_pg_init_read, | ||
736 | .pg_test = filelayout_pg_test, | ||
737 | .pg_doio = pnfs_generic_pg_readpages, | ||
738 | }; | ||
739 | |||
740 | static const struct nfs_pageio_ops filelayout_pg_write_ops = { | ||
741 | .pg_init = filelayout_pg_init_write, | ||
742 | .pg_test = filelayout_pg_test, | ||
743 | .pg_doio = pnfs_generic_pg_writepages, | ||
744 | }; | ||
745 | |||
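The two pg_init hooks registered above share one fallback shape: ask for a whole-file layout segment, and if the MDS grants none, reset the descriptor to plain through-MDS I/O before any page is queued. A sketch of that shape with stand-in types (none of these names are the real NFS client API):

#include <stdio.h>
#include <stddef.h>

struct pgio_ops { const char *name; };

static const struct pgio_ops mds_ops  = { "mds" };
static const struct pgio_ops pnfs_ops = { "pnfs" };

struct pgio_desc {
        const struct pgio_ops *ops;
        void *lseg;                     /* layout segment, or NULL */
};

/* Stand-in for pnfs_update_layout(): may or may not grant a segment. */
static void *update_layout(int have_layout)
{
        static int token;
        return have_layout ? &token : NULL;
}

static void pg_init(struct pgio_desc *desc, int have_layout)
{
        desc->lseg = update_layout(have_layout);
        if (!desc->lseg)
                desc->ops = &mds_ops;   /* fall back to MDS I/O */
        else
                desc->ops = &pnfs_ops;
}

int main(void)
{
        struct pgio_desc d = { &pnfs_ops, NULL };

        pg_init(&d, 0);
        printf("%s\n", d.ops->name);    /* mds */
        return 0;
}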
685 | static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) | 746 | static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) |
686 | { | 747 | { |
687 | return !FILELAYOUT_LSEG(lseg)->commit_through_mds; | 748 | return !FILELAYOUT_LSEG(lseg)->commit_through_mds; |
@@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
879 | .owner = THIS_MODULE, | 940 | .owner = THIS_MODULE, |
880 | .alloc_lseg = filelayout_alloc_lseg, | 941 | .alloc_lseg = filelayout_alloc_lseg, |
881 | .free_lseg = filelayout_free_lseg, | 942 | .free_lseg = filelayout_free_lseg, |
882 | .pg_test = filelayout_pg_test, | 943 | .pg_read_ops = &filelayout_pg_read_ops, |
944 | .pg_write_ops = &filelayout_pg_write_ops, | ||
883 | .mark_pnfs_commit = filelayout_mark_pnfs_commit, | 945 | .mark_pnfs_commit = filelayout_mark_pnfs_commit, |
884 | .choose_commit_list = filelayout_choose_commit_list, | 946 | .choose_commit_list = filelayout_choose_commit_list, |
885 | .commit_pagelist = filelayout_commit_pagelist, | 947 | .commit_pagelist = filelayout_commit_pagelist, |
@@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void) | |||
902 | pnfs_unregister_layoutdriver(&filelayout_type); | 964 | pnfs_unregister_layoutdriver(&filelayout_type); |
903 | } | 965 | } |
904 | 966 | ||
967 | MODULE_ALIAS("nfs-layouttype4-1"); | ||
968 | |||
905 | module_init(nfs4filelayout_init); | 969 | module_init(nfs4filelayout_init); |
906 | module_exit(nfs4filelayout_exit); | 970 | module_exit(nfs4filelayout_exit); |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index cebe01e3795e..2e42284253fa 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -47,10 +47,17 @@ enum stripetype4 { | |||
47 | }; | 47 | }; |
48 | 48 | ||
49 | /* Individual ip address */ | 49 | /* Individual ip address */ |
50 | struct nfs4_pnfs_ds_addr { | ||
51 | struct sockaddr_storage da_addr; | ||
52 | size_t da_addrlen; | ||
53 | struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
54 | char *da_remotestr; /* human readable addr+port */ | ||
55 | }; | ||
56 | |||
50 | struct nfs4_pnfs_ds { | 57 | struct nfs4_pnfs_ds { |
51 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | 58 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ |
52 | u32 ds_ip_addr; | 59 | char *ds_remotestr; /* comma sep list of addrs */ |
53 | u32 ds_port; | 60 | struct list_head ds_addrs; |
54 | struct nfs_client *ds_clp; | 61 | struct nfs_client *ds_clp; |
55 | atomic_t ds_count; | 62 | atomic_t ds_count; |
56 | }; | 63 | }; |
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) | |||
89 | generic_hdr); | 96 | generic_hdr); |
90 | } | 97 | } |
91 | 98 | ||
99 | static inline struct nfs4_deviceid_node * | ||
100 | FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) | ||
101 | { | ||
102 | return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; | ||
103 | } | ||
104 | |||
92 | extern struct nfs_fh * | 105 | extern struct nfs_fh * |
93 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | 106 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); |
94 | 107 | ||
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3b7bf1377264..ed388aae9689 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds) | |||
56 | printk("%s NULL device\n", __func__); | 56 | printk("%s NULL device\n", __func__); |
57 | return; | 57 | return; |
58 | } | 58 | } |
59 | printk(" ip_addr %x port %hu\n" | 59 | printk(" ds %s\n" |
60 | " ref count %d\n" | 60 | " ref count %d\n" |
61 | " client %p\n" | 61 | " client %p\n" |
62 | " cl_exchange_flags %x\n", | 62 | " cl_exchange_flags %x\n", |
63 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | 63 | ds->ds_remotestr, |
64 | atomic_read(&ds->ds_count), ds->ds_clp, | 64 | atomic_read(&ds->ds_count), ds->ds_clp, |
65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | 65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); |
66 | } | 66 | } |
67 | 67 | ||
68 | /* nfs4_ds_cache_lock is held */ | 68 | static bool |
69 | static struct nfs4_pnfs_ds * | 69 | same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) |
70 | _data_server_lookup_locked(u32 ip_addr, u32 port) | ||
71 | { | 70 | { |
72 | struct nfs4_pnfs_ds *ds; | 71 | struct sockaddr_in *a, *b; |
72 | struct sockaddr_in6 *a6, *b6; | ||
73 | |||
74 | if (addr1->sa_family != addr2->sa_family) | ||
75 | return false; | ||
76 | |||
77 | switch (addr1->sa_family) { | ||
78 | case AF_INET: | ||
79 | a = (struct sockaddr_in *)addr1; | ||
80 | b = (struct sockaddr_in *)addr2; | ||
81 | |||
82 | if (a->sin_addr.s_addr == b->sin_addr.s_addr && | ||
83 | a->sin_port == b->sin_port) | ||
84 | return true; | ||
85 | break; | ||
86 | |||
87 | case AF_INET6: | ||
88 | a6 = (struct sockaddr_in6 *)addr1; | ||
89 | b6 = (struct sockaddr_in6 *)addr2; | ||
90 | |||
91 | /* LINKLOCAL addresses must have matching scope_id */ | ||
92 | if (ipv6_addr_scope(&a6->sin6_addr) == | ||
93 | IPV6_ADDR_SCOPE_LINKLOCAL && | ||
94 | a6->sin6_scope_id != b6->sin6_scope_id) | ||
95 | return false; | ||
96 | |||
97 | if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && | ||
98 | a6->sin6_port == b6->sin6_port) | ||
99 | return true; | ||
100 | break; | ||
101 | |||
102 | default: | ||
103 | dprintk("%s: unhandled address family: %u\n", | ||
104 | __func__, addr1->sa_family); | ||
105 | return false; | ||
106 | } | ||
73 | 107 | ||
74 | dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", | 108 | return false; |
75 | ntohl(ip_addr), ntohs(port)); | 109 | } |
76 | 110 | ||
77 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | 111 | /* |
78 | if (ds->ds_ip_addr == ip_addr && | 112 | * Lookup DS by addresses. The first matching address returns the DS. |
79 | ds->ds_port == port) { | 113 | * nfs4_ds_cache_lock is held |
80 | return ds; | 114 | */ |
115 | static struct nfs4_pnfs_ds * | ||
116 | _data_server_lookup_locked(struct list_head *dsaddrs) | ||
117 | { | ||
118 | struct nfs4_pnfs_ds *ds; | ||
119 | struct nfs4_pnfs_ds_addr *da1, *da2; | ||
120 | |||
121 | list_for_each_entry(da1, dsaddrs, da_node) { | ||
122 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | ||
123 | list_for_each_entry(da2, &ds->ds_addrs, da_node) { | ||
124 | if (same_sockaddr( | ||
125 | (struct sockaddr *)&da1->da_addr, | ||
126 | (struct sockaddr *)&da2->da_addr)) | ||
127 | return ds; | ||
128 | } | ||
81 | } | 129 | } |
82 | } | 130 | } |
83 | return NULL; | 131 | return NULL; |
84 | } | 132 | } |
85 | 133 | ||
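same_sockaddr() compares family, address and port, with one subtlety: link-local IPv6 addresses are equal only when their scope ids also match, since the same fe80:: address can exist on several interfaces. A userspace sketch of the same comparison (sockaddr_equal and the sample address are illustrative):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool sockaddr_equal(const struct sockaddr *s1, const struct sockaddr *s2)
{
        if (s1->sa_family != s2->sa_family)
                return false;

        if (s1->sa_family == AF_INET) {
                const struct sockaddr_in *a = (const struct sockaddr_in *)s1;
                const struct sockaddr_in *b = (const struct sockaddr_in *)s2;

                return a->sin_addr.s_addr == b->sin_addr.s_addr &&
                       a->sin_port == b->sin_port;
        }
        if (s1->sa_family == AF_INET6) {
                const struct sockaddr_in6 *a = (const struct sockaddr_in6 *)s1;
                const struct sockaddr_in6 *b = (const struct sockaddr_in6 *)s2;

                /* link-local addresses must agree on the interface */
                if (IN6_IS_ADDR_LINKLOCAL(&a->sin6_addr) &&
                    a->sin6_scope_id != b->sin6_scope_id)
                        return false;
                return !memcmp(&a->sin6_addr, &b->sin6_addr,
                               sizeof(a->sin6_addr)) &&
                       a->sin6_port == b->sin6_port;
        }
        return false;                   /* unhandled family */
}

int main(void)
{
        struct sockaddr_in a, b;

        memset(&a, 0, sizeof(a));
        a.sin_family = AF_INET;
        a.sin_port = htons(2049);
        inet_pton(AF_INET, "10.0.0.1", &a.sin_addr);
        b = a;
        printf("%d\n", sockaddr_equal((struct sockaddr *)&a,
                                      (struct sockaddr *)&b));  /* 1 */
        return 0;
}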
86 | /* | 134 | /* |
135 | * Compare two lists of addresses. | ||
136 | */ | ||
137 | static bool | ||
138 | _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, | ||
139 | struct list_head *dsaddrs2) | ||
140 | { | ||
141 | struct nfs4_pnfs_ds_addr *da1, *da2; | ||
142 | size_t count1 = 0, | ||
143 | count2 = 0; | ||
144 | |||
145 | list_for_each_entry(da1, dsaddrs1, da_node) | ||
146 | count1++; | ||
147 | |||
148 | list_for_each_entry(da2, dsaddrs2, da_node) { | ||
149 | bool found = false; | ||
150 | count2++; | ||
151 | list_for_each_entry(da1, dsaddrs1, da_node) { | ||
152 | if (same_sockaddr((struct sockaddr *)&da1->da_addr, | ||
153 | (struct sockaddr *)&da2->da_addr)) { | ||
154 | found = true; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | if (!found) | ||
159 | return false; | ||
160 | } | ||
161 | |||
162 | return (count1 == count2); | ||
163 | } | ||
164 | |||
165 | /* | ||
87 | * Create an rpc connection to the nfs4_pnfs_ds data server | 166 | * Create an rpc connection to the nfs4_pnfs_ds data server |
88 | * Currently only support IPv4 | 167 | * Currently only supports IPv4 and IPv6 addresses |
89 | */ | 168 | */ |
90 | static int | 169 | static int |
91 | nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | 170 | nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) |
92 | { | 171 | { |
93 | struct nfs_client *clp; | 172 | struct nfs_client *clp = ERR_PTR(-EIO); |
94 | struct sockaddr_in sin; | 173 | struct nfs4_pnfs_ds_addr *da; |
95 | int status = 0; | 174 | int status = 0; |
96 | 175 | ||
97 | dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, | 176 | dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, |
98 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | ||
99 | mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); | 177 | mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); |
100 | 178 | ||
101 | sin.sin_family = AF_INET; | 179 | BUG_ON(list_empty(&ds->ds_addrs)); |
102 | sin.sin_addr.s_addr = ds->ds_ip_addr; | 180 | |
103 | sin.sin_port = ds->ds_port; | 181 | list_for_each_entry(da, &ds->ds_addrs, da_node) { |
182 | dprintk("%s: DS %s: trying address %s\n", | ||
183 | __func__, ds->ds_remotestr, da->da_remotestr); | ||
184 | |||
185 | clp = nfs4_set_ds_client(mds_srv->nfs_client, | ||
186 | (struct sockaddr *)&da->da_addr, | ||
187 | da->da_addrlen, IPPROTO_TCP); | ||
188 | if (!IS_ERR(clp)) | ||
189 | break; | ||
190 | } | ||
104 | 191 | ||
105 | clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, | ||
106 | sizeof(sin), IPPROTO_TCP); | ||
107 | if (IS_ERR(clp)) { | 192 | if (IS_ERR(clp)) { |
108 | status = PTR_ERR(clp); | 193 | status = PTR_ERR(clp); |
109 | goto out; | 194 | goto out; |
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | |||
115 | goto out_put; | 200 | goto out_put; |
116 | } | 201 | } |
117 | ds->ds_clp = clp; | 202 | ds->ds_clp = clp; |
118 | dprintk("%s [existing] ip=%x, port=%hu\n", __func__, | 203 | dprintk("%s [existing] server=%s\n", __func__, |
119 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | 204 | ds->ds_remotestr); |
120 | goto out; | 205 | goto out; |
121 | } | 206 | } |
122 | 207 | ||
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | |||
135 | goto out_put; | 220 | goto out_put; |
136 | 221 | ||
137 | ds->ds_clp = clp; | 222 | ds->ds_clp = clp; |
138 | dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), | 223 | dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); |
139 | ntohs(ds->ds_port)); | ||
140 | out: | 224 | out: |
141 | return status; | 225 | return status; |
142 | out_put: | 226 | out_put: |
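With multipath, nfs4_ds_connect() no longer builds a single sockaddr_in; it walks the DS's address list and keeps the first address that yields a client. The equivalent userspace idiom, assuming plain TCP sockets rather than the kernel RPC layer (host and port below are example values):

#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Walk all addresses for a host and keep the first that connects. */
static int connect_first(const char *host, const char *port)
{
        struct addrinfo hints, *res, *ai;
        int fd = -1;

        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_UNSPEC;    /* try both v4 and v6 */
        hints.ai_socktype = SOCK_STREAM;

        if (getaddrinfo(host, port, &hints, &res))
                return -1;

        for (ai = res; ai; ai = ai->ai_next) {
                fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
                if (fd < 0)
                        continue;
                if (connect(fd, ai->ai_addr, ai->ai_addrlen) == 0)
                        break;          /* first working address wins */
                close(fd);
                fd = -1;
        }
        freeaddrinfo(res);
        return fd;
}

int main(void)
{
        int fd = connect_first("localhost", "2049");    /* example target */

        printf(fd >= 0 ? "connected\n" : "no usable address\n");
        if (fd >= 0)
                close(fd);
        return 0;
}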
@@ -147,12 +231,25 @@ out_put: | |||
147 | static void | 231 | static void |
148 | destroy_ds(struct nfs4_pnfs_ds *ds) | 232 | destroy_ds(struct nfs4_pnfs_ds *ds) |
149 | { | 233 | { |
234 | struct nfs4_pnfs_ds_addr *da; | ||
235 | |||
150 | dprintk("--> %s\n", __func__); | 236 | dprintk("--> %s\n", __func__); |
151 | ifdebug(FACILITY) | 237 | ifdebug(FACILITY) |
152 | print_ds(ds); | 238 | print_ds(ds); |
153 | 239 | ||
154 | if (ds->ds_clp) | 240 | if (ds->ds_clp) |
155 | nfs_put_client(ds->ds_clp); | 241 | nfs_put_client(ds->ds_clp); |
242 | |||
243 | while (!list_empty(&ds->ds_addrs)) { | ||
244 | da = list_first_entry(&ds->ds_addrs, | ||
245 | struct nfs4_pnfs_ds_addr, | ||
246 | da_node); | ||
247 | list_del_init(&da->da_node); | ||
248 | kfree(da->da_remotestr); | ||
249 | kfree(da); | ||
250 | } | ||
251 | |||
252 | kfree(ds->ds_remotestr); | ||
156 | kfree(ds); | 253 | kfree(ds); |
157 | } | 254 | } |
158 | 255 | ||
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |||
179 | kfree(dsaddr); | 276 | kfree(dsaddr); |
180 | } | 277 | } |
181 | 278 | ||
279 | /* | ||
280 | * Create a string with a human-readable address and port to avoid | ||
281 | * complicated setup around many dprintks. | ||
282 | */ | ||
283 | static char * | ||
284 | nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
285 | { | ||
286 | struct nfs4_pnfs_ds_addr *da; | ||
287 | char *remotestr; | ||
288 | size_t len; | ||
289 | char *p; | ||
290 | |||
291 | len = 3; /* '{', '}' and terminating NUL */ | ||
292 | list_for_each_entry(da, dsaddrs, da_node) { | ||
293 | len += strlen(da->da_remotestr) + 1; /* string plus comma */ | ||
294 | } | ||
295 | |||
296 | remotestr = kzalloc(len, gfp_flags); | ||
297 | if (!remotestr) | ||
298 | return NULL; | ||
299 | |||
300 | p = remotestr; | ||
301 | *(p++) = '{'; | ||
302 | len--; | ||
303 | list_for_each_entry(da, dsaddrs, da_node) { | ||
304 | size_t ll = strlen(da->da_remotestr); | ||
305 | |||
306 | if (ll > len) | ||
307 | goto out_err; | ||
308 | |||
309 | memcpy(p, da->da_remotestr, ll); | ||
310 | p += ll; | ||
311 | len -= ll; | ||
312 | |||
313 | if (len < 1) | ||
314 | goto out_err; | ||
315 | (*p++) = ','; | ||
316 | len--; | ||
317 | } | ||
318 | if (len < 2) | ||
319 | goto out_err; | ||
320 | *(p++) = '}'; | ||
321 | *p = '\0'; | ||
322 | return remotestr; | ||
323 | out_err: | ||
324 | kfree(remotestr); | ||
325 | return NULL; | ||
326 | } | ||
327 | |||
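nfs4_pnfs_remotestr() only exists to make the dprintk lines readable: it joins the per-address strings into one "{addr1,addr2,...}" label. A self-contained sketch of the same joiner (join_addrs and the sample addresses are made up for illustration):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *join_addrs(const char **addrs, size_t n)
{
        size_t len = 3;                 /* '{', '}' and the NUL */
        size_t i;
        char *s, *p;

        for (i = 0; i < n; i++)
                len += strlen(addrs[i]) + 1;    /* string plus comma */

        s = malloc(len);
        if (!s)
                return NULL;
        p = s;
        *p++ = '{';
        for (i = 0; i < n; i++) {
                size_t ll = strlen(addrs[i]);

                if (i)
                        *p++ = ',';
                memcpy(p, addrs[i], ll);
                p += ll;
        }
        *p++ = '}';
        *p = '\0';
        return s;
}

int main(void)
{
        const char *addrs[] = { "10.0.0.1:2049", "[fe80::1]:2049" };
        char *s = join_addrs(addrs, 2);

        printf("%s\n", s ? s : "(alloc failed)");
        free(s);
        return 0;
}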
182 | static struct nfs4_pnfs_ds * | 328 | static struct nfs4_pnfs_ds * |
183 | nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) | 329 | nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) |
184 | { | 330 | { |
185 | struct nfs4_pnfs_ds *tmp_ds, *ds; | 331 | struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; |
332 | char *remotestr; | ||
186 | 333 | ||
187 | ds = kzalloc(sizeof(*tmp_ds), gfp_flags); | 334 | if (list_empty(dsaddrs)) { |
335 | dprintk("%s: no addresses defined\n", __func__); | ||
336 | goto out; | ||
337 | } | ||
338 | |||
339 | ds = kzalloc(sizeof(*ds), gfp_flags); | ||
188 | if (!ds) | 340 | if (!ds) |
189 | goto out; | 341 | goto out; |
190 | 342 | ||
343 | /* this is only used for debugging, so it's ok if it's NULL */ | ||
344 | remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); | ||
345 | |||
191 | spin_lock(&nfs4_ds_cache_lock); | 346 | spin_lock(&nfs4_ds_cache_lock); |
192 | tmp_ds = _data_server_lookup_locked(ip_addr, port); | 347 | tmp_ds = _data_server_lookup_locked(dsaddrs); |
193 | if (tmp_ds == NULL) { | 348 | if (tmp_ds == NULL) { |
194 | ds->ds_ip_addr = ip_addr; | 349 | INIT_LIST_HEAD(&ds->ds_addrs); |
195 | ds->ds_port = port; | 350 | list_splice_init(dsaddrs, &ds->ds_addrs); |
351 | ds->ds_remotestr = remotestr; | ||
196 | atomic_set(&ds->ds_count, 1); | 352 | atomic_set(&ds->ds_count, 1); |
197 | INIT_LIST_HEAD(&ds->ds_node); | 353 | INIT_LIST_HEAD(&ds->ds_node); |
198 | ds->ds_clp = NULL; | 354 | ds->ds_clp = NULL; |
199 | list_add(&ds->ds_node, &nfs4_data_server_cache); | 355 | list_add(&ds->ds_node, &nfs4_data_server_cache); |
200 | dprintk("%s add new data server ip 0x%x\n", __func__, | 356 | dprintk("%s add new data server %s\n", __func__, |
201 | ds->ds_ip_addr); | 357 | ds->ds_remotestr); |
202 | } else { | 358 | } else { |
359 | if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, | ||
360 | dsaddrs)) { | ||
361 | dprintk("%s: multipath address mismatch: %s != %s", | ||
362 | __func__, tmp_ds->ds_remotestr, remotestr); | ||
363 | } | ||
364 | kfree(remotestr); | ||
203 | kfree(ds); | 365 | kfree(ds); |
204 | atomic_inc(&tmp_ds->ds_count); | 366 | atomic_inc(&tmp_ds->ds_count); |
205 | dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", | 367 | dprintk("%s data server %s found, inc'ed ds_count to %d\n", |
206 | __func__, tmp_ds->ds_ip_addr, | 368 | __func__, tmp_ds->ds_remotestr, |
207 | atomic_read(&tmp_ds->ds_count)); | 369 | atomic_read(&tmp_ds->ds_count)); |
208 | ds = tmp_ds; | 370 | ds = tmp_ds; |
209 | } | 371 | } |
@@ -213,18 +375,22 @@ out: | |||
213 | } | 375 | } |
214 | 376 | ||
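nfs4_pnfs_ds_add() follows a classic cache-or-reuse pattern: allocate a candidate outside the lock, then under nfs4_ds_cache_lock either splice it into the cache or bump the refcount of an existing entry and throw the candidate away. A reduced sketch, keyed on a string rather than an address list for brevity:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A cached data server, keyed here by its remote string for brevity;
 * the kernel keys on the full sockaddr list instead. */
struct ds {
        struct ds *next;
        const char *name;
        int refcount;
};

static struct ds *cache;
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

/* Insert the candidate, or reuse and re-reference a cached entry. */
static struct ds *ds_add(struct ds *cand)
{
        struct ds *d;

        pthread_mutex_lock(&cache_lock);
        for (d = cache; d; d = d->next)
                if (strcmp(d->name, cand->name) == 0)
                        break;
        if (!d) {
                cand->refcount = 1;
                cand->next = cache;
                cache = cand;
                d = cand;
        } else {
                d->refcount++;          /* reuse the cached DS... */
                free(cand);             /* ...and drop the candidate */
        }
        pthread_mutex_unlock(&cache_lock);
        return d;
}

int main(void)
{
        struct ds *a = calloc(1, sizeof(*a));
        struct ds *b = calloc(1, sizeof(*b));

        a->name = b->name = "{10.0.0.1:2049}";
        ds_add(a);
        printf("refcount %d\n", ds_add(b)->refcount);   /* 2 */
        return 0;
}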
215 | /* | 377 | /* |
216 | * Currently only support ipv4, and one multi-path address. | 378 | * Currently only supports ipv4, ipv6 and one multi-path address. |
217 | */ | 379 | */ |
218 | static struct nfs4_pnfs_ds * | 380 | static struct nfs4_pnfs_ds_addr * |
219 | decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) | 381 | decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) |
220 | { | 382 | { |
221 | struct nfs4_pnfs_ds *ds = NULL; | 383 | struct nfs4_pnfs_ds_addr *da = NULL; |
222 | char *buf; | 384 | char *buf, *portstr; |
223 | const char *ipend, *pstr; | 385 | u32 port; |
224 | u32 ip_addr, port; | 386 | int nlen, rlen; |
225 | int nlen, rlen, i; | ||
226 | int tmp[2]; | 387 | int tmp[2]; |
227 | __be32 *p; | 388 | __be32 *p; |
389 | char *netid, *match_netid; | ||
390 | size_t len, match_netid_len; | ||
391 | char *startsep = ""; | ||
392 | char *endsep = ""; | ||
393 | |||
228 | 394 | ||
229 | /* r_netid */ | 395 | /* r_netid */ |
230 | p = xdr_inline_decode(streamp, 4); | 396 | p = xdr_inline_decode(streamp, 4); |
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla | |||
236 | if (unlikely(!p)) | 402 | if (unlikely(!p)) |
237 | goto out_err; | 403 | goto out_err; |
238 | 404 | ||
239 | /* Check that netid is "tcp" */ | 405 | netid = kmalloc(nlen+1, gfp_flags); |
240 | if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { | 406 | if (unlikely(!netid)) |
241 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | ||
242 | goto out_err; | 407 | goto out_err; |
243 | } | ||
244 | 408 | ||
245 | /* r_addr */ | 409 | netid[nlen] = '\0'; |
410 | memcpy(netid, p, nlen); | ||
411 | |||
412 | /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ | ||
246 | p = xdr_inline_decode(streamp, 4); | 413 | p = xdr_inline_decode(streamp, 4); |
247 | if (unlikely(!p)) | 414 | if (unlikely(!p)) |
248 | goto out_err; | 415 | goto out_free_netid; |
249 | rlen = be32_to_cpup(p); | 416 | rlen = be32_to_cpup(p); |
250 | 417 | ||
251 | p = xdr_inline_decode(streamp, rlen); | 418 | p = xdr_inline_decode(streamp, rlen); |
252 | if (unlikely(!p)) | 419 | if (unlikely(!p)) |
253 | goto out_err; | 420 | goto out_free_netid; |
254 | 421 | ||
255 | /* ipv6 length plus port is legal */ | 422 | /* port is ".ABC.DEF", 8 chars max */ |
256 | if (rlen > INET6_ADDRSTRLEN + 8) { | 423 | if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { |
257 | dprintk("%s: Invalid address, length %d\n", __func__, | 424 | dprintk("%s: Invalid address, length %d\n", __func__, |
258 | rlen); | 425 | rlen); |
259 | goto out_err; | 426 | goto out_free_netid; |
260 | } | 427 | } |
261 | buf = kmalloc(rlen + 1, gfp_flags); | 428 | buf = kmalloc(rlen + 1, gfp_flags); |
262 | if (!buf) { | 429 | if (!buf) { |
263 | dprintk("%s: Not enough memory\n", __func__); | 430 | dprintk("%s: Not enough memory\n", __func__); |
264 | goto out_err; | 431 | goto out_free_netid; |
265 | } | 432 | } |
266 | buf[rlen] = '\0'; | 433 | buf[rlen] = '\0'; |
267 | memcpy(buf, p, rlen); | 434 | memcpy(buf, p, rlen); |
268 | 435 | ||
269 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | 436 | /* replace port '.' with '-' */ |
270 | for (i = 0; i < 2; i++) { | 437 | portstr = strrchr(buf, '.'); |
271 | char *res = strrchr(buf, '.'); | 438 | if (!portstr) { |
272 | if (!res) { | 439 | dprintk("%s: Failed finding expected dot in port\n", |
273 | dprintk("%s: Failed finding expected dots in port\n", | 440 | __func__); |
274 | __func__); | 441 | goto out_free_buf; |
275 | goto out_free; | 442 | } |
276 | } | 443 | *portstr = '-'; |
277 | *res = '-'; | 444 | |
445 | /* find '.' between address and port */ | ||
446 | portstr = strrchr(buf, '.'); | ||
447 | if (!portstr) { | ||
448 | dprintk("%s: Failed finding expected dot between address and " | ||
449 | "port\n", __func__); | ||
450 | goto out_free_buf; | ||
278 | } | 451 | } |
452 | *portstr = '\0'; | ||
279 | 453 | ||
280 | /* Currently only support ipv4 address */ | 454 | da = kzalloc(sizeof(*da), gfp_flags); |
281 | if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { | 455 | if (unlikely(!da)) |
282 | dprintk("%s: Only ipv4 addresses supported\n", __func__); | 456 | goto out_free_buf; |
283 | goto out_free; | 457 | |
458 | INIT_LIST_HEAD(&da->da_node); | ||
459 | |||
460 | if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, | ||
461 | sizeof(da->da_addr))) { | ||
462 | dprintk("%s: error parsing address %s\n", __func__, buf); | ||
463 | goto out_free_da; | ||
284 | } | 464 | } |
285 | 465 | ||
286 | /* port */ | 466 | portstr++; |
287 | pstr = ipend; | 467 | sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); |
288 | sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); | ||
289 | port = htons((tmp[0] << 8) | (tmp[1])); | 468 | port = htons((tmp[0] << 8) | (tmp[1])); |
290 | 469 | ||
291 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); | 470 | switch (da->da_addr.ss_family) { |
292 | dprintk("%s: Decoded address and port %s\n", __func__, buf); | 471 | case AF_INET: |
293 | out_free: | 472 | ((struct sockaddr_in *)&da->da_addr)->sin_port = port; |
473 | da->da_addrlen = sizeof(struct sockaddr_in); | ||
474 | match_netid = "tcp"; | ||
475 | match_netid_len = 3; | ||
476 | break; | ||
477 | |||
478 | case AF_INET6: | ||
479 | ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; | ||
480 | da->da_addrlen = sizeof(struct sockaddr_in6); | ||
481 | match_netid = "tcp6"; | ||
482 | match_netid_len = 4; | ||
483 | startsep = "["; | ||
484 | endsep = "]"; | ||
485 | break; | ||
486 | |||
487 | default: | ||
488 | dprintk("%s: unsupported address family: %u\n", | ||
489 | __func__, da->da_addr.ss_family); | ||
490 | goto out_free_da; | ||
491 | } | ||
492 | |||
493 | if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { | ||
494 | dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", | ||
495 | __func__, netid, match_netid); | ||
496 | goto out_free_da; | ||
497 | } | ||
498 | |||
499 | /* save human readable address */ | ||
500 | len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; | ||
501 | da->da_remotestr = kzalloc(len, gfp_flags); | ||
502 | |||
503 | /* NULL is ok, only used for dprintk */ | ||
504 | if (da->da_remotestr) | ||
505 | snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, | ||
506 | buf, endsep, ntohs(port)); | ||
507 | |||
508 | dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); | ||
294 | kfree(buf); | 509 | kfree(buf); |
510 | kfree(netid); | ||
511 | return da; | ||
512 | |||
513 | out_free_da: | ||
514 | kfree(da); | ||
515 | out_free_buf: | ||
516 | dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); | ||
517 | kfree(buf); | ||
518 | out_free_netid: | ||
519 | kfree(netid); | ||
295 | out_err: | 520 | out_err: |
296 | return ds; | 521 | return NULL; |
297 | } | 522 | } |
298 | 523 | ||
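The r_addr string decoded above is an RFC 5665 universal address: the last two dot-separated decimal octets carry the port as high.low, and everything before them is the IPv4 or IPv6 address itself. A standalone sketch of that split (parse_uaddr is a hypothetical helper, not kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_uaddr(const char *uaddr, char *host, size_t hostlen,
                       unsigned short *port)
{
        char buf[64];
        char *hi, *lo;
        int h, l;

        if (strlen(uaddr) >= sizeof(buf))
                return -1;
        strcpy(buf, uaddr);

        lo = strrchr(buf, '.');         /* low port octet */
        if (!lo)
                return -1;
        *lo++ = '\0';
        hi = strrchr(buf, '.');         /* high port octet */
        if (!hi)
                return -1;
        *hi++ = '\0';

        h = atoi(hi);
        l = atoi(lo);
        if (h < 0 || h > 255 || l < 0 || l > 255)
                return -1;
        *port = (unsigned short)((h << 8) | l);

        if (strlen(buf) >= hostlen)
                return -1;
        strcpy(host, buf);              /* what is left is the address */
        return 0;
}

int main(void)
{
        char host[64];
        unsigned short port;

        if (!parse_uaddr("192.168.1.5.8.1", host, sizeof(host), &port))
                printf("%s port %u\n", host, port);     /* 192.168.1.5 port 2049 */
        return 0;
}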
299 | /* Decode opaque device data and return the result */ | 524 | /* Decode opaque device data and return the result */ |
@@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
310 | struct xdr_stream stream; | 535 | struct xdr_stream stream; |
311 | struct xdr_buf buf; | 536 | struct xdr_buf buf; |
312 | struct page *scratch; | 537 | struct page *scratch; |
538 | struct list_head dsaddrs; | ||
539 | struct nfs4_pnfs_ds_addr *da; | ||
313 | 540 | ||
314 | /* set up xdr stream */ | 541 | /* set up xdr stream */ |
315 | scratch = alloc_page(gfp_flags); | 542 | scratch = alloc_page(gfp_flags); |
@@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
386 | NFS_SERVER(ino)->nfs_client, | 613 | NFS_SERVER(ino)->nfs_client, |
387 | &pdev->dev_id); | 614 | &pdev->dev_id); |
388 | 615 | ||
616 | INIT_LIST_HEAD(&dsaddrs); | ||
617 | |||
389 | for (i = 0; i < dsaddr->ds_num; i++) { | 618 | for (i = 0; i < dsaddr->ds_num; i++) { |
390 | int j; | 619 | int j; |
391 | u32 mp_count; | 620 | u32 mp_count; |
@@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
395 | goto out_err_free_deviceid; | 624 | goto out_err_free_deviceid; |
396 | 625 | ||
397 | mp_count = be32_to_cpup(p); /* multipath count */ | 626 | mp_count = be32_to_cpup(p); /* multipath count */ |
398 | if (mp_count > 1) { | ||
399 | printk(KERN_WARNING | ||
400 | "%s: Multipath count %d not supported, " | ||
401 | "skipping all greater than 1\n", __func__, | ||
402 | mp_count); | ||
403 | } | ||
404 | for (j = 0; j < mp_count; j++) { | 627 | for (j = 0; j < mp_count; j++) { |
405 | if (j == 0) { | 628 | da = decode_ds_addr(&stream, gfp_flags); |
406 | dsaddr->ds_list[i] = decode_and_add_ds(&stream, | 629 | if (da) |
407 | ino, gfp_flags); | 630 | list_add_tail(&da->da_node, &dsaddrs); |
408 | if (dsaddr->ds_list[i] == NULL) | 631 | } |
409 | goto out_err_free_deviceid; | 632 | if (list_empty(&dsaddrs)) { |
410 | } else { | 633 | dprintk("%s: no suitable DS addresses found\n", |
411 | u32 len; | 634 | __func__); |
412 | /* skip extra multipath */ | 635 | goto out_err_free_deviceid; |
413 | 636 | } | |
414 | /* read len, skip */ | 637 | |
415 | p = xdr_inline_decode(&stream, 4); | 638 | dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); |
416 | if (unlikely(!p)) | 639 | if (!dsaddr->ds_list[i]) |
417 | goto out_err_free_deviceid; | 640 | goto out_err_drain_dsaddrs; |
418 | len = be32_to_cpup(p); | 641 | |
419 | 642 | /* If DS was already in cache, free ds addrs */ | |
420 | p = xdr_inline_decode(&stream, len); | 643 | while (!list_empty(&dsaddrs)) { |
421 | if (unlikely(!p)) | 644 | da = list_first_entry(&dsaddrs, |
422 | goto out_err_free_deviceid; | 645 | struct nfs4_pnfs_ds_addr, |
423 | 646 | da_node); | |
424 | /* read len, skip */ | 647 | list_del_init(&da->da_node); |
425 | p = xdr_inline_decode(&stream, 4); | 648 | kfree(da->da_remotestr); |
426 | if (unlikely(!p)) | 649 | kfree(da); |
427 | goto out_err_free_deviceid; | ||
428 | len = be32_to_cpup(p); | ||
429 | |||
430 | p = xdr_inline_decode(&stream, len); | ||
431 | if (unlikely(!p)) | ||
432 | goto out_err_free_deviceid; | ||
433 | } | ||
434 | } | 650 | } |
435 | } | 651 | } |
436 | 652 | ||
437 | __free_page(scratch); | 653 | __free_page(scratch); |
438 | return dsaddr; | 654 | return dsaddr; |
439 | 655 | ||
656 | out_err_drain_dsaddrs: | ||
657 | while (!list_empty(&dsaddrs)) { | ||
658 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | ||
659 | da_node); | ||
660 | list_del_init(&da->da_node); | ||
661 | kfree(da->da_remotestr); | ||
662 | kfree(da); | ||
663 | } | ||
440 | out_err_free_deviceid: | 664 | out_err_free_deviceid: |
441 | nfs4_fl_free_deviceid(dsaddr); | 665 | nfs4_fl_free_deviceid(dsaddr); |
442 | /* stripe_indices was part of dsaddr */ | 666 | /* stripe_indices was part of dsaddr */ |
@@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) | |||
591 | 815 | ||
592 | static void | 816 | static void |
593 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, | 817 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, |
594 | int err, u32 ds_addr) | 818 | int err, const char *ds_remotestr) |
595 | { | 819 | { |
596 | u32 *p = (u32 *)&dsaddr->id_node.deviceid; | 820 | u32 *p = (u32 *)&dsaddr->id_node.deviceid; |
597 | 821 | ||
598 | printk(KERN_ERR "NFS: data server %x connection error %d." | 822 | printk(KERN_ERR "NFS: data server %s connection error %d." |
599 | " Deviceid [%x%x%x%x] marked out of use.\n", | 823 | " Deviceid [%x%x%x%x] marked out of use.\n", |
600 | ds_addr, err, p[0], p[1], p[2], p[3]); | 824 | ds_remotestr, err, p[0], p[1], p[2], p[3]); |
601 | 825 | ||
602 | spin_lock(&nfs4_ds_cache_lock); | 826 | spin_lock(&nfs4_ds_cache_lock); |
603 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; | 827 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; |
@@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |||
628 | err = nfs4_ds_connect(s, ds); | 852 | err = nfs4_ds_connect(s, ds); |
629 | if (err) { | 853 | if (err) { |
630 | filelayout_mark_devid_negative(dsaddr, err, | 854 | filelayout_mark_devid_negative(dsaddr, err, |
631 | ntohl(ds->ds_ip_addr)); | 855 | ds->ds_remotestr); |
632 | return NULL; | 856 | return NULL; |
633 | } | 857 | } |
634 | } | 858 | } |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26bece8f3083..8c77039e7a81 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
80 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 80 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
81 | struct nfs_fattr *fattr, struct iattr *sattr, | 81 | struct nfs_fattr *fattr, struct iattr *sattr, |
82 | struct nfs4_state *state); | 82 | struct nfs4_state *state); |
83 | 83 | #ifdef CONFIG_NFS_V4_1 | |
84 | static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); | ||
85 | static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); | ||
86 | #endif | ||
84 | /* Prevent leaks of NFSv4 errors into userland */ | 87 | /* Prevent leaks of NFSv4 errors into userland */ |
85 | static int nfs4_map_errors(int err) | 88 | static int nfs4_map_errors(int err) |
86 | { | 89 | { |
@@ -137,12 +140,13 @@ const u32 nfs4_pathconf_bitmap[2] = { | |||
137 | 0 | 140 | 0 |
138 | }; | 141 | }; |
139 | 142 | ||
140 | const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE | 143 | const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE |
141 | | FATTR4_WORD0_MAXREAD | 144 | | FATTR4_WORD0_MAXREAD |
142 | | FATTR4_WORD0_MAXWRITE | 145 | | FATTR4_WORD0_MAXWRITE |
143 | | FATTR4_WORD0_LEASE_TIME, | 146 | | FATTR4_WORD0_LEASE_TIME, |
144 | FATTR4_WORD1_TIME_DELTA | 147 | FATTR4_WORD1_TIME_DELTA |
145 | | FATTR4_WORD1_FS_LAYOUT_TYPES | 148 | | FATTR4_WORD1_FS_LAYOUT_TYPES, |
149 | FATTR4_WORD2_LAYOUT_BLKSIZE | ||
146 | }; | 150 | }; |
147 | 151 | ||
148 | const u32 nfs4_fs_locations_bitmap[2] = { | 152 | const u32 nfs4_fs_locations_bitmap[2] = { |
@@ -1689,6 +1693,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta | |||
1689 | return ret; | 1693 | return ret; |
1690 | } | 1694 | } |
1691 | 1695 | ||
1696 | #if defined(CONFIG_NFS_V4_1) | ||
1697 | static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) | ||
1698 | { | ||
1699 | int status; | ||
1700 | struct nfs_server *server = NFS_SERVER(state->inode); | ||
1701 | |||
1702 | status = nfs41_test_stateid(server, state); | ||
1703 | if (status == NFS_OK) | ||
1704 | return 0; | ||
1705 | nfs41_free_stateid(server, state); | ||
1706 | return nfs4_open_expired(sp, state); | ||
1707 | } | ||
1708 | #endif | ||
1709 | |||
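nfs41_open_expired() adds a cheap pre-check to lease recovery: if TEST_STATEID says the server still recognizes the stateid, no reclaim is needed; only on failure is the stateid freed and the full open recovery path taken. The control flow, reduced to a sketch with dummy stubs (the stateid values are arbitrary):

#include <stdio.h>

enum { STATEID_OK = 0, STATEID_STALE = 1 };

/* Stubs standing in for the TEST_STATEID/FREE_STATEID operations. */
static int test_stateid(int id)  { return id == 42 ? STATEID_OK : STATEID_STALE; }
static void free_stateid(int id) { printf("freeing stateid %d\n", id); }
static int full_recovery(int id) { printf("re-opening state %d\n", id); return 0; }

static int open_expired(int stateid)
{
        if (test_stateid(stateid) == STATEID_OK)
                return 0;               /* state survived, nothing to do */
        free_stateid(stateid);
        return full_recovery(stateid);  /* classic reclaim path */
}

int main(void)
{
        open_expired(42);               /* valid: no-op */
        open_expired(7);                /* stale: free + recover */
        return 0;
}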
1692 | /* | 1710 | /* |
1693 | * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* | 1711 | * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* |
1694 | * fields corresponding to attributes that were used to store the verifier. | 1712 | * fields corresponding to attributes that were used to store the verifier. |
@@ -2252,13 +2270,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2252 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2270 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, |
2253 | struct nfs_fsinfo *info) | 2271 | struct nfs_fsinfo *info) |
2254 | { | 2272 | { |
2273 | int minor_version = server->nfs_client->cl_minorversion; | ||
2255 | int status = nfs4_lookup_root(server, fhandle, info); | 2274 | int status = nfs4_lookup_root(server, fhandle, info); |
2256 | if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) | 2275 | if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) |
2257 | /* | 2276 | /* |
2258 | * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM | 2277 | * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM |
2259 | * by nfs4_map_errors() as this function exits. | 2278 | * by nfs4_map_errors() as this function exits. |
2260 | */ | 2279 | */ |
2261 | status = nfs4_find_root_sec(server, fhandle, info); | 2280 | status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); |
2262 | if (status == 0) | 2281 | if (status == 0) |
2263 | status = nfs4_server_capabilities(server, fhandle); | 2282 | status = nfs4_server_capabilities(server, fhandle); |
2264 | if (status == 0) | 2283 | if (status == 0) |
@@ -4441,6 +4460,20 @@ out: | |||
4441 | return err; | 4460 | return err; |
4442 | } | 4461 | } |
4443 | 4462 | ||
4463 | #if defined(CONFIG_NFS_V4_1) | ||
4464 | static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) | ||
4465 | { | ||
4466 | int status; | ||
4467 | struct nfs_server *server = NFS_SERVER(state->inode); | ||
4468 | |||
4469 | status = nfs41_test_stateid(server, state); | ||
4470 | if (status == NFS_OK) | ||
4471 | return 0; | ||
4472 | nfs41_free_stateid(server, state); | ||
4473 | return nfs4_lock_expired(state, request); | ||
4474 | } | ||
4475 | #endif | ||
4476 | |||
4444 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) | 4477 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) |
4445 | { | 4478 | { |
4446 | struct nfs_inode *nfsi = NFS_I(state->inode); | 4479 | struct nfs_inode *nfsi = NFS_I(state->inode); |
@@ -4779,6 +4812,16 @@ out_inval: | |||
4779 | return -NFS4ERR_INVAL; | 4812 | return -NFS4ERR_INVAL; |
4780 | } | 4813 | } |
4781 | 4814 | ||
4815 | static bool | ||
4816 | nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) | ||
4817 | { | ||
4818 | if (a->server_scope_sz == b->server_scope_sz && | ||
4819 | memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) | ||
4820 | return true; | ||
4821 | |||
4822 | return false; | ||
4823 | } | ||
4824 | |||
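An EXCHANGE_ID server scope is an opaque byte string, so two scopes compare equal only when both the length and every byte match, exactly as the memcmp above does. A mismatch after reconnecting means the client may be talking to a different server instance, which is why the state manager later switches to nograce recovery. A sketch of the comparison and the decision it feeds (the scope contents are made up):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct scope {
        size_t sz;
        unsigned char data[64];
};

static bool same_scope(const struct scope *a, const struct scope *b)
{
        return a->sz == b->sz && memcmp(a->data, b->data, a->sz) == 0;
}

int main(void)
{
        struct scope old = { 4, "srvA" };
        struct scope cur = { 4, "srvB" };

        printf(same_scope(&old, &cur) ?
               "same server: reboot reclaim\n" :
               "scope mismatch: nograce recovery\n");
        return 0;
}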
4782 | /* | 4825 | /* |
4783 | * nfs4_proc_exchange_id() | 4826 | * nfs4_proc_exchange_id() |
4784 | * | 4827 | * |
@@ -4821,9 +4864,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
4821 | init_utsname()->domainname, | 4864 | init_utsname()->domainname, |
4822 | clp->cl_rpcclient->cl_auth->au_flavor); | 4865 | clp->cl_rpcclient->cl_auth->au_flavor); |
4823 | 4866 | ||
4867 | res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); | ||
4868 | if (unlikely(!res.server_scope)) | ||
4869 | return -ENOMEM; | ||
4870 | |||
4824 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); | 4871 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); |
4825 | if (!status) | 4872 | if (!status) |
4826 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); | 4873 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); |
4874 | |||
4875 | if (!status) { | ||
4876 | if (clp->server_scope && | ||
4877 | !nfs41_same_server_scope(clp->server_scope, | ||
4878 | res.server_scope)) { | ||
4879 | dprintk("%s: server_scope mismatch detected\n", | ||
4880 | __func__); | ||
4881 | set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); | ||
4882 | kfree(clp->server_scope); | ||
4883 | clp->server_scope = NULL; | ||
4884 | } | ||
4885 | |||
4886 | if (!clp->server_scope) | ||
4887 | clp->server_scope = res.server_scope; | ||
4888 | else | ||
4889 | kfree(res.server_scope); | ||
4890 | } | ||
4891 | |||
4827 | dprintk("<-- %s status= %d\n", __func__, status); | 4892 | dprintk("<-- %s status= %d\n", __func__, status); |
4828 | return status; | 4893 | return status; |
4829 | } | 4894 | } |
@@ -5704,7 +5769,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
5704 | { | 5769 | { |
5705 | struct nfs4_layoutreturn *lrp = calldata; | 5770 | struct nfs4_layoutreturn *lrp = calldata; |
5706 | struct nfs_server *server; | 5771 | struct nfs_server *server; |
5707 | struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; | 5772 | struct pnfs_layout_hdr *lo = lrp->args.layout; |
5708 | 5773 | ||
5709 | dprintk("--> %s\n", __func__); | 5774 | dprintk("--> %s\n", __func__); |
5710 | 5775 | ||
@@ -5733,7 +5798,7 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
5733 | struct nfs4_layoutreturn *lrp = calldata; | 5798 | struct nfs4_layoutreturn *lrp = calldata; |
5734 | 5799 | ||
5735 | dprintk("--> %s\n", __func__); | 5800 | dprintk("--> %s\n", __func__); |
5736 | put_layout_hdr(NFS_I(lrp->args.inode)->layout); | 5801 | put_layout_hdr(lrp->args.layout); |
5737 | kfree(calldata); | 5802 | kfree(calldata); |
5738 | dprintk("<-- %s\n", __func__); | 5803 | dprintk("<-- %s\n", __func__); |
5739 | } | 5804 | } |
@@ -5770,6 +5835,54 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
5770 | return status; | 5835 | return status; |
5771 | } | 5836 | } |
5772 | 5837 | ||
5838 | /* | ||
5839 | * Retrieve the list of Data Server devices from the MDS. | ||
5840 | */ | ||
5841 | static int _nfs4_getdevicelist(struct nfs_server *server, | ||
5842 | const struct nfs_fh *fh, | ||
5843 | struct pnfs_devicelist *devlist) | ||
5844 | { | ||
5845 | struct nfs4_getdevicelist_args args = { | ||
5846 | .fh = fh, | ||
5847 | .layoutclass = server->pnfs_curr_ld->id, | ||
5848 | }; | ||
5849 | struct nfs4_getdevicelist_res res = { | ||
5850 | .devlist = devlist, | ||
5851 | }; | ||
5852 | struct rpc_message msg = { | ||
5853 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], | ||
5854 | .rpc_argp = &args, | ||
5855 | .rpc_resp = &res, | ||
5856 | }; | ||
5857 | int status; | ||
5858 | |||
5859 | dprintk("--> %s\n", __func__); | ||
5860 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, | ||
5861 | &res.seq_res, 0); | ||
5862 | dprintk("<-- %s status=%d\n", __func__, status); | ||
5863 | return status; | ||
5864 | } | ||
5865 | |||
5866 | int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
5867 | const struct nfs_fh *fh, | ||
5868 | struct pnfs_devicelist *devlist) | ||
5869 | { | ||
5870 | struct nfs4_exception exception = { }; | ||
5871 | int err; | ||
5872 | |||
5873 | do { | ||
5874 | err = nfs4_handle_exception(server, | ||
5875 | _nfs4_getdevicelist(server, fh, devlist), | ||
5876 | &exception); | ||
5877 | } while (exception.retry); | ||
5878 | |||
5879 | dprintk("%s: err=%d, num_devs=%u\n", __func__, | ||
5880 | err, devlist->num_devs); | ||
5881 | |||
5882 | return err; | ||
5883 | } | ||
5884 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); | ||
5885 | |||
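nfs4_proc_getdevicelist() uses the same retry idiom as the other proc routines: run the operation, hand the result to nfs4_handle_exception(), and loop while exception.retry is set. A toy version of that loop (the error value and the recovery action are placeholders, not the real handler):

#include <stdio.h>

struct nfs_exception { int retry; };

/* Decide per error whether to retry (after recovery) or give up. */
static int handle_exception(int err, struct nfs_exception *exc)
{
        exc->retry = 0;
        if (err == -1) {                /* stand-in for a DELAY-style error */
                /* a real handler would sleep or recover state here */
                exc->retry = 1;
                return 0;
        }
        return err;                     /* final result */
}

/* Fails twice, then succeeds, to exercise the loop. */
static int do_op_once(void)
{
        static int calls;
        return ++calls < 3 ? -1 : 0;
}

int main(void)
{
        struct nfs_exception exc = { 0 };
        int err;

        do {
                err = handle_exception(do_op_once(), &exc);
        } while (exc.retry);
        printf("err=%d\n", err);        /* err=0 after two retries */
        return 0;
}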
5773 | static int | 5886 | static int |
5774 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 5887 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) |
5775 | { | 5888 | { |
@@ -5848,9 +5961,16 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | |||
5848 | static void nfs4_layoutcommit_release(void *calldata) | 5961 | static void nfs4_layoutcommit_release(void *calldata) |
5849 | { | 5962 | { |
5850 | struct nfs4_layoutcommit_data *data = calldata; | 5963 | struct nfs4_layoutcommit_data *data = calldata; |
5964 | struct pnfs_layout_segment *lseg, *tmp; | ||
5851 | 5965 | ||
5966 | pnfs_cleanup_layoutcommit(data); | ||
5852 | /* Matched by references in pnfs_set_layoutcommit */ | 5967 | /* Matched by references in pnfs_set_layoutcommit */ |
5853 | put_lseg(data->lseg); | 5968 | list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { |
5969 | list_del_init(&lseg->pls_lc_list); | ||
5970 | if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, | ||
5971 | &lseg->pls_flags)) | ||
5972 | put_lseg(lseg); | ||
5973 | } | ||
5854 | put_rpccred(data->cred); | 5974 | put_rpccred(data->cred); |
5855 | kfree(data); | 5975 | kfree(data); |
5856 | } | 5976 | } |
@@ -5901,6 +6021,143 @@ out: | |||
5901 | rpc_put_task(task); | 6021 | rpc_put_task(task); |
5902 | return status; | 6022 | return status; |
5903 | } | 6023 | } |
6024 | |||
6025 | static int | ||
6026 | _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, | ||
6027 | struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) | ||
6028 | { | ||
6029 | struct nfs41_secinfo_no_name_args args = { | ||
6030 | .style = SECINFO_STYLE_CURRENT_FH, | ||
6031 | }; | ||
6032 | struct nfs4_secinfo_res res = { | ||
6033 | .flavors = flavors, | ||
6034 | }; | ||
6035 | struct rpc_message msg = { | ||
6036 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME], | ||
6037 | .rpc_argp = &args, | ||
6038 | .rpc_resp = &res, | ||
6039 | }; | ||
6040 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | ||
6041 | } | ||
6042 | |||
6043 | static int | ||
6044 | nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, | ||
6045 | struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) | ||
6046 | { | ||
6047 | struct nfs4_exception exception = { }; | ||
6048 | int err; | ||
6049 | do { | ||
6050 | err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); | ||
6051 | switch (err) { | ||
6052 | case 0: | ||
6053 | case -NFS4ERR_WRONGSEC: | ||
6054 | case -NFS4ERR_NOTSUPP: | ||
6055 | break; | ||
6056 | default: | ||
6057 | err = nfs4_handle_exception(server, err, &exception); | ||
6058 | } | ||
6059 | } while (exception.retry); | ||
6060 | return err; | ||
6061 | } | ||
6062 | |||
6063 | static int | ||
6064 | nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, | ||
6065 | struct nfs_fsinfo *info) | ||
6066 | { | ||
6067 | int err; | ||
6068 | struct page *page; | ||
6069 | rpc_authflavor_t flavor; | ||
6070 | struct nfs4_secinfo_flavors *flavors; | ||
6071 | |||
6072 | page = alloc_page(GFP_KERNEL); | ||
6073 | if (!page) { | ||
6074 | err = -ENOMEM; | ||
6075 | goto out; | ||
6076 | } | ||
6077 | |||
6078 | flavors = page_address(page); | ||
6079 | err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); | ||
6080 | |||
6081 | /* | ||
6082 | * Fall back on "guess and check" method if | ||
6083 | * the server doesn't support SECINFO_NO_NAME | ||
6084 | */ | ||
6085 | if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { | ||
6086 | err = nfs4_find_root_sec(server, fhandle, info); | ||
6087 | goto out_freepage; | ||
6088 | } | ||
6089 | if (err) | ||
6090 | goto out_freepage; | ||
6091 | |||
6092 | flavor = nfs_find_best_sec(flavors); | ||
6093 | if (err == 0) | ||
6094 | err = nfs4_lookup_root_sec(server, fhandle, info, flavor); | ||
6095 | |||
6096 | out_freepage: | ||
6097 | put_page(page); | ||
6098 | if (err == -EACCES) | ||
6099 | return -EPERM; | ||
6100 | out: | ||
6101 | return err; | ||
6102 | } | ||
6103 | static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6104 | { | ||
6105 | int status; | ||
6106 | struct nfs41_test_stateid_args args = { | ||
6107 | .stateid = &state->stateid, | ||
6108 | }; | ||
6109 | struct nfs41_test_stateid_res res; | ||
6110 | struct rpc_message msg = { | ||
6111 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], | ||
6112 | .rpc_argp = &args, | ||
6113 | .rpc_resp = &res, | ||
6114 | }; | ||
6115 | args.seq_args.sa_session = res.seq_res.sr_session = NULL; | ||
6116 | status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); | ||
6117 | return status; | ||
6118 | } | ||
6119 | |||
6120 | static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6121 | { | ||
6122 | struct nfs4_exception exception = { }; | ||
6123 | int err; | ||
6124 | do { | ||
6125 | err = nfs4_handle_exception(server, | ||
6126 | _nfs41_test_stateid(server, state), | ||
6127 | &exception); | ||
6128 | } while (exception.retry); | ||
6129 | return err; | ||
6130 | } | ||
6131 | |||
6132 | static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6133 | { | ||
6134 | int status; | ||
6135 | struct nfs41_free_stateid_args args = { | ||
6136 | .stateid = &state->stateid, | ||
6137 | }; | ||
6138 | struct nfs41_free_stateid_res res; | ||
6139 | struct rpc_message msg = { | ||
6140 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], | ||
6141 | .rpc_argp = &args, | ||
6142 | .rpc_resp = &res, | ||
6143 | }; | ||
6144 | |||
6145 | args.seq_args.sa_session = res.seq_res.sr_session = NULL; | ||
6146 | status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); | ||
6147 | return status; | ||
6148 | } | ||
6149 | |||
6150 | static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6151 | { | ||
6152 | struct nfs4_exception exception = { }; | ||
6153 | int err; | ||
6154 | do { | ||
6155 | err = nfs4_handle_exception(server, | ||
6156 | _nfs4_free_stateid(server, state), | ||
6157 | &exception); | ||
6158 | } while (exception.retry); | ||
6159 | return err; | ||
6160 | } | ||
5904 | #endif /* CONFIG_NFS_V4_1 */ | 6161 | #endif /* CONFIG_NFS_V4_1 */ |
5905 | 6162 | ||
5906 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 6163 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { |
@@ -5937,8 +6194,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { | |||
5937 | struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { | 6194 | struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { |
5938 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, | 6195 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, |
5939 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, | 6196 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, |
5940 | .recover_open = nfs4_open_expired, | 6197 | .recover_open = nfs41_open_expired, |
5941 | .recover_lock = nfs4_lock_expired, | 6198 | .recover_lock = nfs41_lock_expired, |
5942 | .establish_clid = nfs41_init_clientid, | 6199 | .establish_clid = nfs41_init_clientid, |
5943 | .get_clid_cred = nfs4_get_exchange_id_cred, | 6200 | .get_clid_cred = nfs4_get_exchange_id_cred, |
5944 | }; | 6201 | }; |
@@ -5962,6 +6219,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | |||
5962 | .minor_version = 0, | 6219 | .minor_version = 0, |
5963 | .call_sync = _nfs4_call_sync, | 6220 | .call_sync = _nfs4_call_sync, |
5964 | .validate_stateid = nfs4_validate_delegation_stateid, | 6221 | .validate_stateid = nfs4_validate_delegation_stateid, |
6222 | .find_root_sec = nfs4_find_root_sec, | ||
5965 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, | 6223 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, |
5966 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, | 6224 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, |
5967 | .state_renewal_ops = &nfs40_state_renewal_ops, | 6225 | .state_renewal_ops = &nfs40_state_renewal_ops, |
@@ -5972,6 +6230,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | |||
5972 | .minor_version = 1, | 6230 | .minor_version = 1, |
5973 | .call_sync = _nfs4_call_sync_session, | 6231 | .call_sync = _nfs4_call_sync_session, |
5974 | .validate_stateid = nfs41_validate_delegation_stateid, | 6232 | .validate_stateid = nfs41_validate_delegation_stateid, |
6233 | .find_root_sec = nfs41_find_root_sec, | ||
5975 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | 6234 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, |
5976 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | 6235 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, |
5977 | .state_renewal_ops = &nfs41_state_renewal_ops, | 6236 | .state_renewal_ops = &nfs41_state_renewal_ops, |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7acfe8843626..72ab97ef3d61 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1643 | goto out_error; | 1643 | goto out_error; |
1644 | } | 1644 | } |
1645 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); | 1645 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); |
1646 | set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); | 1646 | |
1647 | if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, | ||
1648 | &clp->cl_state)) | ||
1649 | nfs4_state_start_reclaim_nograce(clp); | ||
1650 | else | ||
1651 | set_bit(NFS4CLNT_RECLAIM_REBOOT, | ||
1652 | &clp->cl_state); | ||
1653 | |||
1647 | pnfs_destroy_all_layouts(clp); | 1654 | pnfs_destroy_all_layouts(clp); |
1648 | } | 1655 | } |
1649 | 1656 | ||
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6e8f3b9a1de..1dce12f41a4f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -113,7 +113,11 @@ static int nfs4_stat_to_errno(int); | |||
113 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) | 113 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) |
114 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) | 114 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) |
115 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) | 115 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) |
116 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15) | 116 | /* The 5 accounts for the PNFS attributes, and assumes that at most three |
117 | * layout types will be returned. | ||
118 | */ | ||
119 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + \ | ||
120 | nfs4_fattr_bitmap_maxsz + 4 + 8 + 5) | ||
117 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) | 121 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) |
118 | #define decode_renew_maxsz (op_decode_hdr_maxsz) | 122 | #define decode_renew_maxsz (op_decode_hdr_maxsz) |
119 | #define encode_setclientid_maxsz \ | 123 | #define encode_setclientid_maxsz \ |
@@ -314,6 +318,17 @@ static int nfs4_stat_to_errno(int); | |||
314 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 318 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
315 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 319 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
316 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 320 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
321 | #define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ | ||
322 | encode_verifier_maxsz) | ||
323 | #define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ | ||
324 | 2 /* nfs_cookie4 gdlr_cookie */ + \ | ||
325 | decode_verifier_maxsz \ | ||
326 | /* verifier4 gdlr_verifier */ + \ | ||
327 | 1 /* gdlr_deviceid_list count */ + \ | ||
328 | XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ | ||
329 | NFS4_DEVICEID4_SIZE) \ | ||
330 | /* gdlr_deviceid_list */ + \ | ||
331 | 1 /* bool gdlr_eof */) | ||
317 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ | 332 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ |
318 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) | 333 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) |
319 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ | 334 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ |
@@ -343,6 +358,14 @@ static int nfs4_stat_to_errno(int); | |||
343 | 1 /* FIXME: opaque lrf_body always empty at the moment */) | 358 | 1 /* FIXME: opaque lrf_body always empty at the moment */) |
344 | #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ | 359 | #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ |
345 | 1 + decode_stateid_maxsz) | 360 | 1 + decode_stateid_maxsz) |
361 | #define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) | ||
362 | #define decode_secinfo_no_name_maxsz decode_secinfo_maxsz | ||
363 | #define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ | ||
364 | XDR_QUADLEN(NFS4_STATEID_SIZE)) | ||
365 | #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) | ||
366 | #define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ | ||
367 | XDR_QUADLEN(NFS4_STATEID_SIZE)) | ||
368 | #define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) | ||
346 | #else /* CONFIG_NFS_V4_1 */ | 369 | #else /* CONFIG_NFS_V4_1 */ |
347 | #define encode_sequence_maxsz 0 | 370 | #define encode_sequence_maxsz 0 |
348 | #define decode_sequence_maxsz 0 | 371 | #define decode_sequence_maxsz 0 |
@@ -740,6 +763,14 @@ static int nfs4_stat_to_errno(int); | |||
740 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 763 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
741 | decode_sequence_maxsz + \ | 764 | decode_sequence_maxsz + \ |
742 | decode_reclaim_complete_maxsz) | 765 | decode_reclaim_complete_maxsz) |
766 | #define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ | ||
767 | encode_sequence_maxsz + \ | ||
768 | encode_putfh_maxsz + \ | ||
769 | encode_getdevicelist_maxsz) | ||
770 | #define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \ | ||
771 | decode_sequence_maxsz + \ | ||
772 | decode_putfh_maxsz + \ | ||
773 | decode_getdevicelist_maxsz) | ||
743 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ | 774 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ |
744 | encode_sequence_maxsz +\ | 775 | encode_sequence_maxsz +\ |
745 | encode_getdeviceinfo_maxsz) | 776 | encode_getdeviceinfo_maxsz) |
@@ -772,6 +803,26 @@ static int nfs4_stat_to_errno(int); | |||
772 | decode_sequence_maxsz + \ | 803 | decode_sequence_maxsz + \ |
773 | decode_putfh_maxsz + \ | 804 | decode_putfh_maxsz + \ |
774 | decode_layoutreturn_maxsz) | 805 | decode_layoutreturn_maxsz) |
806 | #define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \ | ||
807 | encode_sequence_maxsz + \ | ||
808 | encode_putrootfh_maxsz +\ | ||
809 | encode_secinfo_no_name_maxsz) | ||
810 | #define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \ | ||
811 | decode_sequence_maxsz + \ | ||
812 | decode_putrootfh_maxsz + \ | ||
813 | decode_secinfo_no_name_maxsz) | ||
814 | #define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \ | ||
815 | encode_sequence_maxsz + \ | ||
816 | encode_test_stateid_maxsz) | ||
817 | #define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ | ||
818 | decode_sequence_maxsz + \ | ||
819 | decode_test_stateid_maxsz) | ||
820 | #define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \ | ||
821 | encode_sequence_maxsz + \ | ||
822 | encode_free_stateid_maxsz) | ||
823 | #define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \ | ||
824 | decode_sequence_maxsz + \ | ||
825 | decode_free_stateid_maxsz) | ||
775 | 826 | ||
776 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 827 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
777 | compound_encode_hdr_maxsz + | 828 | compound_encode_hdr_maxsz + |
@@ -1076,6 +1127,35 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm | |||
1076 | hdr->replen += decode_getattr_maxsz; | 1127 | hdr->replen += decode_getattr_maxsz; |
1077 | } | 1128 | } |
1078 | 1129 | ||
1130 | static void | ||
1131 | encode_getattr_three(struct xdr_stream *xdr, | ||
1132 | uint32_t bm0, uint32_t bm1, uint32_t bm2, | ||
1133 | struct compound_hdr *hdr) | ||
1134 | { | ||
1135 | __be32 *p; | ||
1136 | |||
1137 | p = reserve_space(xdr, 4); | ||
1138 | *p = cpu_to_be32(OP_GETATTR); | ||
1139 | if (bm2) { | ||
1140 | p = reserve_space(xdr, 16); | ||
1141 | *p++ = cpu_to_be32(3); | ||
1142 | *p++ = cpu_to_be32(bm0); | ||
1143 | *p++ = cpu_to_be32(bm1); | ||
1144 | *p = cpu_to_be32(bm2); | ||
1145 | } else if (bm1) { | ||
1146 | p = reserve_space(xdr, 12); | ||
1147 | *p++ = cpu_to_be32(2); | ||
1148 | *p++ = cpu_to_be32(bm0); | ||
1149 | *p = cpu_to_be32(bm1); | ||
1150 | } else { | ||
1151 | p = reserve_space(xdr, 8); | ||
1152 | *p++ = cpu_to_be32(1); | ||
1153 | *p = cpu_to_be32(bm0); | ||
1154 | } | ||
1155 | hdr->nops++; | ||
1156 | hdr->replen += decode_getattr_maxsz; | ||
1157 | } | ||
1158 | |||
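encode_getattr_three() extends the existing bitmap4 encoder to a third mask word for FATTR4_WORD2_LAYOUT_BLKSIZE: XDR sends a word count followed by that many 32-bit big-endian words, and trailing all-zero words are simply omitted. A sketch of the encoding into a flat buffer (the mask values are illustrative):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Emit a count word followed by only the mask words that are needed. */
static size_t encode_bitmap4(uint32_t *out, uint32_t bm0, uint32_t bm1,
                             uint32_t bm2)
{
        uint32_t words = bm2 ? 3 : (bm1 ? 2 : 1);

        out[0] = htonl(words);
        out[1] = htonl(bm0);
        if (words >= 2)
                out[2] = htonl(bm1);
        if (words == 3)
                out[3] = htonl(bm2);
        return (words + 1) * sizeof(uint32_t);  /* bytes consumed */
}

int main(void)
{
        uint32_t buf[4];
        size_t n = encode_bitmap4(buf, 0x00800000, 0x00004000, 0x1);

        printf("%zu bytes, %u words\n", n, ntohl(buf[0]));      /* 16 bytes, 3 words */
        return 0;
}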
1079 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1159 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
1080 | { | 1160 | { |
1081 | encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], | 1161 | encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], |
@@ -1084,8 +1164,11 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c | |||
1084 | 1164 | ||
1085 | static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1165 | static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
1086 | { | 1166 | { |
1087 | encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], | 1167 | encode_getattr_three(xdr, |
1088 | bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); | 1168 | bitmask[0] & nfs4_fsinfo_bitmap[0], |
1169 | bitmask[1] & nfs4_fsinfo_bitmap[1], | ||
1170 | bitmask[2] & nfs4_fsinfo_bitmap[2], | ||
1171 | hdr); | ||
1089 | } | 1172 | } |
1090 | 1173 | ||
1091 | static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1174 | static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
@@ -1827,6 +1910,26 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1827 | 1910 | ||
1828 | #ifdef CONFIG_NFS_V4_1 | 1911 | #ifdef CONFIG_NFS_V4_1 |
1829 | static void | 1912 | static void |
1913 | encode_getdevicelist(struct xdr_stream *xdr, | ||
1914 | const struct nfs4_getdevicelist_args *args, | ||
1915 | struct compound_hdr *hdr) | ||
1916 | { | ||
1917 | __be32 *p; | ||
1918 | nfs4_verifier dummy = { | ||
1919 | .data = "dummmmmy", | ||
1920 | }; | ||
1921 | |||
1922 | p = reserve_space(xdr, 20); | ||
1923 | *p++ = cpu_to_be32(OP_GETDEVICELIST); | ||
1924 | *p++ = cpu_to_be32(args->layoutclass); | ||
1925 | *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); | ||
1926 | xdr_encode_hyper(p, 0ULL); /* cookie */ | ||
1927 | encode_nfs4_verifier(xdr, &dummy); | ||
1928 | hdr->nops++; | ||
1929 | hdr->replen += decode_getdevicelist_maxsz; | ||
1930 | } | ||
1931 | |||
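The GETDEVICELIST4args body built above is fixed-size: layout type, a cap on returned devices, a list cookie (always zero here, meaning start of list), and an opaque cookie verifier. A standalone sketch of those 24 bytes, assuming the field widths from RFC 5661; the struct and helper names are invented for illustration:

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>

struct gdl_args {
        uint32_t layout_type;   /* args->layoutclass above */
        uint32_t maxdevices;    /* NFS4_PNFS_GETDEVLIST_MAXNUM above */
        uint64_t cookie;        /* 0 == start of the device list */
        uint8_t  cookieverf[8]; /* opaque; ignored on the first call */
};

/* Serialize the operation body that follows the OP_GETDEVICELIST
 * opcode; memcpy avoids unaligned stores. */
static size_t encode_getdevicelist_body(uint8_t *buf,
                                        const struct gdl_args *a)
{
        uint32_t w;

        w = htonl(a->layout_type);
        memcpy(buf + 0, &w, 4);
        w = htonl(a->maxdevices);
        memcpy(buf + 4, &w, 4);
        w = htonl((uint32_t)(a->cookie >> 32)); /* XDR hyper: high word first */
        memcpy(buf + 8, &w, 4);
        w = htonl((uint32_t)a->cookie);
        memcpy(buf + 12, &w, 4);
        memcpy(buf + 16, a->cookieverf, 8);
        return 24;              /* bytes after the opcode */
}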
1932 | static void | ||
1830 | encode_getdeviceinfo(struct xdr_stream *xdr, | 1933 | encode_getdeviceinfo(struct xdr_stream *xdr, |
1831 | const struct nfs4_getdeviceinfo_args *args, | 1934 | const struct nfs4_getdeviceinfo_args *args, |
1832 | struct compound_hdr *hdr) | 1935 | struct compound_hdr *hdr) |
@@ -1888,7 +1991,7 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
1888 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); | 1991 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); |
1889 | /* Only whole file layouts */ | 1992 | /* Only whole file layouts */ |
1890 | p = xdr_encode_hyper(p, 0); /* offset */ | 1993 | p = xdr_encode_hyper(p, 0); /* offset */ |
1891 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */ | 1994 | p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ |
1892 | *p++ = cpu_to_be32(0); /* reclaim */ | 1995 | *p++ = cpu_to_be32(0); /* reclaim */ |
1893 | p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); | 1996 | p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); |
1894 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ | 1997 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ |
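The length change above hides an off-by-one worth spelling out: the committed range is half-open, [offset, offset + length), so with offset pinned at 0, covering the final written byte takes length = lastbytewritten + 1 rather than the previous whole-file NFS4_MAX_UINT64. As a one-line helper (illustrative, not kernel API):

#include <stdint.h>

/* Committed range is [0, length); to include byte lastbytewritten the
 * length must be one past it. */
static uint64_t layoutcommit_length(uint64_t lastbytewritten)
{
        return lastbytewritten + 1;
}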
@@ -1938,6 +2041,46 @@ encode_layoutreturn(struct xdr_stream *xdr, | |||
1938 | hdr->nops++; | 2041 | hdr->nops++; |
1939 | hdr->replen += decode_layoutreturn_maxsz; | 2042 | hdr->replen += decode_layoutreturn_maxsz; |
1940 | } | 2043 | } |
2044 | |||
2045 | static int | ||
2046 | encode_secinfo_no_name(struct xdr_stream *xdr, | ||
2047 | const struct nfs41_secinfo_no_name_args *args, | ||
2048 | struct compound_hdr *hdr) | ||
2049 | { | ||
2050 | __be32 *p; | ||
2051 | p = reserve_space(xdr, 8); | ||
2052 | *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); | ||
2053 | *p++ = cpu_to_be32(args->style); | ||
2054 | hdr->nops++; | ||
2055 | hdr->replen += decode_secinfo_no_name_maxsz; | ||
2056 | return 0; | ||
2057 | } | ||
2058 | |||
2059 | static void encode_test_stateid(struct xdr_stream *xdr, | ||
2060 | struct nfs41_test_stateid_args *args, | ||
2061 | struct compound_hdr *hdr) | ||
2062 | { | ||
2063 | __be32 *p; | ||
2064 | |||
2065 | p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); | ||
2066 | *p++ = cpu_to_be32(OP_TEST_STATEID); | ||
2067 | *p++ = cpu_to_be32(1); | ||
2068 | xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); | ||
2069 | hdr->nops++; | ||
2070 | hdr->replen += decode_test_stateid_maxsz; | ||
2071 | } | ||
2072 | |||
2073 | static void encode_free_stateid(struct xdr_stream *xdr, | ||
2074 | struct nfs41_free_stateid_args *args, | ||
2075 | struct compound_hdr *hdr) | ||
2076 | { | ||
2077 | __be32 *p; | ||
2078 | p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); | ||
2079 | *p++ = cpu_to_be32(OP_FREE_STATEID); | ||
2080 | xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); | ||
2081 | hdr->nops++; | ||
2082 | hdr->replen += decode_free_stateid_maxsz; | ||
2083 | } | ||
1941 | #endif /* CONFIG_NFS_V4_1 */ | 2084 | #endif /* CONFIG_NFS_V4_1 */ |
1942 | 2085 | ||
1943 | /* | 2086 | /* |
@@ -2536,7 +2679,7 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, | |||
2536 | struct compound_hdr hdr = { | 2679 | struct compound_hdr hdr = { |
2537 | .nops = 0, | 2680 | .nops = 0, |
2538 | }; | 2681 | }; |
2539 | const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; | 2682 | const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; |
2540 | 2683 | ||
2541 | encode_compound_hdr(xdr, req, &hdr); | 2684 | encode_compound_hdr(xdr, req, &hdr); |
2542 | encode_setclientid_confirm(xdr, arg, &hdr); | 2685 | encode_setclientid_confirm(xdr, arg, &hdr); |
@@ -2680,7 +2823,7 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, | |||
2680 | struct compound_hdr hdr = { | 2823 | struct compound_hdr hdr = { |
2681 | .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), | 2824 | .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), |
2682 | }; | 2825 | }; |
2683 | const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; | 2826 | const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; |
2684 | 2827 | ||
2685 | encode_compound_hdr(xdr, req, &hdr); | 2828 | encode_compound_hdr(xdr, req, &hdr); |
2686 | encode_sequence(xdr, &args->la_seq_args, &hdr); | 2829 | encode_sequence(xdr, &args->la_seq_args, &hdr); |
@@ -2707,6 +2850,24 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, | |||
2707 | } | 2850 | } |
2708 | 2851 | ||
2709 | /* | 2852 | /* |
2853 | * Encode GETDEVICELIST request | ||
2854 | */ | ||
2855 | static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, | ||
2856 | struct xdr_stream *xdr, | ||
2857 | struct nfs4_getdevicelist_args *args) | ||
2858 | { | ||
2859 | struct compound_hdr hdr = { | ||
2860 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2861 | }; | ||
2862 | |||
2863 | encode_compound_hdr(xdr, req, &hdr); | ||
2864 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2865 | encode_putfh(xdr, args->fh, &hdr); | ||
2866 | encode_getdevicelist(xdr, args, &hdr); | ||
2867 | encode_nops(&hdr); | ||
2868 | } | ||
2869 | |||
2870 | /* | ||
2710 | * Encode GETDEVICEINFO request | 2871 | * Encode GETDEVICEINFO request |
2711 | */ | 2872 | */ |
2712 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, | 2873 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, |
@@ -2790,6 +2951,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, | |||
2790 | encode_layoutreturn(xdr, args, &hdr); | 2951 | encode_layoutreturn(xdr, args, &hdr); |
2791 | encode_nops(&hdr); | 2952 | encode_nops(&hdr); |
2792 | } | 2953 | } |
2954 | |||
2955 | /* | ||
2956 | * Encode SECINFO_NO_NAME request | ||
2957 | */ | ||
2958 | static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, | ||
2959 | struct xdr_stream *xdr, | ||
2960 | struct nfs41_secinfo_no_name_args *args) | ||
2961 | { | ||
2962 | struct compound_hdr hdr = { | ||
2963 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2964 | }; | ||
2965 | |||
2966 | encode_compound_hdr(xdr, req, &hdr); | ||
2967 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2968 | encode_putrootfh(xdr, &hdr); | ||
2969 | encode_secinfo_no_name(xdr, args, &hdr); | ||
2970 | encode_nops(&hdr); | ||
2971 | return 0; | ||
2972 | } | ||
2973 | |||
2974 | /* | ||
2975 | * Encode TEST_STATEID request | ||
2976 | */ | ||
2977 | static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, | ||
2978 | struct xdr_stream *xdr, | ||
2979 | struct nfs41_test_stateid_args *args) | ||
2980 | { | ||
2981 | struct compound_hdr hdr = { | ||
2982 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2983 | }; | ||
2984 | |||
2985 | encode_compound_hdr(xdr, req, &hdr); | ||
2986 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2987 | encode_test_stateid(xdr, args, &hdr); | ||
2988 | encode_nops(&hdr); | ||
2989 | } | ||
2990 | |||
2991 | /* | ||
2992 | * Encode FREE_STATEID request | ||
2993 | */ | ||
2994 | static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, | ||
2995 | struct xdr_stream *xdr, | ||
2996 | struct nfs41_free_stateid_args *args) | ||
2997 | { | ||
2998 | struct compound_hdr hdr = { | ||
2999 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
3000 | }; | ||
3001 | |||
3002 | encode_compound_hdr(xdr, req, &hdr); | ||
3003 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
3004 | encode_free_stateid(xdr, args, &hdr); | ||
3005 | encode_nops(&hdr); | ||
3006 | } | ||
2793 | #endif /* CONFIG_NFS_V4_1 */ | 3007 | #endif /* CONFIG_NFS_V4_1 */ |
2794 | 3008 | ||
2795 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 3009 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
@@ -2890,14 +3104,17 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) | |||
2890 | goto out_overflow; | 3104 | goto out_overflow; |
2891 | bmlen = be32_to_cpup(p); | 3105 | bmlen = be32_to_cpup(p); |
2892 | 3106 | ||
2893 | bitmap[0] = bitmap[1] = 0; | 3107 | bitmap[0] = bitmap[1] = bitmap[2] = 0; |
2894 | p = xdr_inline_decode(xdr, (bmlen << 2)); | 3108 | p = xdr_inline_decode(xdr, (bmlen << 2)); |
2895 | if (unlikely(!p)) | 3109 | if (unlikely(!p)) |
2896 | goto out_overflow; | 3110 | goto out_overflow; |
2897 | if (bmlen > 0) { | 3111 | if (bmlen > 0) { |
2898 | bitmap[0] = be32_to_cpup(p++); | 3112 | bitmap[0] = be32_to_cpup(p++); |
2899 | if (bmlen > 1) | 3113 | if (bmlen > 1) { |
2900 | bitmap[1] = be32_to_cpup(p); | 3114 | bitmap[1] = be32_to_cpup(p++); |
3115 | if (bmlen > 2) | ||
3116 | bitmap[2] = be32_to_cpup(p); | ||
3117 | } | ||
2901 | } | 3118 | } |
2902 | return 0; | 3119 | return 0; |
2903 | out_overflow: | 3120 | out_overflow: |
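The growing nest of ifs in decode_attr_bitmap is equivalent to a loop that keeps the first three words and ignores any extras a server may legally send. A user-space sketch of the same behavior, names invented:

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

/* p points at the XDR word count; avail is how many 32-bit words are
 * readable.  Keeps words 0..2, skips the rest, and returns the number
 * of words consumed, or -1 on a short buffer. */
static long decode_bitmap3(const uint32_t *p, size_t avail,
                           uint32_t bitmap[3])
{
        uint32_t bmlen, i;

        if (avail < 1)
                return -1;
        bmlen = ntohl(p[0]);
        if ((size_t)bmlen > avail - 1)
                return -1;
        bitmap[0] = bitmap[1] = bitmap[2] = 0;
        for (i = 0; i < bmlen; i++)
                if (i < 3)      /* extra words are legal; ignore them */
                        bitmap[i] = ntohl(p[1 + i]);
        return 1 + bmlen;
}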
@@ -2929,8 +3146,9 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3 | |||
2929 | return ret; | 3146 | return ret; |
2930 | bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; | 3147 | bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; |
2931 | } else | 3148 | } else |
2932 | bitmask[0] = bitmask[1] = 0; | 3149 | bitmask[0] = bitmask[1] = bitmask[2] = 0; |
2933 | dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]); | 3150 | dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__, |
3151 | bitmask[0], bitmask[1], bitmask[2]); | ||
2934 | return 0; | 3152 | return 0; |
2935 | } | 3153 | } |
2936 | 3154 | ||
@@ -3984,7 +4202,7 @@ out_overflow: | |||
3984 | static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) | 4202 | static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) |
3985 | { | 4203 | { |
3986 | __be32 *savep; | 4204 | __be32 *savep; |
3987 | uint32_t attrlen, bitmap[2] = {0}; | 4205 | uint32_t attrlen, bitmap[3] = {0}; |
3988 | int status; | 4206 | int status; |
3989 | 4207 | ||
3990 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4208 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4010,7 +4228,7 @@ xdr_error: | |||
4010 | static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) | 4228 | static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) |
4011 | { | 4229 | { |
4012 | __be32 *savep; | 4230 | __be32 *savep; |
4013 | uint32_t attrlen, bitmap[2] = {0}; | 4231 | uint32_t attrlen, bitmap[3] = {0}; |
4014 | int status; | 4232 | int status; |
4015 | 4233 | ||
4016 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4234 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4042,7 +4260,7 @@ xdr_error: | |||
4042 | static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) | 4260 | static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) |
4043 | { | 4261 | { |
4044 | __be32 *savep; | 4262 | __be32 *savep; |
4045 | uint32_t attrlen, bitmap[2] = {0}; | 4263 | uint32_t attrlen, bitmap[3] = {0}; |
4046 | int status; | 4264 | int status; |
4047 | 4265 | ||
4048 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4266 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4182,7 +4400,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat | |||
4182 | { | 4400 | { |
4183 | __be32 *savep; | 4401 | __be32 *savep; |
4184 | uint32_t attrlen, | 4402 | uint32_t attrlen, |
4185 | bitmap[2] = {0}; | 4403 | bitmap[3] = {0}; |
4186 | int status; | 4404 | int status; |
4187 | 4405 | ||
4188 | status = decode_op_hdr(xdr, OP_GETATTR); | 4406 | status = decode_op_hdr(xdr, OP_GETATTR); |
@@ -4268,10 +4486,32 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4268 | return status; | 4486 | return status; |
4269 | } | 4487 | } |
4270 | 4488 | ||
4489 | /* | ||
4490 | * The preferred block size for layout-directed I/O | ||
4491 | */ | ||
4492 | static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, | ||
4493 | uint32_t *res) | ||
4494 | { | ||
4495 | __be32 *p; | ||
4496 | |||
4497 | dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); | ||
4498 | *res = 0; | ||
4499 | if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) { | ||
4500 | p = xdr_inline_decode(xdr, 4); | ||
4501 | if (unlikely(!p)) { | ||
4502 | print_overflow_msg(__func__, xdr); | ||
4503 | return -EIO; | ||
4504 | } | ||
4505 | *res = be32_to_cpup(p); | ||
4506 | bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE; | ||
4507 | } | ||
4508 | return 0; | ||
4509 | } | ||
4510 | |||
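decode_attr_layout_blksize follows the per-attribute idiom used throughout this file: test the bit, consume the fixed-width value, then clear the bit so that verify_attr_len can confirm every advertised attribute was consumed. The idiom distilled to user-space C (hypothetical helper, not kernel API):

#include <stdint.h>
#include <arpa/inet.h>

/* Decode one optional 32-bit attribute from word 2 of the bitmap.
 * pp advances past the value only when the bit was set; clearing the
 * bit marks the attribute as consumed for the trailing length check. */
static int decode_attr_u32(const uint32_t **pp, uint32_t *bitmap2,
                           uint32_t bit, uint32_t *res)
{
        *res = 0;
        if (*bitmap2 & bit) {
                *res = ntohl(**pp);
                (*pp)++;
                *bitmap2 &= ~bit;
        }
        return 0;
}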
4271 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | 4511 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) |
4272 | { | 4512 | { |
4273 | __be32 *savep; | 4513 | __be32 *savep; |
4274 | uint32_t attrlen, bitmap[2]; | 4514 | uint32_t attrlen, bitmap[3]; |
4275 | int status; | 4515 | int status; |
4276 | 4516 | ||
4277 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4517 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4299,6 +4539,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | |||
4299 | status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); | 4539 | status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); |
4300 | if (status != 0) | 4540 | if (status != 0) |
4301 | goto xdr_error; | 4541 | goto xdr_error; |
4542 | status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); | ||
4543 | if (status) | ||
4544 | goto xdr_error; | ||
4302 | 4545 | ||
4303 | status = verify_attr_len(xdr, savep, attrlen); | 4546 | status = verify_attr_len(xdr, savep, attrlen); |
4304 | xdr_error: | 4547 | xdr_error: |
@@ -4718,7 +4961,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4718 | { | 4961 | { |
4719 | __be32 *savep; | 4962 | __be32 *savep; |
4720 | uint32_t attrlen, | 4963 | uint32_t attrlen, |
4721 | bitmap[2] = {0}; | 4964 | bitmap[3] = {0}; |
4722 | struct kvec *iov = req->rq_rcv_buf.head; | 4965 | struct kvec *iov = req->rq_rcv_buf.head; |
4723 | int status; | 4966 | int status; |
4724 | 4967 | ||
@@ -4977,11 +5220,17 @@ static int decode_exchange_id(struct xdr_stream *xdr, | |||
4977 | if (unlikely(status)) | 5220 | if (unlikely(status)) |
4978 | return status; | 5221 | return status; |
4979 | 5222 | ||
4980 | /* Throw away server_scope */ | 5223 | /* Save server_scope */ |
4981 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); | 5224 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); |
4982 | if (unlikely(status)) | 5225 | if (unlikely(status)) |
4983 | return status; | 5226 | return status; |
4984 | 5227 | ||
5228 | if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) | ||
5229 | return -EIO; | ||
5230 | |||
5231 | memcpy(res->server_scope->server_scope, dummy_str, dummy); | ||
5232 | res->server_scope->server_scope_sz = dummy; | ||
5233 | |||
4985 | /* Throw away Implementation id array */ | 5234 | /* Throw away Implementation id array */ |
4986 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); | 5235 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); |
4987 | if (unlikely(status)) | 5236 | if (unlikely(status)) |
@@ -5141,6 +5390,53 @@ out_overflow: | |||
5141 | } | 5390 | } |
5142 | 5391 | ||
5143 | #if defined(CONFIG_NFS_V4_1) | 5392 | #if defined(CONFIG_NFS_V4_1) |
5393 | /* | ||
5394 | * TODO: need to handle the case where eof != true. | ||
5395 | */ | ||
5396 | static int decode_getdevicelist(struct xdr_stream *xdr, | ||
5397 | struct pnfs_devicelist *res) | ||
5398 | { | ||
5399 | __be32 *p; | ||
5400 | int status, i; | ||
5401 | struct nfs_writeverf verftemp; | ||
5402 | |||
5403 | status = decode_op_hdr(xdr, OP_GETDEVICELIST); | ||
5404 | if (status) | ||
5405 | return status; | ||
5406 | |||
5407 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
5408 | if (unlikely(!p)) | ||
5409 | goto out_overflow; | ||
5410 | |||
5411 | /* TODO: Skip cookie for now */ | ||
5412 | p += 2; | ||
5413 | |||
5414 | /* Read verifier */ | ||
5415 | p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); | ||
5416 | |||
5417 | res->num_devs = be32_to_cpup(p); | ||
5418 | |||
5419 | dprintk("%s: num_dev %d\n", __func__, res->num_devs); | ||
5420 | |||
5421 | if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { | ||
5422 | printk(KERN_ERR "%s: too many devices returned: %u\n", | ||
5423 | __func__, res->num_devs); | ||
5424 | return -EIO; | ||
5425 | } | ||
5426 | |||
5427 | p = xdr_inline_decode(xdr, | ||
5428 | res->num_devs * NFS4_DEVICEID4_SIZE + 4); | ||
5429 | if (unlikely(!p)) | ||
5430 | goto out_overflow; | ||
5431 | for (i = 0; i < res->num_devs; i++) | ||
5432 | p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, | ||
5433 | NFS4_DEVICEID4_SIZE); | ||
5434 | res->eof = be32_to_cpup(p); | ||
5435 | return 0; | ||
5436 | out_overflow: | ||
5437 | print_overflow_msg(__func__, xdr); | ||
5438 | return -EIO; | ||
5439 | } | ||
5144 | 5440 | ||
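For reference, the GETDEVICELIST4resok body the decoder above walks is: an 8-byte cookie (skipped for now, per the TODO), an 8-byte verifier, a counted array of 16-byte device IDs, and an EOF flag. A standalone parse of that layout, with a hard cap of 4 devices standing in for NFS4_PNFS_GETDEVLIST_MAXNUM (whose actual value this hunk does not show); all names here are illustrative:

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>

#define DEVICEID4_SIZE 16       /* NFS4_DEVICEID4_SIZE */

struct devlist {
        uint32_t num_devs;
        uint8_t  dev_id[4][DEVICEID4_SIZE];
        uint32_t eof;
};

static uint32_t get_be32(const uint8_t *p)
{
        uint32_t v;

        memcpy(&v, p, 4);
        return ntohl(v);
}

/* p points past the GETDEVICELIST status code; len is bytes available. */
static int decode_getdevicelist_body(const uint8_t *p, size_t len,
                                     struct devlist *res)
{
        uint32_t i;

        if (len < 8 + 8 + 4)
                return -1;
        res->num_devs = get_be32(p + 16);       /* cookie + verifier skipped */
        if (res->num_devs > 4 ||
            len < 20 + (size_t)res->num_devs * DEVICEID4_SIZE + 4)
                return -1;
        p += 20;
        for (i = 0; i < res->num_devs; i++, p += DEVICEID4_SIZE)
                memcpy(res->dev_id[i], p, DEVICEID4_SIZE);
        res->eof = get_be32(p);
        return 0;
}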
5145 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | 5441 | static int decode_getdeviceinfo(struct xdr_stream *xdr, |
5146 | struct pnfs_device *pdev) | 5442 | struct pnfs_device *pdev) |
@@ -5303,6 +5599,7 @@ static int decode_layoutcommit(struct xdr_stream *xdr, | |||
5303 | int status; | 5599 | int status; |
5304 | 5600 | ||
5305 | status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); | 5601 | status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); |
5602 | res->status = status; | ||
5306 | if (status) | 5603 | if (status) |
5307 | return status; | 5604 | return status; |
5308 | 5605 | ||
@@ -5322,6 +5619,55 @@ out_overflow: | |||
5322 | print_overflow_msg(__func__, xdr); | 5619 | print_overflow_msg(__func__, xdr); |
5323 | return -EIO; | 5620 | return -EIO; |
5324 | } | 5621 | } |
5622 | |||
5623 | static int decode_test_stateid(struct xdr_stream *xdr, | ||
5624 | struct nfs41_test_stateid_res *res) | ||
5625 | { | ||
5626 | __be32 *p; | ||
5627 | int status; | ||
5628 | int num_res; | ||
5629 | |||
5630 | status = decode_op_hdr(xdr, OP_TEST_STATEID); | ||
5631 | if (status) | ||
5632 | return status; | ||
5633 | |||
5634 | p = xdr_inline_decode(xdr, 4); | ||
5635 | if (unlikely(!p)) | ||
5636 | goto out_overflow; | ||
5637 | num_res = be32_to_cpup(p++); | ||
5638 | if (num_res != 1) | ||
5639 | goto out; | ||
5640 | |||
5641 | p = xdr_inline_decode(xdr, 4); | ||
5642 | if (unlikely(!p)) | ||
5643 | goto out_overflow; | ||
5644 | res->status = be32_to_cpup(p++); | ||
5645 | return res->status; | ||
5646 | out_overflow: | ||
5647 | print_overflow_msg(__func__, xdr); | ||
5648 | out: | ||
5649 | return -EIO; | ||
5650 | } | ||
5651 | |||
5652 | static int decode_free_stateid(struct xdr_stream *xdr, | ||
5653 | struct nfs41_free_stateid_res *res) | ||
5654 | { | ||
5655 | __be32 *p; | ||
5656 | int status; | ||
5657 | |||
5658 | status = decode_op_hdr(xdr, OP_FREE_STATEID); | ||
5659 | if (status) | ||
5660 | return status; | ||
5661 | |||
5662 | p = xdr_inline_decode(xdr, 4); | ||
5663 | if (unlikely(!p)) | ||
5664 | goto out_overflow; | ||
5665 | res->status = be32_to_cpup(p++); | ||
5666 | return res->status; | ||
5667 | out_overflow: | ||
5668 | print_overflow_msg(__func__, xdr); | ||
5669 | return -EIO; | ||
5670 | } | ||
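A TEST_STATEID reply is a counted array of per-stateid status codes; since the encoder above always sends exactly one stateid, the decoder treats any other count as an error. The reply body parsed standalone (sketch only, invented names):

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

/* TEST_STATEID4resok for a single-stateid request: a count (must be 1
 * here) followed by one NFSv4 status code per tested stateid. */
static int decode_test_stateid_body(const uint32_t *p, size_t avail,
                                    uint32_t *st)
{
        if (avail < 2 || ntohl(p[0]) != 1)
                return -1;
        *st = ntohl(p[1]);      /* NFS4_OK means the stateid is still valid */
        return 0;
}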
5325 | #endif /* CONFIG_NFS_V4_1 */ | 5671 | #endif /* CONFIG_NFS_V4_1 */ |
5326 | 5672 | ||
5327 | /* | 5673 | /* |
@@ -6366,6 +6712,32 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, | |||
6366 | } | 6712 | } |
6367 | 6713 | ||
6368 | /* | 6714 | /* |
6715 | * Decode GETDEVICELIST response | ||
6716 | */ | ||
6717 | static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, | ||
6718 | struct xdr_stream *xdr, | ||
6719 | struct nfs4_getdevicelist_res *res) | ||
6720 | { | ||
6721 | struct compound_hdr hdr; | ||
6722 | int status; | ||
6723 | |||
6724 | dprintk("decoding getdevicelist!\n"); | ||
6725 | |||
6726 | status = decode_compound_hdr(xdr, &hdr); | ||
6727 | if (status != 0) | ||
6728 | goto out; | ||
6729 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6730 | if (status != 0) | ||
6731 | goto out; | ||
6732 | status = decode_putfh(xdr); | ||
6733 | if (status != 0) | ||
6734 | goto out; | ||
6735 | status = decode_getdevicelist(xdr, res->devlist); | ||
6736 | out: | ||
6737 | return status; | ||
6738 | } | ||
6739 | |||
6740 | /* | ||
6369 | * Decode GETDEVINFO response | 6741 | * Decode GETDEVINFO response |
6370 | */ | 6742 | */ |
6371 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, | 6743 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, |
@@ -6461,6 +6833,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, | |||
6461 | out: | 6833 | out: |
6462 | return status; | 6834 | return status; |
6463 | } | 6835 | } |
6836 | |||
6837 | /* | ||
6838 | * Decode SECINFO_NO_NAME response | ||
6839 | */ | ||
6840 | static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, | ||
6841 | struct xdr_stream *xdr, | ||
6842 | struct nfs4_secinfo_res *res) | ||
6843 | { | ||
6844 | struct compound_hdr hdr; | ||
6845 | int status; | ||
6846 | |||
6847 | status = decode_compound_hdr(xdr, &hdr); | ||
6848 | if (status) | ||
6849 | goto out; | ||
6850 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6851 | if (status) | ||
6852 | goto out; | ||
6853 | status = decode_putrootfh(xdr); | ||
6854 | if (status) | ||
6855 | goto out; | ||
6856 | status = decode_secinfo(xdr, res); | ||
6857 | out: | ||
6858 | return status; | ||
6859 | } | ||
6860 | |||
6861 | /* | ||
6862 | * Decode TEST_STATEID response | ||
6863 | */ | ||
6864 | static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, | ||
6865 | struct xdr_stream *xdr, | ||
6866 | struct nfs41_test_stateid_res *res) | ||
6867 | { | ||
6868 | struct compound_hdr hdr; | ||
6869 | int status; | ||
6870 | |||
6871 | status = decode_compound_hdr(xdr, &hdr); | ||
6872 | if (status) | ||
6873 | goto out; | ||
6874 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6875 | if (status) | ||
6876 | goto out; | ||
6877 | status = decode_test_stateid(xdr, res); | ||
6878 | out: | ||
6879 | return status; | ||
6880 | } | ||
6881 | |||
6882 | /* | ||
6883 | * Decode FREE_STATEID response | ||
6884 | */ | ||
6885 | static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp, | ||
6886 | struct xdr_stream *xdr, | ||
6887 | struct nfs41_free_stateid_res *res) | ||
6888 | { | ||
6889 | struct compound_hdr hdr; | ||
6890 | int status; | ||
6891 | |||
6892 | status = decode_compound_hdr(xdr, &hdr); | ||
6893 | if (status) | ||
6894 | goto out; | ||
6895 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6896 | if (status) | ||
6897 | goto out; | ||
6898 | status = decode_free_stateid(xdr, res); | ||
6899 | out: | ||
6900 | return status; | ||
6901 | } | ||
6464 | #endif /* CONFIG_NFS_V4_1 */ | 6902 | #endif /* CONFIG_NFS_V4_1 */ |
6465 | 6903 | ||
6466 | /** | 6904 | /** |
@@ -6480,7 +6918,7 @@ out: | |||
6480 | int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | 6918 | int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, |
6481 | int plus) | 6919 | int plus) |
6482 | { | 6920 | { |
6483 | uint32_t bitmap[2] = {0}; | 6921 | uint32_t bitmap[3] = {0}; |
6484 | uint32_t len; | 6922 | uint32_t len; |
6485 | __be32 *p = xdr_inline_decode(xdr, 4); | 6923 | __be32 *p = xdr_inline_decode(xdr, 4); |
6486 | if (unlikely(!p)) | 6924 | if (unlikely(!p)) |
@@ -6663,6 +7101,10 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
6663 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | 7101 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), |
6664 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), | 7102 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), |
6665 | PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), | 7103 | PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), |
7104 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), | ||
7105 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), | ||
7106 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), | ||
7107 | PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), | ||
6666 | #endif /* CONFIG_NFS_V4_1 */ | 7108 | #endif /* CONFIG_NFS_V4_1 */ |
6667 | }; | 7109 | }; |
6668 | 7110 | ||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8ff2ea3f10ef..d0cda12fddc3 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
479 | for (i = 0; i < ios->numdevs; i++) { | 479 | for (i = 0; i < ios->numdevs; i++) { |
480 | struct osd_sense_info osi; | 480 | struct osd_sense_info osi; |
481 | struct osd_request *or = ios->per_dev[i].or; | 481 | struct osd_request *or = ios->per_dev[i].or; |
482 | unsigned dev; | ||
483 | int ret; | 482 | int ret; |
484 | 483 | ||
485 | if (!or) | 484 | if (!or) |
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
500 | 499 | ||
501 | continue; /* we recovered */ | 500 | continue; /* we recovered */ |
502 | } | 501 | } |
503 | dev = ios->per_dev[i].dev; | 502 | objlayout_io_set_result(&ios->ol_state, i, |
504 | objlayout_io_set_result(&ios->ol_state, dev, | 503 | &ios->layout->comps[i].oc_object_id, |
505 | &ios->layout->comps[dev].oc_object_id, | ||
506 | osd_pri_2_pnfs_err(osi.osd_err_pri), | 504 | osd_pri_2_pnfs_err(osi.osd_err_pri), |
507 | ios->per_dev[i].offset, | 505 | ios->per_dev[i].offset, |
508 | ios->per_dev[i].length, | 506 | ios->per_dev[i].length, |
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | |||
589 | } | 587 | } |
590 | 588 | ||
591 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | 589 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, |
592 | unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, | 590 | unsigned pgbase, struct _objio_per_comp *per_dev, int len, |
593 | gfp_t gfp_flags) | 591 | gfp_t gfp_flags) |
594 | { | 592 | { |
595 | unsigned pg = *cur_pg; | 593 | unsigned pg = *cur_pg; |
594 | int cur_len = len; | ||
596 | struct request_queue *q = | 595 | struct request_queue *q = |
597 | osd_request_queue(_io_od(ios, per_dev->dev)); | 596 | osd_request_queue(_io_od(ios, per_dev->dev)); |
598 | 597 | ||
599 | per_dev->length += cur_len; | ||
600 | |||
601 | if (per_dev->bio == NULL) { | 598 | if (per_dev->bio == NULL) { |
602 | unsigned stripes = ios->layout->num_comps / | 599 | unsigned pages_in_stripe = ios->layout->group_width * |
603 | ios->layout->mirrors_p1; | ||
604 | unsigned pages_in_stripe = stripes * | ||
605 | (ios->layout->stripe_unit / PAGE_SIZE); | 600 | (ios->layout->stripe_unit / PAGE_SIZE); |
606 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | 601 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / |
607 | stripes; | 602 | ios->layout->group_width; |
608 | 603 | ||
609 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | 604 | if (BIO_MAX_PAGES_KMALLOC < bio_size) |
610 | bio_size = BIO_MAX_PAGES_KMALLOC; | 605 | bio_size = BIO_MAX_PAGES_KMALLOC; |
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | |||
632 | } | 627 | } |
633 | BUG_ON(cur_len); | 628 | BUG_ON(cur_len); |
634 | 629 | ||
630 | per_dev->length += len; | ||
635 | *cur_pg = pg; | 631 | *cur_pg = pg; |
636 | return 0; | 632 | return 0; |
637 | } | 633 | } |
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
650 | int ret = 0; | 646 | int ret = 0; |
651 | 647 | ||
652 | while (length) { | 648 | while (length) { |
653 | struct _objio_per_comp *per_dev = &ios->per_dev[dev]; | 649 | struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; |
654 | unsigned cur_len, page_off = 0; | 650 | unsigned cur_len, page_off = 0; |
655 | 651 | ||
656 | if (!per_dev->length) { | 652 | if (!per_dev->length) { |
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
670 | cur_len = stripe_unit; | 666 | cur_len = stripe_unit; |
671 | } | 667 | } |
672 | 668 | ||
673 | if (max_comp < dev) | 669 | if (max_comp < dev - first_dev) |
674 | max_comp = dev; | 670 | max_comp = dev - first_dev; |
675 | } else { | 671 | } else { |
676 | cur_len = stripe_unit; | 672 | cur_len = stripe_unit; |
677 | } | 673 | } |
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
806 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | 802 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; |
807 | unsigned dev = per_dev->dev; | 803 | unsigned dev = per_dev->dev; |
808 | struct pnfs_osd_object_cred *cred = | 804 | struct pnfs_osd_object_cred *cred = |
809 | &ios->layout->comps[dev]; | 805 | &ios->layout->comps[cur_comp]; |
810 | struct osd_obj_id obj = { | 806 | struct osd_obj_id obj = { |
811 | .partition = cred->oc_object_id.oid_partition_id, | 807 | .partition = cred->oc_object_id.oid_partition_id, |
812 | .id = cred->oc_object_id.oid_object_id, | 808 | .id = cred->oc_object_id.oid_object_id, |
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
904 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | 900 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
905 | struct osd_request *or = NULL; | 901 | struct osd_request *or = NULL; |
906 | struct pnfs_osd_object_cred *cred = | 902 | struct pnfs_osd_object_cred *cred = |
907 | &ios->layout->comps[dev]; | 903 | &ios->layout->comps[cur_comp]; |
908 | struct osd_obj_id obj = { | 904 | struct osd_obj_id obj = { |
909 | .partition = cred->oc_object_id.oid_partition_id, | 905 | .partition = cred->oc_object_id.oid_partition_id, |
910 | .id = cred->oc_object_id.oid_object_id, | 906 | .id = cred->oc_object_id.oid_object_id, |
@@ -1000,13 +996,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | |||
1000 | if (!pnfs_generic_pg_test(pgio, prev, req)) | 996 | if (!pnfs_generic_pg_test(pgio, prev, req)) |
1001 | return false; | 997 | return false; |
1002 | 998 | ||
1003 | if (pgio->pg_lseg == NULL) | ||
1004 | return true; | ||
1005 | |||
1006 | return pgio->pg_count + req->wb_bytes <= | 999 | return pgio->pg_count + req->wb_bytes <= |
1007 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; | 1000 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; |
1008 | } | 1001 | } |
1009 | 1002 | ||
1003 | static const struct nfs_pageio_ops objio_pg_read_ops = { | ||
1004 | .pg_init = pnfs_generic_pg_init_read, | ||
1005 | .pg_test = objio_pg_test, | ||
1006 | .pg_doio = pnfs_generic_pg_readpages, | ||
1007 | }; | ||
1008 | |||
1009 | static const struct nfs_pageio_ops objio_pg_write_ops = { | ||
1010 | .pg_init = pnfs_generic_pg_init_write, | ||
1011 | .pg_test = objio_pg_test, | ||
1012 | .pg_doio = pnfs_generic_pg_writepages, | ||
1013 | }; | ||
1014 | |||
1010 | static struct pnfs_layoutdriver_type objlayout_type = { | 1015 | static struct pnfs_layoutdriver_type objlayout_type = { |
1011 | .id = LAYOUT_OSD2_OBJECTS, | 1016 | .id = LAYOUT_OSD2_OBJECTS, |
1012 | .name = "LAYOUT_OSD2_OBJECTS", | 1017 | .name = "LAYOUT_OSD2_OBJECTS", |
@@ -1020,7 +1025,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { | |||
1020 | 1025 | ||
1021 | .read_pagelist = objlayout_read_pagelist, | 1026 | .read_pagelist = objlayout_read_pagelist, |
1022 | .write_pagelist = objlayout_write_pagelist, | 1027 | .write_pagelist = objlayout_write_pagelist, |
1023 | .pg_test = objio_pg_test, | 1028 | .pg_read_ops = &objio_pg_read_ops, |
1029 | .pg_write_ops = &objio_pg_write_ops, | ||
1024 | 1030 | ||
1025 | .free_deviceid_node = objio_free_deviceid_node, | 1031 | .free_deviceid_node = objio_free_deviceid_node, |
1026 | 1032 | ||
@@ -1055,5 +1061,7 @@ objlayout_exit(void) | |||
1055 | __func__); | 1061 | __func__); |
1056 | } | 1062 | } |
1057 | 1063 | ||
1064 | MODULE_ALIAS("nfs-layouttype4-2"); | ||
1065 | |||
1058 | module_init(objlayout_init); | 1066 | module_init(objlayout_init); |
1059 | module_exit(objlayout_exit); | 1067 | module_exit(objlayout_exit); |
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index 16fc758e9123..b3918f7ac34d 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, | |||
170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); | 170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); |
171 | layout->olo_comps_index = be32_to_cpup(p++); | 171 | layout->olo_comps_index = be32_to_cpup(p++); |
172 | layout->olo_num_comps = be32_to_cpup(p++); | 172 | layout->olo_num_comps = be32_to_cpup(p++); |
173 | dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, | ||
174 | layout->olo_comps_index, layout->olo_num_comps); | ||
175 | |||
173 | iter->total_comps = layout->olo_num_comps; | 176 | iter->total_comps = layout->olo_num_comps; |
174 | return 0; | 177 | return 0; |
175 | } | 178 | } |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18449f43c568..b60970cc7f1f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | |||
230 | */ | 230 | */ |
231 | void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | 231 | void nfs_pageio_init(struct nfs_pageio_descriptor *desc, |
232 | struct inode *inode, | 232 | struct inode *inode, |
233 | int (*doio)(struct nfs_pageio_descriptor *), | 233 | const struct nfs_pageio_ops *pg_ops, |
234 | size_t bsize, | 234 | size_t bsize, |
235 | int io_flags) | 235 | int io_flags) |
236 | { | 236 | { |
@@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
240 | desc->pg_bsize = bsize; | 240 | desc->pg_bsize = bsize; |
241 | desc->pg_base = 0; | 241 | desc->pg_base = 0; |
242 | desc->pg_moreio = 0; | 242 | desc->pg_moreio = 0; |
243 | desc->pg_recoalesce = 0; | ||
243 | desc->pg_inode = inode; | 244 | desc->pg_inode = inode; |
244 | desc->pg_doio = doio; | 245 | desc->pg_ops = pg_ops; |
245 | desc->pg_ioflags = io_flags; | 246 | desc->pg_ioflags = io_flags; |
246 | desc->pg_error = 0; | 247 | desc->pg_error = 0; |
247 | desc->pg_lseg = NULL; | 248 | desc->pg_lseg = NULL; |
248 | desc->pg_test = nfs_generic_pg_test; | ||
249 | pnfs_pageio_init(desc, inode); | ||
250 | } | 249 | } |
251 | 250 | ||
252 | /** | 251 | /** |
@@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
276 | return false; | 275 | return false; |
277 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) | 276 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) |
278 | return false; | 277 | return false; |
279 | return pgio->pg_test(pgio, prev, req); | 278 | return pgio->pg_ops->pg_test(pgio, prev, req); |
280 | } | 279 | } |
281 | 280 | ||
282 | /** | 281 | /** |
@@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||
297 | if (!nfs_can_coalesce_requests(prev, req, desc)) | 296 | if (!nfs_can_coalesce_requests(prev, req, desc)) |
298 | return 0; | 297 | return 0; |
299 | } else { | 298 | } else { |
299 | if (desc->pg_ops->pg_init) | ||
300 | desc->pg_ops->pg_init(desc, req); | ||
300 | desc->pg_base = req->wb_pgbase; | 301 | desc->pg_base = req->wb_pgbase; |
301 | } | 302 | } |
302 | nfs_list_remove_request(req); | 303 | nfs_list_remove_request(req); |
@@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||
311 | static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | 312 | static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) |
312 | { | 313 | { |
313 | if (!list_empty(&desc->pg_list)) { | 314 | if (!list_empty(&desc->pg_list)) { |
314 | int error = desc->pg_doio(desc); | 315 | int error = desc->pg_ops->pg_doio(desc); |
315 | if (error < 0) | 316 | if (error < 0) |
316 | desc->pg_error = error; | 317 | desc->pg_error = error; |
317 | else | 318 | else |
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | |||
331 | * Returns true if the request 'req' was successfully coalesced into the | 332 | * Returns true if the request 'req' was successfully coalesced into the |
332 | * existing list of pages 'desc'. | 333 | * existing list of pages 'desc'. |
333 | */ | 334 | */ |
334 | int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | 335 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, |
335 | struct nfs_page *req) | 336 | struct nfs_page *req) |
336 | { | 337 | { |
337 | while (!nfs_pageio_do_add_request(desc, req)) { | 338 | while (!nfs_pageio_do_add_request(desc, req)) { |
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
340 | if (desc->pg_error < 0) | 341 | if (desc->pg_error < 0) |
341 | return 0; | 342 | return 0; |
342 | desc->pg_moreio = 0; | 343 | desc->pg_moreio = 0; |
344 | if (desc->pg_recoalesce) | ||
345 | return 0; | ||
343 | } | 346 | } |
344 | return 1; | 347 | return 1; |
345 | } | 348 | } |
346 | 349 | ||
350 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | ||
351 | { | ||
352 | LIST_HEAD(head); | ||
353 | |||
354 | do { | ||
355 | list_splice_init(&desc->pg_list, &head); | ||
356 | desc->pg_bytes_written -= desc->pg_count; | ||
357 | desc->pg_count = 0; | ||
358 | desc->pg_base = 0; | ||
359 | desc->pg_recoalesce = 0; | ||
360 | |||
361 | while (!list_empty(&head)) { | ||
362 | struct nfs_page *req; | ||
363 | |||
364 | req = list_first_entry(&head, struct nfs_page, wb_list); | ||
365 | nfs_list_remove_request(req); | ||
366 | if (__nfs_pageio_add_request(desc, req)) | ||
367 | continue; | ||
368 | if (desc->pg_error < 0) | ||
369 | return 0; | ||
370 | break; | ||
371 | } | ||
372 | } while (desc->pg_recoalesce); | ||
373 | return 1; | ||
374 | } | ||
375 | |||
376 | int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | ||
377 | struct nfs_page *req) | ||
378 | { | ||
379 | int ret; | ||
380 | |||
381 | do { | ||
382 | ret = __nfs_pageio_add_request(desc, req); | ||
383 | if (ret) | ||
384 | break; | ||
385 | if (desc->pg_error < 0) | ||
386 | break; | ||
387 | ret = nfs_do_recoalesce(desc); | ||
388 | } while (ret); | ||
389 | return ret; | ||
390 | } | ||
391 | |||
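The retry loop in the new nfs_pageio_add_request is worth a gloss: __nfs_pageio_add_request can refuse a page when a layout driver has just reset the descriptor back to MDS I/O mid-series (setting pg_recoalesce), and nfs_do_recoalesce then replays every already-queued page through the new pg_ops before the add is retried. Only the control flow below mirrors pagelist.c; the stub bodies are trivial so the sketch compiles:

struct desc {
        int pg_error;
        int pg_recoalesce;
};

/* Stand-ins for __nfs_pageio_add_request and nfs_do_recoalesce: a zero
 * return from try_add means "refused", from recoalesce means "gave up". */
static int try_add(struct desc *d)
{
        (void)d;
        return 1;
}

static int recoalesce(struct desc *d)
{
        d->pg_recoalesce = 0;
        return 1;
}

static int add_request(struct desc *d)
{
        int ret;

        do {
                ret = try_add(d);       /* queued: done */
                if (ret)
                        break;
                if (d->pg_error < 0)    /* hard failure: give up */
                        break;
                ret = recoalesce(d);    /* replay queue, then retry the add */
        } while (ret);
        return ret;
}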
347 | /** | 392 | /** |
348 | * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor | 393 | * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor |
349 | * @desc: pointer to io descriptor | 394 | * @desc: pointer to io descriptor |
350 | */ | 395 | */ |
351 | void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) | 396 | void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) |
352 | { | 397 | { |
353 | nfs_pageio_doio(desc); | 398 | for (;;) { |
399 | nfs_pageio_doio(desc); | ||
400 | if (!desc->pg_recoalesce) | ||
401 | break; | ||
402 | if (!nfs_do_recoalesce(desc)) | ||
403 | break; | ||
404 | } | ||
354 | } | 405 | } |
355 | 406 | ||
356 | /** | 407 | /** |
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) | |||
369 | if (!list_empty(&desc->pg_list)) { | 420 | if (!list_empty(&desc->pg_list)) { |
370 | struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); | 421 | struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); |
371 | if (index != prev->wb_index + 1) | 422 | if (index != prev->wb_index + 1) |
372 | nfs_pageio_doio(desc); | 423 | nfs_pageio_complete(desc); |
373 | } | 424 | } |
374 | } | 425 | } |
375 | 426 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 29c0ca7fc347..e550e8836c37 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -28,6 +28,7 @@ | |||
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <linux/nfs_fs.h> | 30 | #include <linux/nfs_fs.h> |
31 | #include <linux/nfs_page.h> | ||
31 | #include "internal.h" | 32 | #include "internal.h" |
32 | #include "pnfs.h" | 33 | #include "pnfs.h" |
33 | #include "iostat.h" | 34 | #include "iostat.h" |
@@ -75,8 +76,11 @@ find_pnfs_driver(u32 id) | |||
75 | void | 76 | void |
76 | unset_pnfs_layoutdriver(struct nfs_server *nfss) | 77 | unset_pnfs_layoutdriver(struct nfs_server *nfss) |
77 | { | 78 | { |
78 | if (nfss->pnfs_curr_ld) | 79 | if (nfss->pnfs_curr_ld) { |
80 | if (nfss->pnfs_curr_ld->clear_layoutdriver) | ||
81 | nfss->pnfs_curr_ld->clear_layoutdriver(nfss); | ||
79 | module_put(nfss->pnfs_curr_ld->owner); | 82 | module_put(nfss->pnfs_curr_ld->owner); |
83 | } | ||
80 | nfss->pnfs_curr_ld = NULL; | 84 | nfss->pnfs_curr_ld = NULL; |
81 | } | 85 | } |
82 | 86 | ||
@@ -87,7 +91,8 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) | |||
87 | * @id layout type. Zero (illegal layout type) indicates pNFS not in use. | 91 | * @id layout type. Zero (illegal layout type) indicates pNFS not in use. |
88 | */ | 92 | */ |
89 | void | 93 | void |
90 | set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | 94 | set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, |
95 | u32 id) | ||
91 | { | 96 | { |
92 | struct pnfs_layoutdriver_type *ld_type = NULL; | 97 | struct pnfs_layoutdriver_type *ld_type = NULL; |
93 | 98 | ||
@@ -114,6 +119,13 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | |||
114 | goto out_no_driver; | 119 | goto out_no_driver; |
115 | } | 120 | } |
116 | server->pnfs_curr_ld = ld_type; | 121 | server->pnfs_curr_ld = ld_type; |
122 | if (ld_type->set_layoutdriver | ||
123 | && ld_type->set_layoutdriver(server, mntfh)) { | ||
124 | printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", | ||
125 | __func__, id); | ||
126 | module_put(ld_type->owner); | ||
127 | goto out_no_driver; | ||
128 | } | ||
117 | 129 | ||
118 | dprintk("%s: pNFS module for %u set\n", __func__, id); | 130 | dprintk("%s: pNFS module for %u set\n", __func__, id); |
119 | return; | 131 | return; |
@@ -189,6 +201,7 @@ static void | |||
189 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) | 201 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) |
190 | { | 202 | { |
191 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; | 203 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; |
204 | put_rpccred(lo->plh_lc_cred); | ||
192 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); | 205 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); |
193 | } | 206 | } |
194 | 207 | ||
@@ -223,6 +236,7 @@ static void | |||
223 | init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) | 236 | init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) |
224 | { | 237 | { |
225 | INIT_LIST_HEAD(&lseg->pls_list); | 238 | INIT_LIST_HEAD(&lseg->pls_list); |
239 | INIT_LIST_HEAD(&lseg->pls_lc_list); | ||
226 | atomic_set(&lseg->pls_refcount, 1); | 240 | atomic_set(&lseg->pls_refcount, 1); |
227 | smp_mb(); | 241 | smp_mb(); |
228 | set_bit(NFS_LSEG_VALID, &lseg->pls_flags); | 242 | set_bit(NFS_LSEG_VALID, &lseg->pls_flags); |
@@ -448,11 +462,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
448 | void | 462 | void |
449 | pnfs_destroy_all_layouts(struct nfs_client *clp) | 463 | pnfs_destroy_all_layouts(struct nfs_client *clp) |
450 | { | 464 | { |
465 | struct nfs_server *server; | ||
451 | struct pnfs_layout_hdr *lo; | 466 | struct pnfs_layout_hdr *lo; |
452 | LIST_HEAD(tmp_list); | 467 | LIST_HEAD(tmp_list); |
453 | 468 | ||
469 | nfs4_deviceid_mark_client_invalid(clp); | ||
470 | nfs4_deviceid_purge_client(clp); | ||
471 | |||
454 | spin_lock(&clp->cl_lock); | 472 | spin_lock(&clp->cl_lock); |
455 | list_splice_init(&clp->cl_layouts, &tmp_list); | 473 | rcu_read_lock(); |
474 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | ||
475 | if (!list_empty(&server->layouts)) | ||
476 | list_splice_init(&server->layouts, &tmp_list); | ||
477 | } | ||
478 | rcu_read_unlock(); | ||
456 | spin_unlock(&clp->cl_lock); | 479 | spin_unlock(&clp->cl_lock); |
457 | 480 | ||
458 | while (!list_empty(&tmp_list)) { | 481 | while (!list_empty(&tmp_list)) { |
@@ -661,6 +684,7 @@ _pnfs_return_layout(struct inode *ino) | |||
661 | lrp->args.stateid = stateid; | 684 | lrp->args.stateid = stateid; |
662 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | 685 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; |
663 | lrp->args.inode = ino; | 686 | lrp->args.inode = ino; |
687 | lrp->args.layout = lo; | ||
664 | lrp->clp = NFS_SERVER(ino)->nfs_client; | 688 | lrp->clp = NFS_SERVER(ino)->nfs_client; |
665 | 689 | ||
666 | status = nfs4_proc_layoutreturn(lrp); | 690 | status = nfs4_proc_layoutreturn(lrp); |
@@ -805,7 +829,9 @@ out: | |||
805 | } | 829 | } |
806 | 830 | ||
807 | static struct pnfs_layout_hdr * | 831 | static struct pnfs_layout_hdr * |
808 | alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) | 832 | alloc_init_layout_hdr(struct inode *ino, |
833 | struct nfs_open_context *ctx, | ||
834 | gfp_t gfp_flags) | ||
809 | { | 835 | { |
810 | struct pnfs_layout_hdr *lo; | 836 | struct pnfs_layout_hdr *lo; |
811 | 837 | ||
@@ -817,11 +843,14 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) | |||
817 | INIT_LIST_HEAD(&lo->plh_segs); | 843 | INIT_LIST_HEAD(&lo->plh_segs); |
818 | INIT_LIST_HEAD(&lo->plh_bulk_recall); | 844 | INIT_LIST_HEAD(&lo->plh_bulk_recall); |
819 | lo->plh_inode = ino; | 845 | lo->plh_inode = ino; |
846 | lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); | ||
820 | return lo; | 847 | return lo; |
821 | } | 848 | } |
822 | 849 | ||
823 | static struct pnfs_layout_hdr * | 850 | static struct pnfs_layout_hdr * |
824 | pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | 851 | pnfs_find_alloc_layout(struct inode *ino, |
852 | struct nfs_open_context *ctx, | ||
853 | gfp_t gfp_flags) | ||
825 | { | 854 | { |
826 | struct nfs_inode *nfsi = NFS_I(ino); | 855 | struct nfs_inode *nfsi = NFS_I(ino); |
827 | struct pnfs_layout_hdr *new = NULL; | 856 | struct pnfs_layout_hdr *new = NULL; |
@@ -836,7 +865,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | |||
836 | return nfsi->layout; | 865 | return nfsi->layout; |
837 | } | 866 | } |
838 | spin_unlock(&ino->i_lock); | 867 | spin_unlock(&ino->i_lock); |
839 | new = alloc_init_layout_hdr(ino, gfp_flags); | 868 | new = alloc_init_layout_hdr(ino, ctx, gfp_flags); |
840 | spin_lock(&ino->i_lock); | 869 | spin_lock(&ino->i_lock); |
841 | 870 | ||
842 | if (likely(nfsi->layout == NULL)) /* Won the race? */ | 871 | if (likely(nfsi->layout == NULL)) /* Won the race? */ |
@@ -920,7 +949,8 @@ pnfs_update_layout(struct inode *ino, | |||
920 | }; | 949 | }; |
921 | unsigned pg_offset; | 950 | unsigned pg_offset; |
922 | struct nfs_inode *nfsi = NFS_I(ino); | 951 | struct nfs_inode *nfsi = NFS_I(ino); |
923 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | 952 | struct nfs_server *server = NFS_SERVER(ino); |
953 | struct nfs_client *clp = server->nfs_client; | ||
924 | struct pnfs_layout_hdr *lo; | 954 | struct pnfs_layout_hdr *lo; |
925 | struct pnfs_layout_segment *lseg = NULL; | 955 | struct pnfs_layout_segment *lseg = NULL; |
926 | bool first = false; | 956 | bool first = false; |
@@ -928,7 +958,7 @@ pnfs_update_layout(struct inode *ino, | |||
928 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | 958 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) |
929 | return NULL; | 959 | return NULL; |
930 | spin_lock(&ino->i_lock); | 960 | spin_lock(&ino->i_lock); |
931 | lo = pnfs_find_alloc_layout(ino, gfp_flags); | 961 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
932 | if (lo == NULL) { | 962 | if (lo == NULL) { |
933 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); | 963 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); |
934 | goto out_unlock; | 964 | goto out_unlock; |
@@ -964,7 +994,7 @@ pnfs_update_layout(struct inode *ino, | |||
964 | */ | 994 | */ |
965 | spin_lock(&clp->cl_lock); | 995 | spin_lock(&clp->cl_lock); |
966 | BUG_ON(!list_empty(&lo->plh_layouts)); | 996 | BUG_ON(!list_empty(&lo->plh_layouts)); |
967 | list_add_tail(&lo->plh_layouts, &clp->cl_layouts); | 997 | list_add_tail(&lo->plh_layouts, &server->layouts); |
968 | spin_unlock(&clp->cl_lock); | 998 | spin_unlock(&clp->cl_lock); |
969 | } | 999 | } |
970 | 1000 | ||
@@ -973,7 +1003,8 @@ pnfs_update_layout(struct inode *ino, | |||
973 | arg.offset -= pg_offset; | 1003 | arg.offset -= pg_offset; |
974 | arg.length += pg_offset; | 1004 | arg.length += pg_offset; |
975 | } | 1005 | } |
976 | arg.length = PAGE_CACHE_ALIGN(arg.length); | 1006 | if (arg.length != NFS4_MAX_UINT64) |
1007 | arg.length = PAGE_CACHE_ALIGN(arg.length); | ||
977 | 1008 | ||
978 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 1009 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); |
979 | if (!lseg && first) { | 1010 | if (!lseg && first) { |
@@ -991,6 +1022,7 @@ out_unlock: | |||
991 | spin_unlock(&ino->i_lock); | 1022 | spin_unlock(&ino->i_lock); |
992 | goto out; | 1023 | goto out; |
993 | } | 1024 | } |
1025 | EXPORT_SYMBOL_GPL(pnfs_update_layout); | ||
994 | 1026 | ||
995 | int | 1027 | int |
996 | pnfs_layout_process(struct nfs4_layoutget *lgp) | 1028 | pnfs_layout_process(struct nfs4_layoutget *lgp) |
@@ -1048,35 +1080,71 @@ out_forget_reply: | |||
1048 | goto out; | 1080 | goto out; |
1049 | } | 1081 | } |
1050 | 1082 | ||
1083 | void | ||
1084 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | ||
1085 | { | ||
1086 | BUG_ON(pgio->pg_lseg != NULL); | ||
1087 | |||
1088 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
1089 | req->wb_context, | ||
1090 | req_offset(req), | ||
1091 | req->wb_bytes, | ||
1092 | IOMODE_READ, | ||
1093 | GFP_KERNEL); | ||
1094 | /* If no lseg, fall back to read through mds */ | ||
1095 | if (pgio->pg_lseg == NULL) | ||
1096 | nfs_pageio_reset_read_mds(pgio); | ||
1097 | |||
1098 | } | ||
1099 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); | ||
1100 | |||
1101 | void | ||
1102 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | ||
1103 | { | ||
1104 | BUG_ON(pgio->pg_lseg != NULL); | ||
1105 | |||
1106 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
1107 | req->wb_context, | ||
1108 | req_offset(req), | ||
1109 | req->wb_bytes, | ||
1110 | IOMODE_RW, | ||
1111 | GFP_NOFS); | ||
1112 | /* If no lseg, fall back to write through mds */ | ||
1113 | if (pgio->pg_lseg == NULL) | ||
1114 | nfs_pageio_reset_write_mds(pgio); | ||
1115 | } | ||
1116 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | ||
1117 | |||
1051 | bool | 1118 | bool |
1052 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 1119 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) |
1053 | struct nfs_page *req) | ||
1054 | { | 1120 | { |
1055 | enum pnfs_iomode access_type; | 1121 | struct nfs_server *server = NFS_SERVER(inode); |
1056 | gfp_t gfp_flags; | 1122 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; |
1057 | 1123 | ||
1058 | /* We assume that pg_ioflags == 0 iff we're reading a page */ | 1124 | if (ld == NULL) |
1059 | if (pgio->pg_ioflags == 0) { | 1125 | return false; |
1060 | access_type = IOMODE_READ; | 1126 | nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); |
1061 | gfp_flags = GFP_KERNEL; | 1127 | return true; |
1062 | } else { | 1128 | } |
1063 | access_type = IOMODE_RW; | ||
1064 | gfp_flags = GFP_NOFS; | ||
1065 | } | ||
1066 | 1129 | ||
1067 | if (pgio->pg_lseg == NULL) { | 1130 | bool |
1068 | if (pgio->pg_count != prev->wb_bytes) | 1131 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) |
1069 | return true; | 1132 | { |
1070 | /* This is first coelesce call for a series of nfs_pages */ | 1133 | struct nfs_server *server = NFS_SERVER(inode); |
1071 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1134 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; |
1072 | prev->wb_context, | 1135 | |
1073 | req_offset(prev), | 1136 | if (ld == NULL) |
1074 | pgio->pg_count, | 1137 | return false; |
1075 | access_type, | 1138 | nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); |
1076 | gfp_flags); | 1139 | return true; |
1077 | if (pgio->pg_lseg == NULL) | 1140 | } |
1078 | return true; | 1141 | |
1079 | } | 1142 | bool |
1143 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | ||
1144 | struct nfs_page *req) | ||
1145 | { | ||
1146 | if (pgio->pg_lseg == NULL) | ||
1147 | return nfs_generic_pg_test(pgio, prev, req); | ||
1080 | 1148 | ||
1081 | /* | 1149 | /* |
1082 | * Test if a nfs_page is fully contained in the pnfs_layout_range. | 1150 | * Test if a nfs_page is fully contained in the pnfs_layout_range. |
@@ -1120,15 +1188,30 @@ pnfs_ld_write_done(struct nfs_write_data *data) | |||
1120 | } | 1188 | } |
1121 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | 1189 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); |
1122 | 1190 | ||
1123 | enum pnfs_try_status | 1191 | static void |
1192 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | ||
1193 | struct nfs_write_data *data) | ||
1194 | { | ||
1195 | list_splice_tail_init(&data->pages, &desc->pg_list); | ||
1196 | if (data->req && list_empty(&data->req->wb_list)) | ||
1197 | nfs_list_add_request(data->req, &desc->pg_list); | ||
1198 | nfs_pageio_reset_write_mds(desc); | ||
1199 | desc->pg_recoalesce = 1; | ||
1200 | nfs_writedata_release(data); | ||
1201 | } | ||
1202 | |||
1203 | static enum pnfs_try_status | ||
1124 | pnfs_try_to_write_data(struct nfs_write_data *wdata, | 1204 | pnfs_try_to_write_data(struct nfs_write_data *wdata, |
1125 | const struct rpc_call_ops *call_ops, int how) | 1205 | const struct rpc_call_ops *call_ops, |
1206 | struct pnfs_layout_segment *lseg, | ||
1207 | int how) | ||
1126 | { | 1208 | { |
1127 | struct inode *inode = wdata->inode; | 1209 | struct inode *inode = wdata->inode; |
1128 | enum pnfs_try_status trypnfs; | 1210 | enum pnfs_try_status trypnfs; |
1129 | struct nfs_server *nfss = NFS_SERVER(inode); | 1211 | struct nfs_server *nfss = NFS_SERVER(inode); |
1130 | 1212 | ||
1131 | wdata->mds_ops = call_ops; | 1213 | wdata->mds_ops = call_ops; |
1214 | wdata->lseg = get_lseg(lseg); | ||
1132 | 1215 | ||
1133 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, | 1216 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, |
1134 | inode->i_ino, wdata->args.count, wdata->args.offset, how); | 1217 | inode->i_ino, wdata->args.count, wdata->args.offset, how); |
@@ -1144,6 +1227,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, | |||
1144 | return trypnfs; | 1227 | return trypnfs; |
1145 | } | 1228 | } |
1146 | 1229 | ||
1230 | static void | ||
1231 | pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) | ||
1232 | { | ||
1233 | struct nfs_write_data *data; | ||
1234 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | ||
1235 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | ||
1236 | |||
1237 | desc->pg_lseg = NULL; | ||
1238 | while (!list_empty(head)) { | ||
1239 | enum pnfs_try_status trypnfs; | ||
1240 | |||
1241 | data = list_entry(head->next, struct nfs_write_data, list); | ||
1242 | list_del_init(&data->list); | ||
1243 | |||
1244 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); | ||
1245 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
1246 | pnfs_write_through_mds(desc, data); | ||
1247 | } | ||
1248 | put_lseg(lseg); | ||
1249 | } | ||
1250 | |||
1251 | int | ||
1252 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | ||
1253 | { | ||
1254 | LIST_HEAD(head); | ||
1255 | int ret; | ||
1256 | |||
1257 | ret = nfs_generic_flush(desc, &head); | ||
1258 | if (ret != 0) { | ||
1259 | put_lseg(desc->pg_lseg); | ||
1260 | desc->pg_lseg = NULL; | ||
1261 | return ret; | ||
1262 | } | ||
1263 | pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); | ||
1264 | return 0; | ||
1265 | } | ||
1266 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | ||
1267 | |||
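pnfs_do_multiple_writes() above is a pop-and-dispatch loop: each nfs_write_data is detached with list_entry()/list_del_init(), offered to the layout driver, and on PNFS_NOT_ATTEMPTED its pages are spliced back onto the descriptor for a resend through the MDS. A minimal user-space sketch of that shape, using a plain singly-linked list instead of the kernel's struct list_head (all names here are illustrative, not kernel API):

#include <stdlib.h>

struct req { struct req *next; int id; };

/* Stand-in for pnfs_try_to_write_data(): pretend even ids succeed. */
static int try_pnfs(struct req *r) { return r->id % 2 == 0; }

/* Consume the input list; anything the fast path declines goes onto
 * a fallback list for the caller to resend the slow way. */
static struct req *dispatch(struct req *head)
{
    struct req *fallback = NULL;

    while (head) {
        struct req *r = head;

        head = head->next;            /* pop, like list_del_init() */
        if (try_pnfs(r)) {
            free(r);                  /* fast path took ownership   */
        } else {
            r->next = fallback;       /* ~ pnfs_write_through_mds() */
            fallback = r;
        }
    }
    return fallback;
}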
1147 | /* | 1268 | /* |
1148 | * Called by non-RPC-based layout drivers | 1269 | * Called by non-RPC-based layout drivers |
1149 | */ | 1270 | */ |
@@ -1167,18 +1288,32 @@ pnfs_ld_read_done(struct nfs_read_data *data) | |||
1167 | } | 1288 | } |
1168 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | 1289 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); |
1169 | 1290 | ||
1291 | static void | ||
1292 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | ||
1293 | struct nfs_read_data *data) | ||
1294 | { | ||
1295 | list_splice_tail_init(&data->pages, &desc->pg_list); | ||
1296 | if (data->req && list_empty(&data->req->wb_list)) | ||
1297 | nfs_list_add_request(data->req, &desc->pg_list); | ||
1298 | nfs_pageio_reset_read_mds(desc); | ||
1299 | desc->pg_recoalesce = 1; | ||
1300 | nfs_readdata_release(data); | ||
1301 | } | ||
1302 | |||
1170 | /* | 1303 | /* |
1171 | * Call the appropriate parallel I/O subsystem read function. | 1304 | * Call the appropriate parallel I/O subsystem read function. |
1172 | */ | 1305 | */ |
1173 | enum pnfs_try_status | 1306 | static enum pnfs_try_status |
1174 | pnfs_try_to_read_data(struct nfs_read_data *rdata, | 1307 | pnfs_try_to_read_data(struct nfs_read_data *rdata, |
1175 | const struct rpc_call_ops *call_ops) | 1308 | const struct rpc_call_ops *call_ops, |
1309 | struct pnfs_layout_segment *lseg) | ||
1176 | { | 1310 | { |
1177 | struct inode *inode = rdata->inode; | 1311 | struct inode *inode = rdata->inode; |
1178 | struct nfs_server *nfss = NFS_SERVER(inode); | 1312 | struct nfs_server *nfss = NFS_SERVER(inode); |
1179 | enum pnfs_try_status trypnfs; | 1313 | enum pnfs_try_status trypnfs; |
1180 | 1314 | ||
1181 | rdata->mds_ops = call_ops; | 1315 | rdata->mds_ops = call_ops; |
1316 | rdata->lseg = get_lseg(lseg); | ||
1182 | 1317 | ||
1183 | dprintk("%s: Reading ino:%lu %u@%llu\n", | 1318 | dprintk("%s: Reading ino:%lu %u@%llu\n", |
1184 | __func__, inode->i_ino, rdata->args.count, rdata->args.offset); | 1319 | __func__, inode->i_ino, rdata->args.count, rdata->args.offset); |
@@ -1194,17 +1329,56 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, | |||
1194 | return trypnfs; | 1329 | return trypnfs; |
1195 | } | 1330 | } |
1196 | 1331 | ||
1332 | static void | ||
1333 | pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) | ||
1334 | { | ||
1335 | struct nfs_read_data *data; | ||
1336 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | ||
1337 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | ||
1338 | |||
1339 | desc->pg_lseg = NULL; | ||
1340 | while (!list_empty(head)) { | ||
1341 | enum pnfs_try_status trypnfs; | ||
1342 | |||
1343 | data = list_entry(head->next, struct nfs_read_data, list); | ||
1344 | list_del_init(&data->list); | ||
1345 | |||
1346 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); | ||
1347 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
1348 | pnfs_read_through_mds(desc, data); | ||
1349 | } | ||
1350 | put_lseg(lseg); | ||
1351 | } | ||
1352 | |||
1353 | int | ||
1354 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | ||
1355 | { | ||
1356 | LIST_HEAD(head); | ||
1357 | int ret; | ||
1358 | |||
1359 | ret = nfs_generic_pagein(desc, &head); | ||
1360 | if (ret != 0) { | ||
1361 | put_lseg(desc->pg_lseg); | ||
1362 | desc->pg_lseg = NULL; | ||
1363 | return ret; | ||
1364 | } | ||
1365 | pnfs_do_multiple_reads(desc, &head); | ||
1366 | return 0; | ||
1367 | } | ||
1368 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); | ||
1369 | |||
1197 | /* | 1370 | /* |
1198 | * Currently there is only one (whole file) write lseg. | 1371 | * There can be multiple RW segments. |
1199 | */ | 1372 | */ |
1200 | static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode) | 1373 | static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) |
1201 | { | 1374 | { |
1202 | struct pnfs_layout_segment *lseg, *rv = NULL; | 1375 | struct pnfs_layout_segment *lseg; |
1203 | 1376 | ||
1204 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) | 1377 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { |
1205 | if (lseg->pls_range.iomode == IOMODE_RW) | 1378 | if (lseg->pls_range.iomode == IOMODE_RW && |
1206 | rv = lseg; | 1379 | test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) |
1207 | return rv; | 1380 | list_add(&lseg->pls_lc_list, listp); |
1381 | } | ||
1208 | } | 1382 | } |
1209 | 1383 | ||
1210 | void | 1384 | void |
@@ -1216,17 +1390,19 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) | |||
1216 | 1390 | ||
1217 | spin_lock(&nfsi->vfs_inode.i_lock); | 1391 | spin_lock(&nfsi->vfs_inode.i_lock); |
1218 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1392 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
1219 | /* references matched in nfs4_layoutcommit_release */ | ||
1220 | get_lseg(wdata->lseg); | ||
1221 | wdata->lseg->pls_lc_cred = | ||
1222 | get_rpccred(wdata->args.context->state->owner->so_cred); | ||
1223 | mark_as_dirty = true; | 1393 | mark_as_dirty = true; |
1224 | dprintk("%s: Set layoutcommit for inode %lu ", | 1394 | dprintk("%s: Set layoutcommit for inode %lu ", |
1225 | __func__, wdata->inode->i_ino); | 1395 | __func__, wdata->inode->i_ino); |
1226 | } | 1396 | } |
1227 | if (end_pos > wdata->lseg->pls_end_pos) | 1397 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { |
1228 | wdata->lseg->pls_end_pos = end_pos; | 1398 | /* references matched in nfs4_layoutcommit_release */ |
1399 | get_lseg(wdata->lseg); | ||
1400 | } | ||
1401 | if (end_pos > nfsi->layout->plh_lwb) | ||
1402 | nfsi->layout->plh_lwb = end_pos; | ||
1229 | spin_unlock(&nfsi->vfs_inode.i_lock); | 1403 | spin_unlock(&nfsi->vfs_inode.i_lock); |
1404 | dprintk("%s: lseg %p end_pos %llu\n", | ||
1405 | __func__, wdata->lseg, nfsi->layout->plh_lwb); | ||
1230 | 1406 | ||
1231 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | 1407 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one |
1232 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | 1408 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ |
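Two idioms carry the new pnfs_set_layoutcommit() above: test_and_set_bit() makes the "take a reference on first use" step idempotent (only the caller that flips NFS_LSEG_LAYOUTCOMMIT performs the get_lseg()), and plh_lwb is a high-water mark advanced under i_lock. A compacted sketch of both, with C11 atomics and a pthread mutex standing in for the kernel primitives (a model, not the kernel code):

#include <pthread.h>
#include <stdatomic.h>

#define LSEG_LAYOUTCOMMIT (1u << 0)

struct layout {
    pthread_mutex_t lock;   /* plays the role of i_lock      */
    long long lwb;          /* last write byte, like plh_lwb */
};

struct lseg {
    atomic_uint flags;
    atomic_int refcount;
};

static void mark_layoutcommit(struct layout *lo, struct lseg *ls,
                              long long end_pos)
{
    /* fetch_or returns the *old* flags: only the first caller sees
     * the bit clear and takes the extra reference. */
    if (!(atomic_fetch_or(&ls->flags, LSEG_LAYOUTCOMMIT) & LSEG_LAYOUTCOMMIT))
        atomic_fetch_add(&ls->refcount, 1);   /* ~ get_lseg() */

    pthread_mutex_lock(&lo->lock);
    if (end_pos > lo->lwb)                    /* high-water mark */
        lo->lwb = end_pos;
    pthread_mutex_unlock(&lo->lock);
}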
@@ -1235,6 +1411,14 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) | |||
1235 | } | 1411 | } |
1236 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | 1412 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); |
1237 | 1413 | ||
1414 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | ||
1415 | { | ||
1416 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); | ||
1417 | |||
1418 | if (nfss->pnfs_curr_ld->cleanup_layoutcommit) | ||
1419 | nfss->pnfs_curr_ld->cleanup_layoutcommit(data); | ||
1420 | } | ||
1421 | |||
1238 | /* | 1422 | /* |
1239 | * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and | 1423 | * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and |
1240 | * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough | 1424 | * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough |
@@ -1248,8 +1432,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1248 | { | 1432 | { |
1249 | struct nfs4_layoutcommit_data *data; | 1433 | struct nfs4_layoutcommit_data *data; |
1250 | struct nfs_inode *nfsi = NFS_I(inode); | 1434 | struct nfs_inode *nfsi = NFS_I(inode); |
1251 | struct pnfs_layout_segment *lseg; | ||
1252 | struct rpc_cred *cred; | ||
1253 | loff_t end_pos; | 1435 | loff_t end_pos; |
1254 | int status = 0; | 1436 | int status = 0; |
1255 | 1437 | ||
@@ -1266,30 +1448,25 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1266 | goto out; | 1448 | goto out; |
1267 | } | 1449 | } |
1268 | 1450 | ||
1451 | INIT_LIST_HEAD(&data->lseg_list); | ||
1269 | spin_lock(&inode->i_lock); | 1452 | spin_lock(&inode->i_lock); |
1270 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1453 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
1271 | spin_unlock(&inode->i_lock); | 1454 | spin_unlock(&inode->i_lock); |
1272 | kfree(data); | 1455 | kfree(data); |
1273 | goto out; | 1456 | goto out; |
1274 | } | 1457 | } |
1275 | /* | ||
1276 | * Currently only one (whole file) write lseg which is referenced | ||
1277 | * in pnfs_set_layoutcommit and will be found. | ||
1278 | */ | ||
1279 | lseg = pnfs_list_write_lseg(inode); | ||
1280 | 1458 | ||
1281 | end_pos = lseg->pls_end_pos; | 1459 | pnfs_list_write_lseg(inode, &data->lseg_list); |
1282 | cred = lseg->pls_lc_cred; | 1460 | |
1283 | lseg->pls_end_pos = 0; | 1461 | end_pos = nfsi->layout->plh_lwb; |
1284 | lseg->pls_lc_cred = NULL; | 1462 | nfsi->layout->plh_lwb = 0; |
1285 | 1463 | ||
1286 | memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, | 1464 | memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, |
1287 | sizeof(nfsi->layout->plh_stateid.data)); | 1465 | sizeof(nfsi->layout->plh_stateid.data)); |
1288 | spin_unlock(&inode->i_lock); | 1466 | spin_unlock(&inode->i_lock); |
1289 | 1467 | ||
1290 | data->args.inode = inode; | 1468 | data->args.inode = inode; |
1291 | data->lseg = lseg; | 1469 | data->cred = get_rpccred(nfsi->layout->plh_lc_cred); |
1292 | data->cred = cred; | ||
1293 | nfs_fattr_init(&data->fattr); | 1470 | nfs_fattr_init(&data->fattr); |
1294 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; | 1471 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; |
1295 | data->res.fattr = &data->fattr; | 1472 | data->res.fattr = &data->fattr; |
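pnfs_layoutcommit_inode() drains its state while holding i_lock: the inode flag is cleared, the marked lsegs move onto a private list, and plh_lwb is read and reset in the same critical section, so the RPC captures a consistent snapshot while new writes start accumulating fresh state. The snapshot-and-reset step in isolation (user-space stand-ins, illustrative only):

#include <pthread.h>

struct layout {
    pthread_mutex_t lock;
    long long lwb;          /* like plh_lwb */
};

/* Capture the current high-water mark and zero it in one critical
 * section, as the i_lock region above does. */
static long long snapshot_lwb(struct layout *lo)
{
    long long end_pos;

    pthread_mutex_lock(&lo->lock);
    end_pos = lo->lwb;
    lo->lwb = 0;
    pthread_mutex_unlock(&lo->lock);
    return end_pos;
}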
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 96bf4e6f45be..01cbfd54f3cb 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -36,16 +36,16 @@ | |||
36 | enum { | 36 | enum { |
37 | NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ | 37 | NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ |
38 | NFS_LSEG_ROC, /* roc bit received from server */ | 38 | NFS_LSEG_ROC, /* roc bit received from server */ |
39 | NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ | ||
39 | }; | 40 | }; |
40 | 41 | ||
41 | struct pnfs_layout_segment { | 42 | struct pnfs_layout_segment { |
42 | struct list_head pls_list; | 43 | struct list_head pls_list; |
44 | struct list_head pls_lc_list; | ||
43 | struct pnfs_layout_range pls_range; | 45 | struct pnfs_layout_range pls_range; |
44 | atomic_t pls_refcount; | 46 | atomic_t pls_refcount; |
45 | unsigned long pls_flags; | 47 | unsigned long pls_flags; |
46 | struct pnfs_layout_hdr *pls_layout; | 48 | struct pnfs_layout_hdr *pls_layout; |
47 | struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */ | ||
48 | loff_t pls_end_pos; /* LAYOUTCOMMIT write end */ | ||
49 | }; | 49 | }; |
50 | 50 | ||
51 | enum pnfs_try_status { | 51 | enum pnfs_try_status { |
@@ -80,6 +80,9 @@ struct pnfs_layoutdriver_type { | |||
80 | struct module *owner; | 80 | struct module *owner; |
81 | unsigned flags; | 81 | unsigned flags; |
82 | 82 | ||
83 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); | ||
84 | int (*clear_layoutdriver) (struct nfs_server *); | ||
85 | |||
83 | struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); | 86 | struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); |
84 | void (*free_layout_hdr) (struct pnfs_layout_hdr *); | 87 | void (*free_layout_hdr) (struct pnfs_layout_hdr *); |
85 | 88 | ||
@@ -87,7 +90,8 @@ struct pnfs_layoutdriver_type { | |||
87 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | 90 | void (*free_lseg) (struct pnfs_layout_segment *lseg); |
88 | 91 | ||
89 | /* test for nfs page cache coalescing */ | 92 | /* test for nfs page cache coalescing */ |
90 | bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 93 | const struct nfs_pageio_ops *pg_read_ops; |
94 | const struct nfs_pageio_ops *pg_write_ops; | ||
91 | 95 | ||
92 | /* Returns true if layoutdriver wants to divert this request to | 96 | /* Returns true if layoutdriver wants to divert this request to |
93 | * driver's commit routine. | 97 | * driver's commit routine. |
@@ -109,6 +113,8 @@ struct pnfs_layoutdriver_type { | |||
109 | struct xdr_stream *xdr, | 113 | struct xdr_stream *xdr, |
110 | const struct nfs4_layoutreturn_args *args); | 114 | const struct nfs4_layoutreturn_args *args); |
111 | 115 | ||
116 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); | ||
117 | |||
112 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, | 118 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, |
113 | struct xdr_stream *xdr, | 119 | struct xdr_stream *xdr, |
114 | const struct nfs4_layoutcommit_args *args); | 120 | const struct nfs4_layoutcommit_args *args); |
@@ -124,6 +130,8 @@ struct pnfs_layout_hdr { | |||
124 | unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ | 130 | unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ |
125 | u32 plh_barrier; /* ignore lower seqids */ | 131 | u32 plh_barrier; /* ignore lower seqids */ |
126 | unsigned long plh_flags; | 132 | unsigned long plh_flags; |
133 | loff_t plh_lwb; /* last write byte for layoutcommit */ | ||
134 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ | ||
127 | struct inode *plh_inode; | 135 | struct inode *plh_inode; |
128 | }; | 136 | }; |
129 | 137 | ||
@@ -136,10 +144,21 @@ struct pnfs_device { | |||
136 | unsigned int pglen; | 144 | unsigned int pglen; |
137 | }; | 145 | }; |
138 | 146 | ||
147 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 | ||
148 | |||
149 | struct pnfs_devicelist { | ||
150 | unsigned int eof; | ||
151 | unsigned int num_devs; | ||
152 | struct nfs4_deviceid dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM]; | ||
153 | }; | ||
154 | |||
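struct pnfs_devicelist is a fixed-size batch (NFS4_PNFS_GETDEVLIST_MAXNUM entries) plus an eof flag, so GETDEVICELIST results can be consumed in bounded chunks. A hypothetical consumption loop in the same shape; fetch_batch() below is an invented stand-in for nfs4_proc_getdevicelist() (the real call would also need a cookie/verifier to resume), so this only shows the eof-terminated batching pattern:

#include <stdio.h>

#define MAXNUM 16

struct devicelist {
    unsigned int eof;        /* server has no more entries */
    unsigned int num_devs;   /* valid entries in dev_id[]  */
    unsigned long dev_id[MAXNUM];
};

/* Invented helper: returns three fake batches, then sets eof. */
static int fetch_batch(struct devicelist *dl)
{
    static unsigned long next;

    dl->num_devs = MAXNUM;
    for (unsigned int i = 0; i < MAXNUM; i++)
        dl->dev_id[i] = next++;
    dl->eof = (next >= 3 * MAXNUM);
    return 0;
}

static void walk_devices(void)
{
    struct devicelist dl;

    do {
        if (fetch_batch(&dl) != 0)
            break;
        for (unsigned int i = 0; i < dl.num_devs; i++)
            printf("device %lu\n", dl.dev_id[i]);
    } while (!dl.eof);
}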
139 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | 155 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); |
140 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | 156 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); |
141 | 157 | ||
142 | /* nfs4proc.c */ | 158 | /* nfs4proc.c */ |
159 | extern int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
160 | const struct nfs_fh *fh, | ||
161 | struct pnfs_devicelist *devlist); | ||
143 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 162 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
144 | struct pnfs_device *dev); | 163 | struct pnfs_device *dev); |
145 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | 164 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); |
@@ -148,16 +167,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | |||
148 | /* pnfs.c */ | 167 | /* pnfs.c */ |
149 | void get_layout_hdr(struct pnfs_layout_hdr *lo); | 168 | void get_layout_hdr(struct pnfs_layout_hdr *lo); |
150 | void put_lseg(struct pnfs_layout_segment *lseg); | 169 | void put_lseg(struct pnfs_layout_segment *lseg); |
151 | struct pnfs_layout_segment * | 170 | |
152 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | 171 | bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); |
153 | loff_t pos, u64 count, enum pnfs_iomode access_type, | 172 | bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); |
154 | gfp_t gfp_flags); | 173 | |
155 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | 174 | void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); |
156 | void unset_pnfs_layoutdriver(struct nfs_server *); | 175 | void unset_pnfs_layoutdriver(struct nfs_server *); |
157 | enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, | 176 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); |
158 | const struct rpc_call_ops *, int); | 177 | int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); |
159 | enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, | 178 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); |
160 | const struct rpc_call_ops *); | 179 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); |
161 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); | 180 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); |
162 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | 181 | int pnfs_layout_process(struct nfs4_layoutget *lgp); |
163 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 182 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
@@ -178,10 +197,24 @@ void pnfs_roc_release(struct inode *ino); | |||
178 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 197 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
179 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); | 198 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); |
180 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | 199 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); |
200 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | ||
181 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 201 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
182 | int _pnfs_return_layout(struct inode *); | 202 | int _pnfs_return_layout(struct inode *); |
183 | int pnfs_ld_write_done(struct nfs_write_data *); | 203 | int pnfs_ld_write_done(struct nfs_write_data *); |
184 | int pnfs_ld_read_done(struct nfs_read_data *); | 204 | int pnfs_ld_read_done(struct nfs_read_data *); |
205 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | ||
206 | struct nfs_open_context *ctx, | ||
207 | loff_t pos, | ||
208 | u64 count, | ||
209 | enum pnfs_iomode iomode, | ||
210 | gfp_t gfp_flags); | ||
211 | |||
212 | void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); | ||
213 | |||
214 | /* nfs4_deviceid_flags */ | ||
215 | enum { | ||
216 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ | ||
217 | }; | ||
185 | 218 | ||
186 | /* pnfs_dev.c */ | 219 | /* pnfs_dev.c */ |
187 | struct nfs4_deviceid_node { | 220 | struct nfs4_deviceid_node { |
@@ -189,13 +222,13 @@ struct nfs4_deviceid_node { | |||
189 | struct hlist_node tmpnode; | 222 | struct hlist_node tmpnode; |
190 | const struct pnfs_layoutdriver_type *ld; | 223 | const struct pnfs_layoutdriver_type *ld; |
191 | const struct nfs_client *nfs_client; | 224 | const struct nfs_client *nfs_client; |
225 | unsigned long flags; | ||
192 | struct nfs4_deviceid deviceid; | 226 | struct nfs4_deviceid deviceid; |
193 | atomic_t ref; | 227 | atomic_t ref; |
194 | }; | 228 | }; |
195 | 229 | ||
196 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); | 230 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); |
197 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 231 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
198 | struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
199 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 232 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
200 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | 233 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, |
201 | const struct pnfs_layoutdriver_type *, | 234 | const struct pnfs_layoutdriver_type *, |
@@ -293,15 +326,6 @@ static inline int pnfs_return_layout(struct inode *ino) | |||
293 | return 0; | 326 | return 0; |
294 | } | 327 | } |
295 | 328 | ||
296 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, | ||
297 | struct inode *inode) | ||
298 | { | ||
299 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
300 | |||
301 | if (ld) | ||
302 | pgio->pg_test = ld->pg_test; | ||
303 | } | ||
304 | |||
305 | #else /* CONFIG_NFS_V4_1 */ | 329 | #else /* CONFIG_NFS_V4_1 */ |
306 | 330 | ||
307 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 331 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
@@ -322,28 +346,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) | |||
322 | { | 346 | { |
323 | } | 347 | } |
324 | 348 | ||
325 | static inline struct pnfs_layout_segment * | ||
326 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | ||
327 | loff_t pos, u64 count, enum pnfs_iomode access_type, | ||
328 | gfp_t gfp_flags) | ||
329 | { | ||
330 | return NULL; | ||
331 | } | ||
332 | |||
333 | static inline enum pnfs_try_status | ||
334 | pnfs_try_to_read_data(struct nfs_read_data *data, | ||
335 | const struct rpc_call_ops *call_ops) | ||
336 | { | ||
337 | return PNFS_NOT_ATTEMPTED; | ||
338 | } | ||
339 | |||
340 | static inline enum pnfs_try_status | ||
341 | pnfs_try_to_write_data(struct nfs_write_data *data, | ||
342 | const struct rpc_call_ops *call_ops, int how) | ||
343 | { | ||
344 | return PNFS_NOT_ATTEMPTED; | ||
345 | } | ||
346 | |||
347 | static inline int pnfs_return_layout(struct inode *ino) | 349 | static inline int pnfs_return_layout(struct inode *ino) |
348 | { | 350 | { |
349 | return 0; | 351 | return 0; |
@@ -377,7 +379,8 @@ pnfs_roc_drain(struct inode *ino, u32 *barrier) | |||
377 | return false; | 379 | return false; |
378 | } | 380 | } |
379 | 381 | ||
380 | static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) | 382 | static inline void set_pnfs_layoutdriver(struct nfs_server *s, |
383 | const struct nfs_fh *mntfh, u32 id) | ||
381 | { | 384 | { |
382 | } | 385 | } |
383 | 386 | ||
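The #else branch here is the usual compile-out idiom: when CONFIG_NFS_V4_1 is off, each pNFS entry point is replaced by a static inline stub with the same signature, so call sites compile unchanged and the "did pNFS handle it?" checks constant-fold to false. The same shape outside the kernel (illustrative):

#include <stdbool.h>
#include <stdio.h>

/* #define HAVE_FANCY_IO 1 */

#ifdef HAVE_FANCY_IO
bool fancy_io_init(int fd);          /* real implementation elsewhere */
#else
/* Stub: same signature, trivially false, inlined away entirely. */
static inline bool fancy_io_init(int fd) { (void)fd; return false; }
#endif

int main(void)
{
    if (!fancy_io_init(0))           /* no #ifdef at the call site */
        puts("using the plain path");
    return 0;
}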
@@ -385,9 +388,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | |||
385 | { | 388 | { |
386 | } | 389 | } |
387 | 390 | ||
388 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, | 391 | static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) |
389 | struct inode *inode) | 392 | { |
393 | return false; | ||
394 | } | ||
395 | |||
396 | static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) | ||
390 | { | 397 | { |
398 | return false; | ||
391 | } | 399 | } |
392 | 400 | ||
393 | static inline void | 401 | static inline void |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index f0f8e1e22f6c..6fda5228ef56 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
100 | 100 | ||
101 | rcu_read_lock(); | 101 | rcu_read_lock(); |
102 | d = _lookup_deviceid(ld, clp, id, hash); | 102 | d = _lookup_deviceid(ld, clp, id, hash); |
103 | if (d && !atomic_inc_not_zero(&d->ref)) | 103 | if (d != NULL) |
104 | d = NULL; | 104 | atomic_inc(&d->ref); |
105 | rcu_read_unlock(); | 105 | rcu_read_unlock(); |
106 | return d; | 106 | return d; |
107 | } | 107 | } |
@@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
115 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); | 115 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); |
116 | 116 | ||
117 | /* | 117 | /* |
118 | * Unhash and put deviceid | 118 | * Remove a deviceid from cache |
119 | * | 119 | * |
120 | * @clp nfs_client associated with deviceid | 120 | * @clp nfs_client associated with deviceid |
121 | * @id the deviceid to unhash | 121 | * @id the deviceid to unhash |
122 | * | 122 | * |
123 | * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. | 123 | * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. |
124 | */ | 124 | */ |
125 | struct nfs4_deviceid_node * | 125 | void |
126 | nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | 126 | nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, |
127 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | 127 | const struct nfs_client *clp, const struct nfs4_deviceid *id) |
128 | { | 128 | { |
129 | struct nfs4_deviceid_node *d; | 129 | struct nfs4_deviceid_node *d; |
@@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
134 | rcu_read_unlock(); | 134 | rcu_read_unlock(); |
135 | if (!d) { | 135 | if (!d) { |
136 | spin_unlock(&nfs4_deviceid_lock); | 136 | spin_unlock(&nfs4_deviceid_lock); |
137 | return NULL; | 137 | return; |
138 | } | 138 | } |
139 | hlist_del_init_rcu(&d->node); | 139 | hlist_del_init_rcu(&d->node); |
140 | spin_unlock(&nfs4_deviceid_lock); | 140 | spin_unlock(&nfs4_deviceid_lock); |
@@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
142 | 142 | ||
143 | /* balance the initial ref set in pnfs_insert_deviceid */ | 143 | /* balance the initial ref set in pnfs_insert_deviceid */ |
144 | if (atomic_dec_and_test(&d->ref)) | 144 | if (atomic_dec_and_test(&d->ref)) |
145 | return d; | 145 | d->ld->free_deviceid_node(d); |
146 | |||
147 | return NULL; | ||
148 | } | ||
149 | EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); | ||
150 | |||
151 | /* | ||
152 | * Delete a deviceid from cache | ||
153 | * | ||
154 | * @clp struct nfs_client qualifying the deviceid | ||
155 | * @id deviceid to delete | ||
156 | */ | ||
157 | void | ||
158 | nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
159 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
160 | { | ||
161 | struct nfs4_deviceid_node *d; | ||
162 | |||
163 | d = nfs4_unhash_put_deviceid(ld, clp, id); | ||
164 | if (!d) | ||
165 | return; | ||
166 | d->ld->free_deviceid_node(d); | ||
167 | } | 146 | } |
168 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | 147 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); |
169 | 148 | ||
@@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | |||
177 | INIT_HLIST_NODE(&d->tmpnode); | 156 | INIT_HLIST_NODE(&d->tmpnode); |
178 | d->ld = ld; | 157 | d->ld = ld; |
179 | d->nfs_client = nfs_client; | 158 | d->nfs_client = nfs_client; |
159 | d->flags = 0; | ||
180 | d->deviceid = *id; | 160 | d->deviceid = *id; |
181 | atomic_set(&d->ref, 1); | 161 | atomic_set(&d->ref, 1); |
182 | } | 162 | } |
@@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); | |||
221 | * | 201 | * |
222 | * @d deviceid node to put | 202 | * @d deviceid node to put |
223 | * | 203 | * |
224 | * @ret true iff the node was deleted | 204 | * return true iff the node was deleted |
205 | * Note that the test for d->ref == 0 is sufficient to establish | ||
206 | * that the node is no longer hashed in the global device id cache. | ||
225 | */ | 207 | */ |
226 | bool | 208 | bool |
227 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) | 209 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) |
228 | { | 210 | { |
229 | if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) | 211 | if (!atomic_dec_and_test(&d->ref)) |
230 | return false; | 212 | return false; |
231 | hlist_del_init_rcu(&d->node); | ||
232 | spin_unlock(&nfs4_deviceid_lock); | ||
233 | synchronize_rcu(); | ||
234 | d->ld->free_deviceid_node(d); | 213 | d->ld->free_deviceid_node(d); |
235 | return true; | 214 | return true; |
236 | } | 215 | } |
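The simplified nfs4_put_deviceid_node() relies on plain atomic_dec_and_test(): because nfs4_delete_deviceid() has already unhashed the node, the caller that drops the count to zero can free immediately, with no need to retake nfs4_deviceid_lock or wait out an RCU grace period. The classic last-put-frees shape with C11 atomics (a model, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct node {
    atomic_int ref;
    /* payload ... */
};

static struct node *node_get(struct node *n)
{
    atomic_fetch_add(&n->ref, 1);
    return n;
}

/* Returns true iff this was the final reference. */
static bool node_put(struct node *n)
{
    /* fetch_sub returns the previous value: 1 means we hit zero. */
    if (atomic_fetch_sub(&n->ref, 1) != 1)
        return false;
    free(n);
    return true;
}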
@@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) | |||
275 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) | 254 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) |
276 | _deviceid_purge_client(clp, h); | 255 | _deviceid_purge_client(clp, h); |
277 | } | 256 | } |
257 | |||
258 | /* | ||
259 | * Stop use of all deviceids associated with an nfs_client | ||
260 | */ | ||
261 | void | ||
262 | nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) | ||
263 | { | ||
264 | struct nfs4_deviceid_node *d; | ||
265 | struct hlist_node *n; | ||
266 | int i; | ||
267 | |||
268 | rcu_read_lock(); | ||
269 | for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) { | ||
270 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) | ||
271 | if (d->nfs_client == clp) | ||
272 | set_bit(NFS_DEVICEID_INVALID, &d->flags); | ||
273 | } | ||
274 | rcu_read_unlock(); | ||
275 | } | ||
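nfs4_deviceid_mark_client_invalid() deliberately frees nothing: under rcu_read_lock() it may only flip a flag bit, and holders of existing references see NFS_DEVICEID_INVALID and stop issuing I/O through the device, while the refcount still governs when memory is released. A user-space model of "invalidate by flag, free later via refcount" (stdatomic in place of set_bit/test_bit):

#include <stdatomic.h>
#include <stdbool.h>

#define DEV_INVALID (1u << 0)

struct device {
    atomic_uint flags;
    atomic_int ref;
};

/* Writer side: cheap, lock-free, never frees in place. */
static void mark_invalid(struct device *d)
{
    atomic_fetch_or(&d->flags, DEV_INVALID);   /* ~ set_bit() */
}

/* Reader side: existing references keep the memory alive; the flag
 * just tells readers to stop using this device for new I/O. */
static bool device_usable(struct device *d)
{
    return !(atomic_load(&d->flags) & DEV_INVALID);
}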
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a68679f538fc..2171c043ab08 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,8 +30,7 @@ | |||
30 | 30 | ||
31 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 31 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
32 | 32 | ||
33 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); | 33 | static const struct nfs_pageio_ops nfs_pageio_read_ops; |
34 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); | ||
35 | static const struct rpc_call_ops nfs_read_partial_ops; | 34 | static const struct rpc_call_ops nfs_read_partial_ops; |
36 | static const struct rpc_call_ops nfs_read_full_ops; | 35 | static const struct rpc_call_ops nfs_read_full_ops; |
37 | 36 | ||
@@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p) | |||
68 | mempool_free(p, nfs_rdata_mempool); | 67 | mempool_free(p, nfs_rdata_mempool); |
69 | } | 68 | } |
70 | 69 | ||
71 | static void nfs_readdata_release(struct nfs_read_data *rdata) | 70 | void nfs_readdata_release(struct nfs_read_data *rdata) |
72 | { | 71 | { |
73 | put_lseg(rdata->lseg); | 72 | put_lseg(rdata->lseg); |
74 | put_nfs_open_context(rdata->args.context); | 73 | put_nfs_open_context(rdata->args.context); |
@@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) | |||
113 | } | 112 | } |
114 | } | 113 | } |
115 | 114 | ||
115 | static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, | ||
116 | struct inode *inode) | ||
117 | { | ||
118 | nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, | ||
119 | NFS_SERVER(inode)->rsize, 0); | ||
120 | } | ||
121 | |||
122 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | ||
123 | { | ||
124 | pgio->pg_ops = &nfs_pageio_read_ops; | ||
125 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | ||
126 | } | ||
127 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | ||
128 | |||
129 | static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, | ||
130 | struct inode *inode) | ||
131 | { | ||
132 | if (!pnfs_pageio_init_read(pgio, inode)) | ||
133 | nfs_pageio_init_read_mds(pgio, inode); | ||
134 | } | ||
135 | |||
116 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | 136 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, |
117 | struct page *page) | 137 | struct page *page) |
118 | { | 138 | { |
@@ -131,14 +151,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
131 | if (len < PAGE_CACHE_SIZE) | 151 | if (len < PAGE_CACHE_SIZE) |
132 | zero_user_segment(page, len, PAGE_CACHE_SIZE); | 152 | zero_user_segment(page, len, PAGE_CACHE_SIZE); |
133 | 153 | ||
134 | nfs_pageio_init(&pgio, inode, NULL, 0, 0); | 154 | nfs_pageio_init_read(&pgio, inode); |
135 | nfs_list_add_request(new, &pgio.pg_list); | 155 | nfs_pageio_add_request(&pgio, new); |
136 | pgio.pg_count = len; | 156 | nfs_pageio_complete(&pgio); |
137 | |||
138 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) | ||
139 | nfs_pagein_multi(&pgio); | ||
140 | else | ||
141 | nfs_pagein_one(&pgio); | ||
142 | return 0; | 157 | return 0; |
143 | } | 158 | } |
144 | 159 | ||
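The replacement body of nfs_readpage_async() reduces the descriptor API to three calls: init selects the ops and block size, add_request coalesces, and complete flushes whatever was batched. A generic sketch of that init/add/complete batching shape (invented names; the real descriptor carries far more state):

#include <stdio.h>

struct batcher {
    int pending;
    int batch_size;
};

static void batch_init(struct batcher *b, int batch_size)
{
    b->pending = 0;
    b->batch_size = batch_size;
}

static void batch_flush(struct batcher *b)
{
    if (b->pending) {
        printf("issuing %d requests\n", b->pending);
        b->pending = 0;
    }
}

/* Like nfs_pageio_add_request(): queue, flushing when full. */
static void batch_add(struct batcher *b, int req)
{
    (void)req;
    if (++b->pending == b->batch_size)
        batch_flush(b);
}

int main(void)
{
    struct batcher b;

    batch_init(&b, 4);                 /* ~ nfs_pageio_init_read() */
    for (int i = 0; i < 10; i++)
        batch_add(&b, i);
    batch_flush(&b);                   /* ~ nfs_pageio_complete()  */
    return 0;
}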
@@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); | |||
202 | /* | 217 | /* |
203 | * Set up the NFS read request struct | 218 | * Set up the NFS read request struct |
204 | */ | 219 | */ |
205 | static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | 220 | static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, |
206 | const struct rpc_call_ops *call_ops, | 221 | unsigned int count, unsigned int offset) |
207 | unsigned int count, unsigned int offset, | ||
208 | struct pnfs_layout_segment *lseg) | ||
209 | { | 222 | { |
210 | struct inode *inode = req->wb_context->dentry->d_inode; | 223 | struct inode *inode = req->wb_context->dentry->d_inode; |
211 | 224 | ||
212 | data->req = req; | 225 | data->req = req; |
213 | data->inode = inode; | 226 | data->inode = inode; |
214 | data->cred = req->wb_context->cred; | 227 | data->cred = req->wb_context->cred; |
215 | data->lseg = get_lseg(lseg); | ||
216 | 228 | ||
217 | data->args.fh = NFS_FH(inode); | 229 | data->args.fh = NFS_FH(inode); |
218 | data->args.offset = req_offset(req) + offset; | 230 | data->args.offset = req_offset(req) + offset; |
@@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | |||
226 | data->res.count = count; | 238 | data->res.count = count; |
227 | data->res.eof = 0; | 239 | data->res.eof = 0; |
228 | nfs_fattr_init(&data->fattr); | 240 | nfs_fattr_init(&data->fattr); |
241 | } | ||
229 | 242 | ||
230 | if (data->lseg && | 243 | static int nfs_do_read(struct nfs_read_data *data, |
231 | (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) | 244 | const struct rpc_call_ops *call_ops) |
232 | return 0; | 245 | { |
246 | struct inode *inode = data->args.context->dentry->d_inode; | ||
233 | 247 | ||
234 | return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); | 248 | return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); |
235 | } | 249 | } |
236 | 250 | ||
251 | static int | ||
252 | nfs_do_multiple_reads(struct list_head *head, | ||
253 | const struct rpc_call_ops *call_ops) | ||
254 | { | ||
255 | struct nfs_read_data *data; | ||
256 | int ret = 0; | ||
257 | |||
258 | while (!list_empty(head)) { | ||
259 | int ret2; | ||
260 | |||
261 | data = list_entry(head->next, struct nfs_read_data, list); | ||
262 | list_del_init(&data->list); | ||
263 | |||
264 | ret2 = nfs_do_read(data, call_ops); | ||
265 | if (ret == 0) | ||
266 | ret = ret2; | ||
267 | } | ||
268 | return ret; | ||
269 | } | ||
270 | |||
237 | static void | 271 | static void |
238 | nfs_async_read_error(struct list_head *head) | 272 | nfs_async_read_error(struct list_head *head) |
239 | { | 273 | { |
@@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head) | |||
260 | * won't see the new data until our attribute cache is updated. This is more | 294 | * won't see the new data until our attribute cache is updated. This is more |
261 | * or less conventional NFS client behavior. | 295 | * or less conventional NFS client behavior. |
262 | */ | 296 | */ |
263 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) | 297 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) |
264 | { | 298 | { |
265 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); | 299 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); |
266 | struct page *page = req->wb_page; | 300 | struct page *page = req->wb_page; |
267 | struct nfs_read_data *data; | 301 | struct nfs_read_data *data; |
268 | size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; | 302 | size_t rsize = desc->pg_bsize, nbytes; |
269 | unsigned int offset; | 303 | unsigned int offset; |
270 | int requests = 0; | 304 | int requests = 0; |
271 | int ret = 0; | 305 | int ret = 0; |
272 | struct pnfs_layout_segment *lseg; | ||
273 | LIST_HEAD(list); | ||
274 | 306 | ||
275 | nfs_list_remove_request(req); | 307 | nfs_list_remove_request(req); |
276 | 308 | ||
309 | offset = 0; | ||
277 | nbytes = desc->pg_count; | 310 | nbytes = desc->pg_count; |
278 | do { | 311 | do { |
279 | size_t len = min(nbytes,rsize); | 312 | size_t len = min(nbytes,rsize); |
@@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) | |||
281 | data = nfs_readdata_alloc(1); | 314 | data = nfs_readdata_alloc(1); |
282 | if (!data) | 315 | if (!data) |
283 | goto out_bad; | 316 | goto out_bad; |
284 | list_add(&data->pages, &list); | 317 | data->pagevec[0] = page; |
318 | nfs_read_rpcsetup(req, data, len, offset); | ||
319 | list_add(&data->list, res); | ||
285 | requests++; | 320 | requests++; |
286 | nbytes -= len; | 321 | nbytes -= len; |
322 | offset += len; | ||
287 | } while(nbytes != 0); | 323 | } while(nbytes != 0); |
288 | atomic_set(&req->wb_complete, requests); | 324 | atomic_set(&req->wb_complete, requests); |
289 | |||
290 | BUG_ON(desc->pg_lseg != NULL); | ||
291 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
292 | req_offset(req), desc->pg_count, | ||
293 | IOMODE_READ, GFP_KERNEL); | ||
294 | ClearPageError(page); | 325 | ClearPageError(page); |
295 | offset = 0; | 326 | desc->pg_rpc_callops = &nfs_read_partial_ops; |
296 | nbytes = desc->pg_count; | ||
297 | do { | ||
298 | int ret2; | ||
299 | |||
300 | data = list_entry(list.next, struct nfs_read_data, pages); | ||
301 | list_del_init(&data->pages); | ||
302 | |||
303 | data->pagevec[0] = page; | ||
304 | |||
305 | if (nbytes < rsize) | ||
306 | rsize = nbytes; | ||
307 | ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, | ||
308 | rsize, offset, lseg); | ||
309 | if (ret == 0) | ||
310 | ret = ret2; | ||
311 | offset += rsize; | ||
312 | nbytes -= rsize; | ||
313 | } while (nbytes != 0); | ||
314 | put_lseg(lseg); | ||
315 | desc->pg_lseg = NULL; | ||
316 | |||
317 | return ret; | 327 | return ret; |
318 | |||
319 | out_bad: | 328 | out_bad: |
320 | while (!list_empty(&list)) { | 329 | while (!list_empty(res)) { |
321 | data = list_entry(list.next, struct nfs_read_data, pages); | 330 | data = list_entry(res->next, struct nfs_read_data, list); |
322 | list_del(&data->pages); | 331 | list_del(&data->list); |
323 | nfs_readdata_free(data); | 332 | nfs_readdata_free(data); |
324 | } | 333 | } |
325 | SetPageError(page); | 334 | SetPageError(page); |
@@ -327,19 +336,19 @@ out_bad: | |||
327 | return -ENOMEM; | 336 | return -ENOMEM; |
328 | } | 337 | } |
329 | 338 | ||
330 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) | 339 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) |
331 | { | 340 | { |
332 | struct nfs_page *req; | 341 | struct nfs_page *req; |
333 | struct page **pages; | 342 | struct page **pages; |
334 | struct nfs_read_data *data; | 343 | struct nfs_read_data *data; |
335 | struct list_head *head = &desc->pg_list; | 344 | struct list_head *head = &desc->pg_list; |
336 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 345 | int ret = 0; |
337 | int ret = -ENOMEM; | ||
338 | 346 | ||
339 | data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, | 347 | data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, |
340 | desc->pg_count)); | 348 | desc->pg_count)); |
341 | if (!data) { | 349 | if (!data) { |
342 | nfs_async_read_error(head); | 350 | nfs_async_read_error(head); |
351 | ret = -ENOMEM; | ||
343 | goto out; | 352 | goto out; |
344 | } | 353 | } |
345 | 354 | ||
@@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) | |||
352 | *pages++ = req->wb_page; | 361 | *pages++ = req->wb_page; |
353 | } | 362 | } |
354 | req = nfs_list_entry(data->pages.next); | 363 | req = nfs_list_entry(data->pages.next); |
355 | if ((!lseg) && list_is_singular(&data->pages)) | ||
356 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
357 | req_offset(req), desc->pg_count, | ||
358 | IOMODE_READ, GFP_KERNEL); | ||
359 | 364 | ||
360 | ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, | 365 | nfs_read_rpcsetup(req, data, desc->pg_count, 0); |
361 | 0, lseg); | 366 | list_add(&data->list, res); |
367 | desc->pg_rpc_callops = &nfs_read_full_ops; | ||
362 | out: | 368 | out: |
363 | put_lseg(lseg); | ||
364 | desc->pg_lseg = NULL; | ||
365 | return ret; | 369 | return ret; |
366 | } | 370 | } |
367 | 371 | ||
372 | int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) | ||
373 | { | ||
374 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
375 | return nfs_pagein_multi(desc, head); | ||
376 | return nfs_pagein_one(desc, head); | ||
377 | } | ||
378 | |||
379 | static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | ||
380 | { | ||
381 | LIST_HEAD(head); | ||
382 | int ret; | ||
383 | |||
384 | ret = nfs_generic_pagein(desc, &head); | ||
385 | if (ret == 0) | ||
386 | ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); | ||
387 | return ret; | ||
388 | } | ||
389 | |||
390 | static const struct nfs_pageio_ops nfs_pageio_read_ops = { | ||
391 | .pg_test = nfs_generic_pg_test, | ||
392 | .pg_doio = nfs_generic_pg_readpages, | ||
393 | }; | ||
394 | |||
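nfs_pageio_read_ops bundles the coalescing test and the flush routine into one ops table, and pNFS layout drivers supply their own pg_read_ops/pg_write_ops; falling back in nfs_pageio_reset_read_mds() is then just a pointer swap back to the MDS table. The pattern in miniature (illustrative names):

#include <stdbool.h>
#include <stdio.h>

struct pageio;

struct pageio_ops {
    bool (*pg_test)(struct pageio *);   /* may these requests coalesce? */
    int  (*pg_doio)(struct pageio *);   /* flush the batch              */
};

struct pageio {
    const struct pageio_ops *ops;
};

static bool generic_test(struct pageio *p) { (void)p; return true; }
static int  generic_doio(struct pageio *p) { (void)p; puts("MDS I/O"); return 0; }

static const struct pageio_ops mds_ops = {
    .pg_test = generic_test,
    .pg_doio = generic_doio,
};

/* Falling back is a pointer swap, like nfs_pageio_reset_read_mds(). */
static void reset_to_mds(struct pageio *p) { p->ops = &mds_ops; }

int main(void)
{
    struct pageio p = { .ops = &mds_ops };

    reset_to_mds(&p);
    if (p.ops->pg_test(&p))
        p.ops->pg_doio(&p);
    return 0;
}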
368 | /* | 395 | /* |
369 | * This is the callback from RPC telling us whether a reply was | 396 | * This is the callback from RPC telling us whether a reply was |
370 | * received or some error occurred (timeout or socket shutdown). | 397 | * received or some error occurred (timeout or socket shutdown). |
@@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
635 | .pgio = &pgio, | 662 | .pgio = &pgio, |
636 | }; | 663 | }; |
637 | struct inode *inode = mapping->host; | 664 | struct inode *inode = mapping->host; |
638 | struct nfs_server *server = NFS_SERVER(inode); | ||
639 | size_t rsize = server->rsize; | ||
640 | unsigned long npages; | 665 | unsigned long npages; |
641 | int ret = -ESTALE; | 666 | int ret = -ESTALE; |
642 | 667 | ||
@@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
664 | if (ret == 0) | 689 | if (ret == 0) |
665 | goto read_complete; /* all pages were read */ | 690 | goto read_complete; /* all pages were read */ |
666 | 691 | ||
667 | if (rsize < PAGE_CACHE_SIZE) | 692 | nfs_pageio_init_read(&pgio, inode); |
668 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | ||
669 | else | ||
670 | nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); | ||
671 | 693 | ||
672 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); | 694 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); |
673 | 695 | ||
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8d6864c2a5fa..b2fbbde58e44 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
147 | 147 | ||
148 | alias = d_lookup(parent, &data->args.name); | 148 | alias = d_lookup(parent, &data->args.name); |
149 | if (alias != NULL) { | 149 | if (alias != NULL) { |
150 | int ret = 0; | 150 | int ret; |
151 | void *devname_garbage = NULL; | 151 | void *devname_garbage = NULL; |
152 | 152 | ||
153 | /* | 153 | /* |
@@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
155 | * the sillyrename information to the aliased dentry. | 155 | * the sillyrename information to the aliased dentry. |
156 | */ | 156 | */ |
157 | nfs_free_dname(data); | 157 | nfs_free_dname(data); |
158 | ret = nfs_copy_dname(alias, data); | ||
158 | spin_lock(&alias->d_lock); | 159 | spin_lock(&alias->d_lock); |
159 | if (alias->d_inode != NULL && | 160 | if (ret == 0 && alias->d_inode != NULL && |
160 | !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { | 161 | !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { |
161 | devname_garbage = alias->d_fsdata; | 162 | devname_garbage = alias->d_fsdata; |
162 | alias->d_fsdata = data; | 163 | alias->d_fsdata = data; |
163 | alias->d_flags |= DCACHE_NFSFS_RENAMED; | 164 | alias->d_flags |= DCACHE_NFSFS_RENAMED; |
164 | ret = 1; | 165 | ret = 1; |
165 | } | 166 | } else |
167 | ret = 0; | ||
166 | spin_unlock(&alias->d_lock); | 168 | spin_unlock(&alias->d_lock); |
167 | nfs_dec_sillycount(dir); | 169 | nfs_dec_sillycount(dir); |
168 | dput(alias); | 170 | dput(alias); |
@@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
171 | * point dentry is definitely not a root, so we won't need | 173 | * point dentry is definitely not a root, so we won't need |
172 | * that anymore. | 174 | * that anymore. |
173 | */ | 175 | */ |
174 | if (devname_garbage) | 176 | kfree(devname_garbage); |
175 | kfree(devname_garbage); | ||
176 | return ret; | 177 | return ret; |
177 | } | 178 | } |
178 | data->dir = igrab(dir); | 179 | data->dir = igrab(dir); |
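The reordering in nfs_do_call_unlink() follows a standard locking rule: nfs_copy_dname() can allocate and fail, so it must run before spin_lock(&alias->d_lock) is taken, and its result is only consulted inside the critical section. In sketch form, with a pthread mutex in place of d_lock (illustrative, not the kernel code):

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static char *shared_name;

static bool publish_name(const char *name)
{
    /* Fallible, possibly-sleeping work first... */
    char *copy = strdup(name);          /* ~ nfs_copy_dname() */
    bool ok = false;

    /* ...then the lock, with only cheap pointer updates inside. */
    pthread_mutex_lock(&lock);
    if (copy && shared_name == NULL) {
        shared_name = copy;
        copy = NULL;
        ok = true;
    }
    pthread_mutex_unlock(&lock);

    free(copy);                         /* lost the race, or strdup failed */
    return ok;
}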
@@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |||
204 | if (parent == NULL) | 205 | if (parent == NULL) |
205 | goto out_free; | 206 | goto out_free; |
206 | dir = parent->d_inode; | 207 | dir = parent->d_inode; |
207 | if (nfs_copy_dname(dentry, data) != 0) | ||
208 | goto out_dput; | ||
209 | /* Non-exclusive lock protects against concurrent lookup() calls */ | 208 | /* Non-exclusive lock protects against concurrent lookup() calls */ |
210 | spin_lock(&dir->i_lock); | 209 | spin_lock(&dir->i_lock); |
211 | if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { | 210 | if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { |
@@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | |||
366 | struct nfs_renamedata *data = calldata; | 365 | struct nfs_renamedata *data = calldata; |
367 | struct inode *old_dir = data->old_dir; | 366 | struct inode *old_dir = data->old_dir; |
368 | struct inode *new_dir = data->new_dir; | 367 | struct inode *new_dir = data->new_dir; |
368 | struct dentry *old_dentry = data->old_dentry; | ||
369 | struct dentry *new_dentry = data->new_dentry; | ||
369 | 370 | ||
370 | if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { | 371 | if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { |
371 | nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); | 372 | nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); |
@@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | |||
373 | } | 374 | } |
374 | 375 | ||
375 | if (task->tk_status != 0) { | 376 | if (task->tk_status != 0) { |
376 | nfs_cancel_async_unlink(data->old_dentry); | 377 | nfs_cancel_async_unlink(old_dentry); |
377 | return; | 378 | return; |
378 | } | 379 | } |
379 | 380 | ||
380 | nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); | 381 | d_drop(old_dentry); |
381 | d_move(data->old_dentry, data->new_dentry); | 382 | d_drop(new_dentry); |
382 | } | 383 | } |
383 | 384 | ||
384 | /** | 385 | /** |
@@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | |||
501 | * and only performs the unlink once the last reference to it is put. | 502 | * and only performs the unlink once the last reference to it is put. |
502 | * | 503 | * |
503 | * The final cleanup is done during dentry_iput. | 504 | * The final cleanup is done during dentry_iput. |
505 | * | ||
506 | * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server | ||
507 | * could take responsibility for keeping open files referenced. The server | ||
508 | * would also need to ensure that opened-but-deleted files were kept over | ||
509 | * reboots. However, we may not assume a server does so. (RFC 5661 | ||
510 | * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can | ||
511 | * use to advertise that it does this; some day we may take advantage of | ||
512 | * it.) | ||
504 | */ | 513 | */ |
505 | int | 514 | int |
506 | nfs_sillyrename(struct inode *dir, struct dentry *dentry) | 515 | nfs_sillyrename(struct inode *dir, struct dentry *dentry) |
@@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
560 | if (error) | 569 | if (error) |
561 | goto out_dput; | 570 | goto out_dput; |
562 | 571 | ||
572 | /* populate unlinkdata with the right dname */ | ||
573 | error = nfs_copy_dname(sdentry, | ||
574 | (struct nfs_unlinkdata *)dentry->d_fsdata); | ||
575 | if (error) { | ||
576 | nfs_cancel_async_unlink(dentry); | ||
577 | goto out_dput; | ||
578 | } | ||
579 | |||
563 | /* run the rename task, undo unlink if it fails */ | 580 | /* run the rename task, undo unlink if it fails */ |
564 | task = nfs_async_rename(dir, dir, dentry, sdentry); | 581 | task = nfs_async_rename(dir, dir, dentry, sdentry); |
565 | if (IS_ERR(task)) { | 582 | if (IS_ERR(task)) { |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 08579312c57b..b39b37f80913 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p) | |||
97 | mempool_free(p, nfs_wdata_mempool); | 97 | mempool_free(p, nfs_wdata_mempool); |
98 | } | 98 | } |
99 | 99 | ||
100 | static void nfs_writedata_release(struct nfs_write_data *wdata) | 100 | void nfs_writedata_release(struct nfs_write_data *wdata) |
101 | { | 101 | { |
102 | put_lseg(wdata->lseg); | 102 | put_lseg(wdata->lseg); |
103 | put_nfs_open_context(wdata->args.context); | 103 | put_nfs_open_context(wdata->args.context); |
@@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); | |||
845 | /* | 845 | /* |
846 | * Set up the argument/result storage required for the RPC call. | 846 | * Set up the argument/result storage required for the RPC call. |
847 | */ | 847 | */ |
848 | static int nfs_write_rpcsetup(struct nfs_page *req, | 848 | static void nfs_write_rpcsetup(struct nfs_page *req, |
849 | struct nfs_write_data *data, | 849 | struct nfs_write_data *data, |
850 | const struct rpc_call_ops *call_ops, | ||
851 | unsigned int count, unsigned int offset, | 850 | unsigned int count, unsigned int offset, |
852 | struct pnfs_layout_segment *lseg, | ||
853 | int how) | 851 | int how) |
854 | { | 852 | { |
855 | struct inode *inode = req->wb_context->dentry->d_inode; | 853 | struct inode *inode = req->wb_context->dentry->d_inode; |
@@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
860 | data->req = req; | 858 | data->req = req; |
861 | data->inode = inode = req->wb_context->dentry->d_inode; | 859 | data->inode = inode = req->wb_context->dentry->d_inode; |
862 | data->cred = req->wb_context->cred; | 860 | data->cred = req->wb_context->cred; |
863 | data->lseg = get_lseg(lseg); | ||
864 | 861 | ||
865 | data->args.fh = NFS_FH(inode); | 862 | data->args.fh = NFS_FH(inode); |
866 | data->args.offset = req_offset(req) + offset; | 863 | data->args.offset = req_offset(req) + offset; |
@@ -872,24 +869,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
872 | data->args.context = get_nfs_open_context(req->wb_context); | 869 | data->args.context = get_nfs_open_context(req->wb_context); |
873 | data->args.lock_context = req->wb_lock_context; | 870 | data->args.lock_context = req->wb_lock_context; |
874 | data->args.stable = NFS_UNSTABLE; | 871 | data->args.stable = NFS_UNSTABLE; |
875 | if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { | 872 | switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { |
876 | data->args.stable = NFS_DATA_SYNC; | 873 | case 0: |
877 | if (!nfs_need_commit(NFS_I(inode))) | 874 | break; |
878 | data->args.stable = NFS_FILE_SYNC; | 875 | case FLUSH_COND_STABLE: |
876 | if (nfs_need_commit(NFS_I(inode))) | ||
877 | break; | ||
878 | default: | ||
879 | data->args.stable = NFS_FILE_SYNC; | ||
879 | } | 880 | } |
880 | 881 | ||
881 | data->res.fattr = &data->fattr; | 882 | data->res.fattr = &data->fattr; |
882 | data->res.count = count; | 883 | data->res.count = count; |
883 | data->res.verf = &data->verf; | 884 | data->res.verf = &data->verf; |
884 | nfs_fattr_init(&data->fattr); | 885 | nfs_fattr_init(&data->fattr); |
886 | } | ||
885 | 887 | ||
886 | if (data->lseg && | 888 | static int nfs_do_write(struct nfs_write_data *data, |
887 | (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) | 889 | const struct rpc_call_ops *call_ops, |
888 | return 0; | 890 | int how) |
891 | { | ||
892 | struct inode *inode = data->args.context->dentry->d_inode; | ||
889 | 893 | ||
890 | return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); | 894 | return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); |
891 | } | 895 | } |
892 | 896 | ||
897 | static int nfs_do_multiple_writes(struct list_head *head, | ||
898 | const struct rpc_call_ops *call_ops, | ||
899 | int how) | ||
900 | { | ||
901 | struct nfs_write_data *data; | ||
902 | int ret = 0; | ||
903 | |||
904 | while (!list_empty(head)) { | ||
905 | int ret2; | ||
906 | |||
907 | data = list_entry(head->next, struct nfs_write_data, list); | ||
908 | list_del_init(&data->list); | ||
909 | |||
910 | ret2 = nfs_do_write(data, call_ops, how); | ||
911 | if (ret == 0) | ||
912 | ret = ret2; | ||
913 | } | ||
914 | return ret; | ||
915 | } | ||
916 | |||
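The rewritten stable-flag selection in nfs_write_rpcsetup() is a deliberate-fallthrough switch: plain writes stay NFS_UNSTABLE, FLUSH_COND_STABLE stays unstable only while a commit is already pending, and everything else falls through to NFS_FILE_SYNC. The control flow in isolation (flag values invented for the sketch):

#include <stdio.h>

enum stable { UNSTABLE, FILE_SYNC };

#define FLUSH_STABLE      0x1
#define FLUSH_COND_STABLE 0x2

static enum stable pick_stable(int how, int commit_pending)
{
    enum stable s = UNSTABLE;

    switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
    case 0:                      /* ordinary write: leave unstable */
        break;
    case FLUSH_COND_STABLE:
        if (commit_pending)      /* a COMMIT will follow anyway    */
            break;
        /* fall through */
    default:                     /* FLUSH_STABLE, or COND with no
                                  * pending commit: sync to disk   */
        s = FILE_SYNC;
    }
    return s;
}

int main(void)
{
    printf("%d %d %d\n",
           pick_stable(0, 0),                    /* 0: unstable  */
           pick_stable(FLUSH_COND_STABLE, 1),    /* 0: unstable  */
           pick_stable(FLUSH_STABLE, 0));        /* 1: file sync */
    return 0;
}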
893 | /* If a nfs_flush_* function fails, it should remove reqs from @head and | 917 | /* If a nfs_flush_* function fails, it should remove reqs from @head and |
894 | * call this on each, which will prepare them to be retried on next | 918 | * call this on each, which will prepare them to be retried on next |
895 | * writeback using standard nfs. | 919 | * writeback using standard nfs. |
@@ -907,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req) | |||
907 | * Generate multiple small requests to write out a single | 931 | * Generate multiple small requests to write out a single |
908 | * contiguous dirty area on one page. | 932 | * contiguous dirty area on one page. |
909 | */ | 933 | */ |
910 | static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | 934 | static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) |
911 | { | 935 | { |
912 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); | 936 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); |
913 | struct page *page = req->wb_page; | 937 | struct page *page = req->wb_page; |
914 | struct nfs_write_data *data; | 938 | struct nfs_write_data *data; |
915 | size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; | 939 | size_t wsize = desc->pg_bsize, nbytes; |
916 | unsigned int offset; | 940 | unsigned int offset; |
917 | int requests = 0; | 941 | int requests = 0; |
918 | int ret = 0; | 942 | int ret = 0; |
919 | struct pnfs_layout_segment *lseg; | ||
920 | LIST_HEAD(list); | ||
921 | 943 | ||
922 | nfs_list_remove_request(req); | 944 | nfs_list_remove_request(req); |
923 | 945 | ||
@@ -927,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
927 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 949 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; |
928 | 950 | ||
929 | 951 | ||
952 | offset = 0; | ||
930 | nbytes = desc->pg_count; | 953 | nbytes = desc->pg_count; |
931 | do { | 954 | do { |
932 | size_t len = min(nbytes, wsize); | 955 | size_t len = min(nbytes, wsize); |
@@ -934,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
934 | data = nfs_writedata_alloc(1); | 957 | data = nfs_writedata_alloc(1); |
935 | if (!data) | 958 | if (!data) |
936 | goto out_bad; | 959 | goto out_bad; |
937 | list_add(&data->pages, &list); | 960 | data->pagevec[0] = page; |
961 | nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); | ||
962 | list_add(&data->list, res); | ||
938 | requests++; | 963 | requests++; |
939 | nbytes -= len; | 964 | nbytes -= len; |
965 | offset += len; | ||
940 | } while (nbytes != 0); | 966 | } while (nbytes != 0); |
941 | atomic_set(&req->wb_complete, requests); | 967 | atomic_set(&req->wb_complete, requests); |
942 | 968 | desc->pg_rpc_callops = &nfs_write_partial_ops; | |
943 | BUG_ON(desc->pg_lseg); | ||
944 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
945 | req_offset(req), desc->pg_count, | ||
946 | IOMODE_RW, GFP_NOFS); | ||
947 | ClearPageError(page); | ||
948 | offset = 0; | ||
949 | nbytes = desc->pg_count; | ||
950 | do { | ||
951 | int ret2; | ||
952 | |||
953 | data = list_entry(list.next, struct nfs_write_data, pages); | ||
954 | list_del_init(&data->pages); | ||
955 | |||
956 | data->pagevec[0] = page; | ||
957 | |||
958 | if (nbytes < wsize) | ||
959 | wsize = nbytes; | ||
960 | ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, | ||
961 | wsize, offset, lseg, desc->pg_ioflags); | ||
962 | if (ret == 0) | ||
963 | ret = ret2; | ||
964 | offset += wsize; | ||
965 | nbytes -= wsize; | ||
966 | } while (nbytes != 0); | ||
967 | |||
968 | put_lseg(lseg); | ||
969 | desc->pg_lseg = NULL; | ||
970 | return ret; | 969 | return ret; |
971 | 970 | ||
972 | out_bad: | 971 | out_bad: |
973 | while (!list_empty(&list)) { | 972 | while (!list_empty(res)) { |
974 | data = list_entry(list.next, struct nfs_write_data, pages); | 973 | data = list_entry(res->next, struct nfs_write_data, list); |
975 | list_del(&data->pages); | 974 | list_del(&data->list); |
976 | nfs_writedata_free(data); | 975 | nfs_writedata_free(data); |
977 | } | 976 | } |
978 | nfs_redirty_request(req); | 977 | nfs_redirty_request(req); |
@@ -987,14 +986,13 @@ out_bad: | |||
987 | * This is the case if nfs_updatepage detects a conflicting request | 986 | * This is the case if nfs_updatepage detects a conflicting request |
988 | * that has been written but not committed. | 987 | * that has been written but not committed. |
989 | */ | 988 | */ |
990 | static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | 989 | static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) |
991 | { | 990 | { |
992 | struct nfs_page *req; | 991 | struct nfs_page *req; |
993 | struct page **pages; | 992 | struct page **pages; |
994 | struct nfs_write_data *data; | 993 | struct nfs_write_data *data; |
995 | struct list_head *head = &desc->pg_list; | 994 | struct list_head *head = &desc->pg_list; |
996 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 995 | int ret = 0; |
997 | int ret; | ||
998 | 996 | ||
999 | data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, | 997 | data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, |
1000 | desc->pg_count)); | 998 | desc->pg_count)); |
@@ -1016,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | |||
1016 | *pages++ = req->wb_page; | 1014 | *pages++ = req->wb_page; |
1017 | } | 1015 | } |
1018 | req = nfs_list_entry(data->pages.next); | 1016 | req = nfs_list_entry(data->pages.next); |
1019 | if ((!lseg) && list_is_singular(&data->pages)) | ||
1020 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
1021 | req_offset(req), desc->pg_count, | ||
1022 | IOMODE_RW, GFP_NOFS); | ||
1023 | 1017 | ||
1024 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 1018 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && |
1025 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) | 1019 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) |
1026 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 1020 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; |
1027 | 1021 | ||
1028 | /* Set up the argument struct */ | 1022 | /* Set up the argument struct */ |
1029 | ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); | 1023 | nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); |
1024 | list_add(&data->list, res); | ||
1025 | desc->pg_rpc_callops = &nfs_write_full_ops; | ||
1030 | out: | 1026 | out: |
1031 | put_lseg(lseg); /* Cleans any gotten in ->pg_test */ | ||
1032 | desc->pg_lseg = NULL; | ||
1033 | return ret; | 1027 | return ret; |
1034 | } | 1028 | } |
1035 | 1029 | ||
1036 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | 1030 | int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) |
1031 | { | ||
1032 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
1033 | return nfs_flush_multi(desc, head); | ||
1034 | return nfs_flush_one(desc, head); | ||
1035 | } | ||
1036 | |||
1037 | static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | ||
1038 | { | ||
1039 | LIST_HEAD(head); | ||
1040 | int ret; | ||
1041 | |||
1042 | ret = nfs_generic_flush(desc, &head); | ||
1043 | if (ret == 0) | ||
1044 | ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, | ||
1045 | desc->pg_ioflags); | ||
1046 | return ret; | ||
1047 | } | ||
1048 | |||
1049 | static const struct nfs_pageio_ops nfs_pageio_write_ops = { | ||
1050 | .pg_test = nfs_generic_pg_test, | ||
1051 | .pg_doio = nfs_generic_pg_writepages, | ||
1052 | }; | ||
1053 | |||
1054 | static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, | ||
1037 | struct inode *inode, int ioflags) | 1055 | struct inode *inode, int ioflags) |
1038 | { | 1056 | { |
1039 | size_t wsize = NFS_SERVER(inode)->wsize; | 1057 | nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, |
1058 | NFS_SERVER(inode)->wsize, ioflags); | ||
1059 | } | ||
1060 | |||
1061 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) | ||
1062 | { | ||
1063 | pgio->pg_ops = &nfs_pageio_write_ops; | ||
1064 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | ||
1065 | } | ||
1066 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | ||
1040 | 1067 | ||
1041 | if (wsize < PAGE_CACHE_SIZE) | 1068 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, |
1042 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); | 1069 | struct inode *inode, int ioflags) |
1043 | else | 1070 | { |
1044 | nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); | 1071 | if (!pnfs_pageio_init_write(pgio, inode, ioflags)) |
1072 | nfs_pageio_init_write_mds(pgio, inode, ioflags); | ||
1045 | } | 1073 | } |
1046 | 1074 | ||
1047 | /* | 1075 | /* |
@@ -1566,8 +1594,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1566 | int status; | 1594 | int status; |
1567 | bool sync = true; | 1595 | bool sync = true; |
1568 | 1596 | ||
1569 | if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || | 1597 | if (wbc->sync_mode == WB_SYNC_NONE) |
1570 | wbc->for_background) | ||
1571 | sync = false; | 1598 | sync = false; |
1572 | 1599 | ||
1573 | status = pnfs_layoutcommit_inode(inode, sync); | 1600 | status = pnfs_layoutcommit_inode(inode, sync); |
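Only the tail of nfs_do_multiple_writes() is visible in the first hunk above. For orientation, the complete helper presumably looks like the sketch below; the signature and loop head are inferred from the visible body and from the new nfs_generic_pg_writepages() caller, so treat them as assumptions. The point of the rework is that nfs_generic_flush() now only builds the list of nfs_write_data (picking nfs_flush_multi when pg_bsize is smaller than a page, nfs_flush_one otherwise) and records desc->pg_rpc_callops, while this helper issues the RPCs — which is what lets pNFS reuse the build phase and fall back to the MDS path via the newly exported nfs_pageio_reset_write_mds().

    /* Sketch of the full nfs_do_multiple_writes(); only its tail appears
     * in the hunk above, so the signature and loop head are inferred. */
    static int nfs_do_multiple_writes(struct list_head *head,
                                      const struct rpc_call_ops *call_ops,
                                      int how)
    {
            struct nfs_write_data *data;
            int ret = 0;

            while (!list_empty(head)) {
                    int ret2;

                    data = list_entry(head->next, struct nfs_write_data, list);
                    list_del_init(&data->list);

                    ret2 = nfs_do_write(data, call_ops, how);
                    /* remember the first error, but keep issuing the rest */
                    if (ret == 0)
                            ret = ret2;
            }
            return ret;
    }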
diff --git a/fs/notify/group.c b/fs/notify/group.c index d309f38449cb..63fc294a4692 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #include <linux/fsnotify_backend.h> | 26 | #include <linux/fsnotify_backend.h> |
27 | #include "fsnotify.h" | 27 | #include "fsnotify.h" |
28 | 28 | ||
29 | #include <asm/atomic.h> | 29 | #include <linux/atomic.h> |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Final freeing of a group | 32 | * Final freeing of a group |
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 07ea8d3e6ea2..b13c00ac48eb 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/spinlock.h> | 24 | #include <linux/spinlock.h> |
25 | 25 | ||
26 | #include <asm/atomic.h> | 26 | #include <linux/atomic.h> |
27 | 27 | ||
28 | #include <linux/fsnotify_backend.h> | 28 | #include <linux/fsnotify_backend.h> |
29 | #include "fsnotify.h" | 29 | #include "fsnotify.h" |
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 252ab1f6452b..e14587d55689 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -92,7 +92,7 @@ | |||
92 | #include <linux/spinlock.h> | 92 | #include <linux/spinlock.h> |
93 | #include <linux/srcu.h> | 93 | #include <linux/srcu.h> |
94 | 94 | ||
95 | #include <asm/atomic.h> | 95 | #include <linux/atomic.h> |
96 | 96 | ||
97 | #include <linux/fsnotify_backend.h> | 97 | #include <linux/fsnotify_backend.h> |
98 | #include "fsnotify.h" | 98 | #include "fsnotify.h" |
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index f39260f8f865..ee188158a224 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -43,7 +43,7 @@ | |||
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/spinlock.h> | 44 | #include <linux/spinlock.h> |
45 | 45 | ||
46 | #include <asm/atomic.h> | 46 | #include <linux/atomic.h> |
47 | 47 | ||
48 | #include <linux/fsnotify_backend.h> | 48 | #include <linux/fsnotify_backend.h> |
49 | #include "fsnotify.h" | 49 | #include "fsnotify.h" |
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index e86577d6c5c3..778fe6cae3b0 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <linux/mutex.h> | 24 | #include <linux/mutex.h> |
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | 26 | ||
27 | #include <asm/atomic.h> | 27 | #include <linux/atomic.h> |
28 | 28 | ||
29 | #include <linux/fsnotify_backend.h> | 29 | #include <linux/fsnotify_backend.h> |
30 | #include "fsnotify.h" | 30 | #include "fsnotify.h" |
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 2dabf813456c..fe8e7e928889 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h | |||
@@ -24,7 +24,7 @@ | |||
24 | #ifndef _LINUX_NTFS_INODE_H | 24 | #ifndef _LINUX_NTFS_INODE_H |
25 | #define _LINUX_NTFS_INODE_H | 25 | #define _LINUX_NTFS_INODE_H |
26 | 26 | ||
27 | #include <asm/atomic.h> | 27 | #include <linux/atomic.h> |
28 | 28 | ||
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/list.h> | 30 | #include <linux/list.h> |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 783c58d9daf1..a7219075b4de 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -247,7 +247,7 @@ static int ocfs2_set_acl(handle_t *handle, | |||
247 | case ACL_TYPE_ACCESS: | 247 | case ACL_TYPE_ACCESS: |
248 | name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; | 248 | name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; |
249 | if (acl) { | 249 | if (acl) { |
250 | mode_t mode = inode->i_mode; | 250 | umode_t mode = inode->i_mode; |
251 | ret = posix_acl_equiv_mode(acl, &mode); | 251 | ret = posix_acl_equiv_mode(acl, &mode); |
252 | if (ret < 0) | 252 | if (ret < 0) |
253 | return ret; | 253 | return ret; |
@@ -351,7 +351,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
351 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 351 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
352 | struct posix_acl *acl = NULL; | 352 | struct posix_acl *acl = NULL; |
353 | int ret = 0, ret2; | 353 | int ret = 0, ret2; |
354 | mode_t mode; | 354 | umode_t mode; |
355 | 355 | ||
356 | if (!S_ISLNK(inode->i_mode)) { | 356 | if (!S_ISLNK(inode->i_mode)) { |
357 | if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { | 357 | if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { |
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 3b8d3979e03b..98e544274390 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
@@ -93,7 +93,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb) | |||
93 | 93 | ||
94 | memset(bh->b_data, 0, sizeof(struct omfs_inode)); | 94 | memset(bh->b_data, 0, sizeof(struct omfs_inode)); |
95 | 95 | ||
96 | if (inode->i_mode & S_IFDIR) { | 96 | if (S_ISDIR(inode->i_mode)) { |
97 | memset(&bh->b_data[OMFS_DIR_START], 0xff, | 97 | memset(&bh->b_data[OMFS_DIR_START], 0xff, |
98 | sbi->s_sys_blocksize - OMFS_DIR_START); | 98 | sbi->s_sys_blocksize - OMFS_DIR_START); |
99 | } else | 99 | } else |
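The one-line omfs change above is a behaviour fix, not just style: S_IFDIR is a bit pattern inside the wider S_IFMT file-type field, so `mode & S_IFDIR` also fires for any file type whose encoding shares those bits (a socket, for instance), whereas S_ISDIR() masks with S_IFMT before comparing. A minimal userspace demonstration:

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            mode_t sock = S_IFSOCK;         /* 0140000 */

            /* Buggy test: S_IFSOCK (0140000) & S_IFDIR (0040000) is
             * non-zero, so a socket would be treated as a directory. */
            printf("sock & S_IFDIR -> %d\n", (sock & S_IFDIR) != 0); /* 1 */

            /* Correct test: masks with S_IFMT before comparing. */
            printf("S_ISDIR(sock)  -> %d\n", S_ISDIR(sock));         /* 0 */
            return 0;
    }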
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
@@ -446,74 +446,52 @@ out: | |||
446 | return error; | 446 | return error; |
447 | } | 447 | } |
448 | 448 | ||
449 | SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) | 449 | static int chmod_common(struct path *path, umode_t mode) |
450 | { | 450 | { |
451 | struct inode * inode; | 451 | struct inode *inode = path->dentry->d_inode; |
452 | struct dentry * dentry; | ||
453 | struct file * file; | ||
454 | int err = -EBADF; | ||
455 | struct iattr newattrs; | 452 | struct iattr newattrs; |
453 | int error; | ||
456 | 454 | ||
457 | file = fget(fd); | 455 | error = mnt_want_write(path->mnt); |
458 | if (!file) | 456 | if (error) |
459 | goto out; | 457 | return error; |
460 | |||
461 | dentry = file->f_path.dentry; | ||
462 | inode = dentry->d_inode; | ||
463 | |||
464 | audit_inode(NULL, dentry); | ||
465 | |||
466 | err = mnt_want_write_file(file); | ||
467 | if (err) | ||
468 | goto out_putf; | ||
469 | mutex_lock(&inode->i_mutex); | 458 | mutex_lock(&inode->i_mutex); |
470 | err = security_path_chmod(dentry, file->f_vfsmnt, mode); | 459 | error = security_path_chmod(path->dentry, path->mnt, mode); |
471 | if (err) | 460 | if (error) |
472 | goto out_unlock; | 461 | goto out_unlock; |
473 | if (mode == (mode_t) -1) | ||
474 | mode = inode->i_mode; | ||
475 | newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); | 462 | newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); |
476 | newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; | 463 | newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; |
477 | err = notify_change(dentry, &newattrs); | 464 | error = notify_change(path->dentry, &newattrs); |
478 | out_unlock: | 465 | out_unlock: |
479 | mutex_unlock(&inode->i_mutex); | 466 | mutex_unlock(&inode->i_mutex); |
480 | mnt_drop_write(file->f_path.mnt); | 467 | mnt_drop_write(path->mnt); |
481 | out_putf: | 468 | return error; |
482 | fput(file); | 469 | } |
483 | out: | 470 | |
471 | SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) | ||
472 | { | ||
473 | struct file * file; | ||
474 | int err = -EBADF; | ||
475 | |||
476 | file = fget(fd); | ||
477 | if (file) { | ||
478 | audit_inode(NULL, file->f_path.dentry); | ||
479 | err = chmod_common(&file->f_path, mode); | ||
480 | fput(file); | ||
481 | } | ||
484 | return err; | 482 | return err; |
485 | } | 483 | } |
486 | 484 | ||
487 | SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) | 485 | SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) |
488 | { | 486 | { |
489 | struct path path; | 487 | struct path path; |
490 | struct inode *inode; | ||
491 | int error; | 488 | int error; |
492 | struct iattr newattrs; | ||
493 | 489 | ||
494 | error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); | 490 | error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); |
495 | if (error) | 491 | if (!error) { |
496 | goto out; | 492 | error = chmod_common(&path, mode); |
497 | inode = path.dentry->d_inode; | 493 | path_put(&path); |
498 | 494 | } | |
499 | error = mnt_want_write(path.mnt); | ||
500 | if (error) | ||
501 | goto dput_and_out; | ||
502 | mutex_lock(&inode->i_mutex); | ||
503 | error = security_path_chmod(path.dentry, path.mnt, mode); | ||
504 | if (error) | ||
505 | goto out_unlock; | ||
506 | if (mode == (mode_t) -1) | ||
507 | mode = inode->i_mode; | ||
508 | newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); | ||
509 | newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; | ||
510 | error = notify_change(path.dentry, &newattrs); | ||
511 | out_unlock: | ||
512 | mutex_unlock(&inode->i_mutex); | ||
513 | mnt_drop_write(path.mnt); | ||
514 | dput_and_out: | ||
515 | path_put(&path); | ||
516 | out: | ||
517 | return error; | 495 | return error; |
518 | } | 496 | } |
519 | 497 | ||
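The fs/open.c hunk above deduplicates fchmod() and fchmodat(): both now resolve their argument to a struct path and hand it to chmod_common(), which owns the mnt_want_write()/i_mutex/security_path_chmod()/notify_change() sequence. One visible behaviour change: the old `mode == (mode_t) -1` special case ("keep the current mode") is dropped from both entry points. From userspace the two calls remain interchangeable, as in this small check — demo.txt is a hypothetical file name used only for illustration:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
            struct stat st;
            int fd = open("demo.txt", O_CREAT | O_WRONLY, 0600);

            if (fd < 0)
                    return 1;

            /* Both paths now funnel into chmod_common() in the kernel. */
            fchmod(fd, 0644);                         /* by descriptor */
            fchmodat(AT_FDCWD, "demo.txt", 0640, 0);  /* by path */

            fstat(fd, &st);
            printf("mode: %o\n", st.st_mode & 07777); /* 640 */
            close(fd);
            return 0;
    }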
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -948,7 +948,7 @@ static const struct dentry_operations pipefs_dentry_operations = { | |||
948 | 948 | ||
949 | static struct inode * get_pipe_inode(void) | 949 | static struct inode * get_pipe_inode(void) |
950 | { | 950 | { |
951 | struct inode *inode = new_inode(pipe_mnt->mnt_sb); | 951 | struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb); |
952 | struct pipe_inode_info *pipe; | 952 | struct pipe_inode_info *pipe; |
953 | 953 | ||
954 | if (!inode) | 954 | if (!inode) |
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index a6227d219e93..10027b42b7e2 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c | |||
@@ -14,7 +14,7 @@ | |||
14 | 14 | ||
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <asm/atomic.h> | 17 | #include <linux/atomic.h> |
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/posix_acl.h> | 20 | #include <linux/posix_acl.h> |
@@ -149,10 +149,10 @@ posix_acl_valid(const struct posix_acl *acl) | |||
149 | * file mode permission bits, or else 1. Returns -E... on error. | 149 | * file mode permission bits, or else 1. Returns -E... on error. |
150 | */ | 150 | */ |
151 | int | 151 | int |
152 | posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) | 152 | posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) |
153 | { | 153 | { |
154 | const struct posix_acl_entry *pa, *pe; | 154 | const struct posix_acl_entry *pa, *pe; |
155 | mode_t mode = 0; | 155 | umode_t mode = 0; |
156 | int not_equiv = 0; | 156 | int not_equiv = 0; |
157 | 157 | ||
158 | FOREACH_ACL_ENTRY(pa, acl, pe) { | 158 | FOREACH_ACL_ENTRY(pa, acl, pe) { |
@@ -188,7 +188,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) | |||
188 | * Create an ACL representing the file mode permission bits of an inode. | 188 | * Create an ACL representing the file mode permission bits of an inode. |
189 | */ | 189 | */ |
190 | struct posix_acl * | 190 | struct posix_acl * |
191 | posix_acl_from_mode(mode_t mode, gfp_t flags) | 191 | posix_acl_from_mode(umode_t mode, gfp_t flags) |
192 | { | 192 | { |
193 | struct posix_acl *acl = posix_acl_alloc(3, flags); | 193 | struct posix_acl *acl = posix_acl_alloc(3, flags); |
194 | if (!acl) | 194 | if (!acl) |
@@ -279,11 +279,11 @@ check_perm: | |||
279 | * system calls. All permissions that are not granted by the acl are removed. | 279 | * system calls. All permissions that are not granted by the acl are removed. |
280 | * The permissions in the acl are changed to reflect the mode_p parameter. | 280 | * The permissions in the acl are changed to reflect the mode_p parameter. |
281 | */ | 281 | */ |
282 | static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) | 282 | static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) |
283 | { | 283 | { |
284 | struct posix_acl_entry *pa, *pe; | 284 | struct posix_acl_entry *pa, *pe; |
285 | struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; | 285 | struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; |
286 | mode_t mode = *mode_p; | 286 | umode_t mode = *mode_p; |
287 | int not_equiv = 0; | 287 | int not_equiv = 0; |
288 | 288 | ||
289 | /* assert(atomic_read(acl->a_refcount) == 1); */ | 289 | /* assert(atomic_read(acl->a_refcount) == 1); */ |
@@ -336,7 +336,7 @@ static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) | |||
336 | /* | 336 | /* |
337 | * Modify the ACL for the chmod syscall. | 337 | * Modify the ACL for the chmod syscall. |
338 | */ | 338 | */ |
339 | static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) | 339 | static int posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode) |
340 | { | 340 | { |
341 | struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; | 341 | struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; |
342 | struct posix_acl_entry *pa, *pe; | 342 | struct posix_acl_entry *pa, *pe; |
@@ -382,7 +382,7 @@ static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) | |||
382 | } | 382 | } |
383 | 383 | ||
384 | int | 384 | int |
385 | posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p) | 385 | posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) |
386 | { | 386 | { |
387 | struct posix_acl *clone = posix_acl_clone(*acl, gfp); | 387 | struct posix_acl *clone = posix_acl_clone(*acl, gfp); |
388 | int err = -ENOMEM; | 388 | int err = -ENOMEM; |
@@ -400,7 +400,7 @@ posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p) | |||
400 | EXPORT_SYMBOL(posix_acl_create); | 400 | EXPORT_SYMBOL(posix_acl_create); |
401 | 401 | ||
402 | int | 402 | int |
403 | posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, mode_t mode) | 403 | posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode) |
404 | { | 404 | { |
405 | struct posix_acl *clone = posix_acl_clone(*acl, gfp); | 405 | struct posix_acl *clone = posix_acl_clone(*acl, gfp); |
406 | int err = -ENOMEM; | 406 | int err = -ENOMEM; |
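The posix_acl.c hunks complete the mode_t → umode_t conversion of the ACL helpers. Since inode->i_mode is itself a umode_t, callers can now hand &inode->i_mode straight to posix_acl_create() and posix_acl_equiv_mode() instead of copying through a local mode variable — the reiserfs and ocfs2 hunks elsewhere in this series do exactly that. A sketch of a create path under the new signatures; the fs-specific example_set_acl() hook and the exact refcount handling are illustrative assumptions:

    /* Sketch of a filesystem create path after the umode_t conversion.
     * posix_acl_create() may rewrite both *acl and inode->i_mode in
     * place; it returns > 0 if an access ACL still needs to be stored,
     * 0 if the mode bits alone were sufficient, < 0 on error. */
    static int example_init_acl(struct inode *inode, struct posix_acl *acl)
    {
            int err;

            err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
            if (err < 0)
                    return err;
            if (err > 0)
                    err = example_set_acl(inode, ACL_TYPE_ACCESS, acl);
            posix_acl_release(acl);
            return err;
    }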
diff --git a/fs/proc/base.c b/fs/proc/base.c index c9e3f650f23c..5eb02069e1b8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1118,7 +1118,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1118 | * Warn that /proc/pid/oom_adj is deprecated, see | 1118 | * Warn that /proc/pid/oom_adj is deprecated, see |
1119 | * Documentation/feature-removal-schedule.txt. | 1119 | * Documentation/feature-removal-schedule.txt. |
1120 | */ | 1120 | */ |
1121 | WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", | 1121 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", |
1122 | current->comm, task_pid_nr(current), task_pid_nr(task), | 1122 | current->comm, task_pid_nr(current), task_pid_nr(task), |
1123 | task_pid_nr(task)); | 1123 | task_pid_nr(task)); |
1124 | task->signal->oom_adj = oom_adjust; | 1124 | task->signal->oom_adj = oom_adjust; |
@@ -1919,6 +1919,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) | |||
1919 | spin_lock(&files->file_lock); | 1919 | spin_lock(&files->file_lock); |
1920 | file = fcheck_files(files, fd); | 1920 | file = fcheck_files(files, fd); |
1921 | if (file) { | 1921 | if (file) { |
1922 | unsigned int f_flags; | ||
1923 | struct fdtable *fdt; | ||
1924 | |||
1925 | fdt = files_fdtable(files); | ||
1926 | f_flags = file->f_flags & ~O_CLOEXEC; | ||
1927 | if (FD_ISSET(fd, fdt->close_on_exec)) | ||
1928 | f_flags |= O_CLOEXEC; | ||
1929 | |||
1922 | if (path) { | 1930 | if (path) { |
1923 | *path = file->f_path; | 1931 | *path = file->f_path; |
1924 | path_get(&file->f_path); | 1932 | path_get(&file->f_path); |
@@ -1928,7 +1936,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) | |||
1928 | "pos:\t%lli\n" | 1936 | "pos:\t%lli\n" |
1929 | "flags:\t0%o\n", | 1937 | "flags:\t0%o\n", |
1930 | (long long) file->f_pos, | 1938 | (long long) file->f_pos, |
1931 | file->f_flags); | 1939 | f_flags); |
1932 | spin_unlock(&files->file_lock); | 1940 | spin_unlock(&files->file_lock); |
1933 | put_files_struct(files); | 1941 | put_files_struct(files); |
1934 | return 0; | 1942 | return 0; |
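With the proc_fd_info() change above, the flags line in /proc/<pid>/fdinfo/<fd> reflects the descriptor's actual close-on-exec state (read from the fdtable) rather than whatever O_CLOEXEC happened to be in f_flags at open time. The two can differ because FD_CLOEXEC is a per-descriptor property that fcntl() can toggle after the open. A quick way to observe it:

    #include <fcntl.h>
    #include <stdio.h>

    static void dump_fdinfo(int fd)
    {
            char path[64], line[128];
            FILE *f;

            snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
            f = fopen(path, "r");
            if (!f)
                    return;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);   /* includes "flags: 0..." in octal */
            fclose(f);
    }

    int main(void)
    {
            int fd = open("/dev/null", O_RDONLY); /* opened without O_CLOEXEC */

            dump_fdinfo(fd);                      /* flags lacks the O_CLOEXEC bit */
            fcntl(fd, F_SETFD, FD_CLOEXEC);       /* set it after the fact */
            dump_fdinfo(fd);                      /* flags now shows O_CLOEXEC */
            return 0;
    }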
@@ -2706,9 +2714,16 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) | |||
2706 | { | 2714 | { |
2707 | struct task_io_accounting acct = task->ioac; | 2715 | struct task_io_accounting acct = task->ioac; |
2708 | unsigned long flags; | 2716 | unsigned long flags; |
2717 | int result; | ||
2709 | 2718 | ||
2710 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2719 | result = mutex_lock_killable(&task->signal->cred_guard_mutex); |
2711 | return -EACCES; | 2720 | if (result) |
2721 | return result; | ||
2722 | |||
2723 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) { | ||
2724 | result = -EACCES; | ||
2725 | goto out_unlock; | ||
2726 | } | ||
2712 | 2727 | ||
2713 | if (whole && lock_task_sighand(task, &flags)) { | 2728 | if (whole && lock_task_sighand(task, &flags)) { |
2714 | struct task_struct *t = task; | 2729 | struct task_struct *t = task; |
@@ -2719,7 +2734,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) | |||
2719 | 2734 | ||
2720 | unlock_task_sighand(task, &flags); | 2735 | unlock_task_sighand(task, &flags); |
2721 | } | 2736 | } |
2722 | return sprintf(buffer, | 2737 | result = sprintf(buffer, |
2723 | "rchar: %llu\n" | 2738 | "rchar: %llu\n" |
2724 | "wchar: %llu\n" | 2739 | "wchar: %llu\n" |
2725 | "syscr: %llu\n" | 2740 | "syscr: %llu\n" |
@@ -2734,6 +2749,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) | |||
2734 | (unsigned long long)acct.read_bytes, | 2749 | (unsigned long long)acct.read_bytes, |
2735 | (unsigned long long)acct.write_bytes, | 2750 | (unsigned long long)acct.write_bytes, |
2736 | (unsigned long long)acct.cancelled_write_bytes); | 2751 | (unsigned long long)acct.cancelled_write_bytes); |
2752 | out_unlock: | ||
2753 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
2754 | return result; | ||
2737 | } | 2755 | } |
2738 | 2756 | ||
2739 | static int proc_tid_io_accounting(struct task_struct *task, char *buffer) | 2757 | static int proc_tid_io_accounting(struct task_struct *task, char *buffer) |
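The do_io_accounting() hunk puts the ptrace_may_access() check and the counter read under cred_guard_mutex, so the permission check cannot race with the target exec'ing a more privileged image; mutex_lock_killable() keeps a blocked reader killable if the target holds the mutex for a long time. Stripped of the /proc details, the shape is the usual take-lock / check / goto-unlock pattern:

    /* Generic shape of the change above: take a killable lock, do the
     * permission check under it, and funnel every exit through unlock. */
    static int read_protected_stats(struct task_struct *task, char *buffer)
    {
            int result;

            result = mutex_lock_killable(&task->signal->cred_guard_mutex);
            if (result)
                    return result;          /* interrupted by a fatal signal */

            if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
                    result = -EACCES;
                    goto out_unlock;
            }

            result = sprintf(buffer, "...\n"); /* gather stats under the lock */
    out_unlock:
            mutex_unlock(&task->signal->cred_guard_mutex);
            return result;
    }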
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f1637f17c37c..9d99131d0d65 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -620,8 +620,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
620 | if (!ent) goto out; | 620 | if (!ent) goto out; |
621 | 621 | ||
622 | memset(ent, 0, sizeof(struct proc_dir_entry)); | 622 | memset(ent, 0, sizeof(struct proc_dir_entry)); |
623 | memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); | 623 | memcpy(ent->name, fn, len + 1); |
624 | ent->name = ((char *) ent) + sizeof(*ent); | ||
625 | ent->namelen = len; | 624 | ent->namelen = len; |
626 | ent->mode = mode; | 625 | ent->mode = mode; |
627 | ent->nlink = nlink; | 626 | ent->nlink = nlink; |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 74b48cfa1bb2..7ed72d6c1c6f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -319,7 +319,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
319 | if (!pde->proc_fops) { | 319 | if (!pde->proc_fops) { |
320 | spin_unlock(&pde->pde_unload_lock); | 320 | spin_unlock(&pde->pde_unload_lock); |
321 | kfree(pdeo); | 321 | kfree(pdeo); |
322 | return -EINVAL; | 322 | return -ENOENT; |
323 | } | 323 | } |
324 | pde->pde_users++; | 324 | pde->pde_users++; |
325 | open = pde->proc_fops->open; | 325 | open = pde->proc_fops->open; |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index ed257d141568..586174168e2a 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <linux/seq_file.h> | 10 | #include <linux/seq_file.h> |
11 | #include <linux/swap.h> | 11 | #include <linux/swap.h> |
12 | #include <linux/vmstat.h> | 12 | #include <linux/vmstat.h> |
13 | #include <asm/atomic.h> | 13 | #include <linux/atomic.h> |
14 | #include <asm/page.h> | 14 | #include <asm/page.h> |
15 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
16 | #include "internal.h" | 16 | #include "internal.h" |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 9020ac15baaa..f738024ccc8e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -197,15 +197,15 @@ static __net_init int proc_net_ns_init(struct net *net) | |||
197 | int err; | 197 | int err; |
198 | 198 | ||
199 | err = -ENOMEM; | 199 | err = -ENOMEM; |
200 | netd = kzalloc(sizeof(*netd), GFP_KERNEL); | 200 | netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL); |
201 | if (!netd) | 201 | if (!netd) |
202 | goto out; | 202 | goto out; |
203 | 203 | ||
204 | netd->data = net; | 204 | netd->data = net; |
205 | netd->nlink = 2; | 205 | netd->nlink = 2; |
206 | netd->name = "net"; | ||
207 | netd->namelen = 3; | 206 | netd->namelen = 3; |
208 | netd->parent = &proc_root; | 207 | netd->parent = &proc_root; |
208 | memcpy(netd->name, "net", 4); | ||
209 | 209 | ||
210 | err = -EEXIST; | 210 | err = -EEXIST; |
211 | net_statd = proc_net_mkdir(net, "stat", netd); | 211 | net_statd = proc_net_mkdir(net, "stat", netd); |
diff --git a/fs/proc/root.c b/fs/proc/root.c index d6c3b416529b..9a8a2b77b874 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -186,13 +186,13 @@ static const struct inode_operations proc_root_inode_operations = { | |||
186 | struct proc_dir_entry proc_root = { | 186 | struct proc_dir_entry proc_root = { |
187 | .low_ino = PROC_ROOT_INO, | 187 | .low_ino = PROC_ROOT_INO, |
188 | .namelen = 5, | 188 | .namelen = 5, |
189 | .name = "/proc", | ||
190 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, | 189 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, |
191 | .nlink = 2, | 190 | .nlink = 2, |
192 | .count = ATOMIC_INIT(1), | 191 | .count = ATOMIC_INIT(1), |
193 | .proc_iops = &proc_root_inode_operations, | 192 | .proc_iops = &proc_root_inode_operations, |
194 | .proc_fops = &proc_root_operations, | 193 | .proc_fops = &proc_root_operations, |
195 | .parent = &proc_root, | 194 | .parent = &proc_root, |
195 | .name = "/proc", | ||
196 | }; | 196 | }; |
197 | 197 | ||
198 | int pid_ns_prepare_proc(struct pid_namespace *ns) | 198 | int pid_ns_prepare_proc(struct pid_namespace *ns) |
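The three proc hunks above all follow from one change: proc_dir_entry's name field becomes a flexible array member at the end of the struct. __proc_create() can then copy the name straight into ent->name, proc_net_ns_init() must allocate sizeof(*netd) plus room for "net\0" and memcpy the name in, and statically defined entries such as proc_root have to move .name to the last position in the initializer (GCC permits static initialization of a trailing flexible array). The pattern in isolation — the struct here is a reduced stand-in, not the real proc_dir_entry:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct pde_like {
            int namelen;
            char name[];    /* flexible array: storage follows the struct */
    };

    static struct pde_like *pde_alloc(const char *fn)
    {
            size_t len = strlen(fn);
            struct pde_like *ent = calloc(1, sizeof(*ent) + len + 1);

            if (!ent)
                    return NULL;
            memcpy(ent->name, fn, len + 1); /* one allocation, struct + name */
            ent->namelen = len;
            return ent;
    }

    int main(void)
    {
            struct pde_like *e = pde_alloc("net");
            printf("%s (%d)\n", e->name, e->namelen);
            free(e);
            return 0;
    }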
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 977ed2723845..893b961dcfd8 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
@@ -39,8 +39,9 @@ | |||
39 | #define PSTORE_NAMELEN 64 | 39 | #define PSTORE_NAMELEN 64 |
40 | 40 | ||
41 | struct pstore_private { | 41 | struct pstore_private { |
42 | struct pstore_info *psi; | ||
43 | enum pstore_type_id type; | ||
42 | u64 id; | 44 | u64 id; |
43 | int (*erase)(u64); | ||
44 | ssize_t size; | 45 | ssize_t size; |
45 | char data[]; | 46 | char data[]; |
46 | }; | 47 | }; |
@@ -73,7 +74,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) | |||
73 | { | 74 | { |
74 | struct pstore_private *p = dentry->d_inode->i_private; | 75 | struct pstore_private *p = dentry->d_inode->i_private; |
75 | 76 | ||
76 | p->erase(p->id); | 77 | p->psi->erase(p->type, p->id, p->psi); |
77 | 78 | ||
78 | return simple_unlink(dir, dentry); | 79 | return simple_unlink(dir, dentry); |
79 | } | 80 | } |
@@ -175,8 +176,8 @@ int pstore_is_mounted(void) | |||
175 | * Set the mtime & ctime to the date that this record was originally stored. | 176 | * Set the mtime & ctime to the date that this record was originally stored. |
176 | */ | 177 | */ |
177 | int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, | 178 | int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, |
178 | char *data, size_t size, | 179 | char *data, size_t size, struct timespec time, |
179 | struct timespec time, int (*erase)(u64)) | 180 | struct pstore_info *psi) |
180 | { | 181 | { |
181 | struct dentry *root = pstore_sb->s_root; | 182 | struct dentry *root = pstore_sb->s_root; |
182 | struct dentry *dentry; | 183 | struct dentry *dentry; |
@@ -192,8 +193,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, | |||
192 | private = kmalloc(sizeof *private + size, GFP_KERNEL); | 193 | private = kmalloc(sizeof *private + size, GFP_KERNEL); |
193 | if (!private) | 194 | if (!private) |
194 | goto fail_alloc; | 195 | goto fail_alloc; |
196 | private->type = type; | ||
195 | private->id = id; | 197 | private->id = id; |
196 | private->erase = erase; | 198 | private->psi = psi; |
197 | 199 | ||
198 | switch (type) { | 200 | switch (type) { |
199 | case PSTORE_TYPE_DMESG: | 201 | case PSTORE_TYPE_DMESG: |
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 8c9f23eb1645..611c1b3c46fa 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h | |||
@@ -2,5 +2,5 @@ extern void pstore_set_kmsg_bytes(int); | |||
2 | extern void pstore_get_records(void); | 2 | extern void pstore_get_records(void); |
3 | extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, | 3 | extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, |
4 | char *data, size_t size, | 4 | char *data, size_t size, |
5 | struct timespec time, int (*erase)(u64)); | 5 | struct timespec time, struct pstore_info *psi); |
6 | extern int pstore_is_mounted(void); | 6 | extern int pstore_is_mounted(void); |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index f2c3ff20ea68..c5300ec31696 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -37,6 +37,8 @@ | |||
37 | static DEFINE_SPINLOCK(pstore_lock); | 37 | static DEFINE_SPINLOCK(pstore_lock); |
38 | static struct pstore_info *psinfo; | 38 | static struct pstore_info *psinfo; |
39 | 39 | ||
40 | static char *backend; | ||
41 | |||
40 | /* How much of the console log to snapshot */ | 42 | /* How much of the console log to snapshot */ |
41 | static unsigned long kmsg_bytes = 10240; | 43 | static unsigned long kmsg_bytes = 10240; |
42 | 44 | ||
@@ -67,7 +69,8 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
67 | unsigned long size, total = 0; | 69 | unsigned long size, total = 0; |
68 | char *dst, *why; | 70 | char *dst, *why; |
69 | u64 id; | 71 | u64 id; |
70 | int hsize, part = 1; | 72 | int hsize; |
73 | unsigned int part = 1; | ||
71 | 74 | ||
72 | if (reason < ARRAY_SIZE(reason_str)) | 75 | if (reason < ARRAY_SIZE(reason_str)) |
73 | why = reason_str[reason]; | 76 | why = reason_str[reason]; |
@@ -78,7 +81,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
78 | oopscount++; | 81 | oopscount++; |
79 | while (total < kmsg_bytes) { | 82 | while (total < kmsg_bytes) { |
80 | dst = psinfo->buf; | 83 | dst = psinfo->buf; |
81 | hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++); | 84 | hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part); |
82 | size = psinfo->bufsize - hsize; | 85 | size = psinfo->bufsize - hsize; |
83 | dst += hsize; | 86 | dst += hsize; |
84 | 87 | ||
@@ -94,14 +97,16 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
94 | memcpy(dst, s1 + s1_start, l1_cpy); | 97 | memcpy(dst, s1 + s1_start, l1_cpy); |
95 | memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); | 98 | memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); |
96 | 99 | ||
97 | id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); | 100 | id = psinfo->write(PSTORE_TYPE_DMESG, part, |
101 | hsize + l1_cpy + l2_cpy, psinfo); | ||
98 | if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) | 102 | if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) |
99 | pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, | 103 | pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, |
100 | psinfo->buf, hsize + l1_cpy + l2_cpy, | 104 | psinfo->buf, hsize + l1_cpy + l2_cpy, |
101 | CURRENT_TIME, psinfo->erase); | 105 | CURRENT_TIME, psinfo); |
102 | l1 -= l1_cpy; | 106 | l1 -= l1_cpy; |
103 | l2 -= l2_cpy; | 107 | l2 -= l2_cpy; |
104 | total += l1_cpy + l2_cpy; | 108 | total += l1_cpy + l2_cpy; |
109 | part++; | ||
105 | } | 110 | } |
106 | mutex_unlock(&psinfo->buf_mutex); | 111 | mutex_unlock(&psinfo->buf_mutex); |
107 | } | 112 | } |
@@ -128,6 +133,12 @@ int pstore_register(struct pstore_info *psi) | |||
128 | spin_unlock(&pstore_lock); | 133 | spin_unlock(&pstore_lock); |
129 | return -EBUSY; | 134 | return -EBUSY; |
130 | } | 135 | } |
136 | |||
137 | if (backend && strcmp(backend, psi->name)) { | ||
138 | spin_unlock(&pstore_lock); | ||
139 | return -EINVAL; | ||
140 | } | ||
141 | |||
131 | psinfo = psi; | 142 | psinfo = psi; |
132 | spin_unlock(&pstore_lock); | 143 | spin_unlock(&pstore_lock); |
133 | 144 | ||
@@ -166,9 +177,9 @@ void pstore_get_records(void) | |||
166 | if (rc) | 177 | if (rc) |
167 | goto out; | 178 | goto out; |
168 | 179 | ||
169 | while ((size = psi->read(&id, &type, &time)) > 0) { | 180 | while ((size = psi->read(&id, &type, &time, psi)) > 0) { |
170 | if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, | 181 | if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, |
171 | time, psi->erase)) | 182 | time, psi)) |
172 | failed++; | 183 | failed++; |
173 | } | 184 | } |
174 | psi->close(psi); | 185 | psi->close(psi); |
@@ -196,12 +207,15 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) | |||
196 | 207 | ||
197 | mutex_lock(&psinfo->buf_mutex); | 208 | mutex_lock(&psinfo->buf_mutex); |
198 | memcpy(psinfo->buf, buf, size); | 209 | memcpy(psinfo->buf, buf, size); |
199 | id = psinfo->write(type, size); | 210 | id = psinfo->write(type, 0, size, psinfo); |
200 | if (pstore_is_mounted()) | 211 | if (pstore_is_mounted()) |
201 | pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, | 212 | pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, |
202 | size, CURRENT_TIME, psinfo->erase); | 213 | size, CURRENT_TIME, psinfo); |
203 | mutex_unlock(&psinfo->buf_mutex); | 214 | mutex_unlock(&psinfo->buf_mutex); |
204 | 215 | ||
205 | return 0; | 216 | return 0; |
206 | } | 217 | } |
207 | EXPORT_SYMBOL_GPL(pstore_write); | 218 | EXPORT_SYMBOL_GPL(pstore_write); |
219 | |||
220 | module_param(backend, charp, 0444); | ||
221 | MODULE_PARM_DESC(backend, "Pstore backend to use"); | ||
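Taken together, the pstore hunks change the backend contract: read, write, and erase all receive the struct pstore_info that registered them (so a backend can reach its own state), write additionally gets the part number of a multi-part dump, erase gets the record type as well as the id, and a new backend= module parameter lets the admin pin which backend may register (pstore_register() now refuses others with -EINVAL). A sketch of a backend under the new contract — the callback prototypes are inferred from the call sites above, and fields such as buf, bufsize, and open/close are omitted, so treat the details as assumptions:

    /* Hypothetical backend; prototypes inferred from the call sites:
     *   size = psi->read(&id, &type, &time, psi);
     *   id   = psi->write(type, part, size, psi);
     *          psi->erase(type, id, psi);
     */
    static ssize_t demo_read(u64 *id, enum pstore_type_id *type,
                             struct timespec *time, struct pstore_info *psi)
    {
            return 0;               /* no more records */
    }

    static u64 demo_write(enum pstore_type_id type, unsigned int part,
                          size_t size, struct pstore_info *psi)
    {
            /* psi->buf holds the record; part > 1 marks a continuation
             * of the same oops dump. */
            return 0;               /* record id */
    }

    static int demo_erase(enum pstore_type_id type, u64 id,
                          struct pstore_info *psi)
    {
            return 0;
    }

    static struct pstore_info demo_psinfo = {
            .name   = "demo",       /* matched against the backend= parameter */
            .read   = demo_read,
            .write  = demo_write,
            .erase  = demo_erase,
    };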
diff --git a/fs/read_write.c b/fs/read_write.c index 5907b49e4d7e..179f1c33ea57 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -166,8 +166,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) | |||
166 | * long as offset isn't at the end of the file then the | 166 | * long as offset isn't at the end of the file then the |
167 | * offset is data. | 167 | * offset is data. |
168 | */ | 168 | */ |
169 | if (offset >= inode->i_size) | 169 | if (offset >= inode->i_size) { |
170 | return -ENXIO; | 170 | retval = -ENXIO; |
171 | goto out; | ||
172 | } | ||
171 | break; | 173 | break; |
172 | case SEEK_HOLE: | 174 | case SEEK_HOLE: |
173 | /* | 175 | /* |
@@ -175,8 +177,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) | |||
175 | * as long as offset isn't i_size or larger, return | 177 | * as long as offset isn't i_size or larger, return |
176 | * i_size. | 178 | * i_size. |
177 | */ | 179 | */ |
178 | if (offset >= inode->i_size) | 180 | if (offset >= inode->i_size) { |
179 | return -ENXIO; | 181 | retval = -ENXIO; |
182 | goto out; | ||
183 | } | ||
180 | offset = inode->i_size; | 184 | offset = inode->i_size; |
181 | break; | 185 | break; |
182 | } | 186 | } |
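The default_llseek() hunk fixes an error path rather than the semantics: SEEK_DATA and SEEK_HOLE at or past i_size must still fail with -ENXIO, but now via the out label so the unlock at the end of the function is not skipped — returning directly would have leaked the lock taken earlier (presumably i_mutex). For filesystems on this generic path the userspace contract is simply: everything before EOF is data, and the only hole is a virtual one at EOF.

    #define _GNU_SOURCE             /* SEEK_DATA / SEEK_HOLE */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            int fd;
            off_t end;

            if (argc < 2)
                    return 1;
            fd = open(argv[1], O_RDONLY);
            end = lseek(fd, 0, SEEK_END);

            /* Generic path: data from 0, hole only at i_size. */
            printf("data from 0: %lld\n", (long long)lseek(fd, 0, SEEK_DATA));
            printf("hole from 0: %lld\n", (long long)lseek(fd, 0, SEEK_HOLE));

            /* At or past EOF both must fail with ENXIO (the case fixed above). */
            if (lseek(fd, end, SEEK_DATA) < 0)
                    perror("SEEK_DATA at EOF");
            close(fd);
            return 0;
    }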
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 7362cf4c946a..6da0396e5052 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -272,12 +272,10 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
272 | case ACL_TYPE_ACCESS: | 272 | case ACL_TYPE_ACCESS: |
273 | name = POSIX_ACL_XATTR_ACCESS; | 273 | name = POSIX_ACL_XATTR_ACCESS; |
274 | if (acl) { | 274 | if (acl) { |
275 | mode_t mode = inode->i_mode; | 275 | error = posix_acl_equiv_mode(acl, &inode->i_mode); |
276 | error = posix_acl_equiv_mode(acl, &mode); | ||
277 | if (error < 0) | 276 | if (error < 0) |
278 | return error; | 277 | return error; |
279 | else { | 278 | else { |
280 | inode->i_mode = mode; | ||
281 | if (error == 0) | 279 | if (error == 0) |
282 | acl = NULL; | 280 | acl = NULL; |
283 | } | 281 | } |
@@ -354,8 +352,6 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |||
354 | return PTR_ERR(acl); | 352 | return PTR_ERR(acl); |
355 | 353 | ||
356 | if (acl) { | 354 | if (acl) { |
357 | mode_t mode = inode->i_mode; | ||
358 | |||
359 | /* Copy the default ACL to the default ACL of a new directory */ | 355 | /* Copy the default ACL to the default ACL of a new directory */ |
360 | if (S_ISDIR(inode->i_mode)) { | 356 | if (S_ISDIR(inode->i_mode)) { |
361 | err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, | 357 | err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, |
@@ -366,12 +362,10 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |||
366 | 362 | ||
367 | /* Now we reconcile the new ACL and the mode, | 363 | /* Now we reconcile the new ACL and the mode, |
368 | potentially modifying both */ | 364 | potentially modifying both */ |
369 | err = posix_acl_create(&acl, GFP_NOFS, &mode); | 365 | err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); |
370 | if (err < 0) | 366 | if (err < 0) |
371 | return err; | 367 | return err; |
372 | 368 | ||
373 | inode->i_mode = mode; | ||
374 | |||
375 | /* If we need an ACL.. */ | 369 | /* If we need an ACL.. */ |
376 | if (err > 0) | 370 | if (err > 0) |
377 | err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); | 371 | err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); |
diff --git a/fs/stack.c b/fs/stack.c index 4a6f7f440658..b4f2ab48a61f 100644 --- a/fs/stack.c +++ b/fs/stack.c | |||
@@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) | |||
29 | * | 29 | * |
30 | * We don't actually know what locking is used at the lower level; | 30 | * We don't actually know what locking is used at the lower level; |
31 | * but if it's a filesystem that supports quotas, it will be using | 31 | * but if it's a filesystem that supports quotas, it will be using |
32 | * i_lock as in inode_add_bytes(). tmpfs uses other locking, and | 32 | * i_lock as in inode_add_bytes(). |
33 | * its 32-bit is (just) able to exceed 2TB i_size with the aid of | ||
34 | * holes; but its i_blocks cannot carry into the upper long without | ||
35 | * almost 2TB swap - let's ignore that case. | ||
36 | */ | 33 | */ |
37 | if (sizeof(i_blocks) > sizeof(long)) | 34 | if (sizeof(i_blocks) > sizeof(long)) |
38 | spin_lock(&src->i_lock); | 35 | spin_lock(&src->i_lock); |
diff --git a/fs/stat.c b/fs/stat.c --- a/fs/stat.c +++ b/fs/stat.c | |||
@@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) | |||
27 | stat->uid = inode->i_uid; | 27 | stat->uid = inode->i_uid; |
28 | stat->gid = inode->i_gid; | 28 | stat->gid = inode->i_gid; |
29 | stat->rdev = inode->i_rdev; | 29 | stat->rdev = inode->i_rdev; |
30 | stat->size = i_size_read(inode); | ||
30 | stat->atime = inode->i_atime; | 31 | stat->atime = inode->i_atime; |
31 | stat->mtime = inode->i_mtime; | 32 | stat->mtime = inode->i_mtime; |
32 | stat->ctime = inode->i_ctime; | 33 | stat->ctime = inode->i_ctime; |
33 | stat->size = i_size_read(inode); | ||
34 | stat->blocks = inode->i_blocks; | ||
35 | stat->blksize = (1 << inode->i_blkbits); | 34 | stat->blksize = (1 << inode->i_blkbits); |
35 | stat->blocks = inode->i_blocks; | ||
36 | } | 36 | } |
37 | 37 | ||
38 | EXPORT_SYMBOL(generic_fillattr); | 38 | EXPORT_SYMBOL(generic_fillattr); |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 45174b534377..feb361e252ac 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -335,9 +335,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
335 | #define DBGKEY(key) ((char *)(key)) | 335 | #define DBGKEY(key) ((char *)(key)) |
336 | #define DBGKEY1(key) ((char *)(key)) | 336 | #define DBGKEY1(key) ((char *)(key)) |
337 | 337 | ||
338 | #define ubifs_dbg_msg(fmt, ...) do { \ | 338 | #define ubifs_dbg_msg(fmt, ...) do { \ |
339 | if (0) \ | 339 | if (0) \ |
340 | pr_debug(fmt "\n", ##__VA_ARGS__); \ | 340 | printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ |
341 | } while (0) | 341 | } while (0) |
342 | 342 | ||
343 | #define dbg_dump_stack() | 343 | #define dbg_dump_stack() |
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 75bb316529dd..427a4e82a588 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -16,44 +16,53 @@ | |||
16 | # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | # | 17 | # |
18 | 18 | ||
19 | ccflags-y := -I$(src) -I$(src)/linux-2.6 | 19 | ccflags-y += -I$(src) # needed for trace events |
20 | ccflags-$(CONFIG_XFS_DEBUG) += -g | ||
21 | 20 | ||
22 | XFS_LINUX := linux-2.6 | 21 | ccflags-$(CONFIG_XFS_DEBUG) += -g |
23 | 22 | ||
24 | obj-$(CONFIG_XFS_FS) += xfs.o | 23 | obj-$(CONFIG_XFS_FS) += xfs.o |
25 | 24 | ||
26 | xfs-y += linux-2.6/xfs_trace.o | 25 | # this one should be compiled first, as the tracing macros can easily blow up |
27 | 26 | xfs-y += xfs_trace.o | |
28 | xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ | ||
29 | xfs_dquot.o \ | ||
30 | xfs_dquot_item.o \ | ||
31 | xfs_trans_dquot.o \ | ||
32 | xfs_qm_syscalls.o \ | ||
33 | xfs_qm_bhv.o \ | ||
34 | xfs_qm.o) | ||
35 | xfs-$(CONFIG_XFS_QUOTA) += linux-2.6/xfs_quotaops.o | ||
36 | |||
37 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
38 | xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o | ||
39 | endif | ||
40 | |||
41 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o | ||
42 | xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o | ||
43 | xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o | ||
44 | xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o | ||
45 | xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o | ||
46 | 27 | ||
28 | # highlevel code | ||
29 | xfs-y += xfs_aops.o \ | ||
30 | xfs_bit.o \ | ||
31 | xfs_buf.o \ | ||
32 | xfs_dfrag.o \ | ||
33 | xfs_discard.o \ | ||
34 | xfs_error.o \ | ||
35 | xfs_export.o \ | ||
36 | xfs_file.o \ | ||
37 | xfs_filestream.o \ | ||
38 | xfs_fsops.o \ | ||
39 | xfs_fs_subr.o \ | ||
40 | xfs_globals.o \ | ||
41 | xfs_iget.o \ | ||
42 | xfs_ioctl.o \ | ||
43 | xfs_iomap.o \ | ||
44 | xfs_iops.o \ | ||
45 | xfs_itable.o \ | ||
46 | xfs_message.o \ | ||
47 | xfs_mru_cache.o \ | ||
48 | xfs_super.o \ | ||
49 | xfs_sync.o \ | ||
50 | xfs_xattr.o \ | ||
51 | xfs_rename.o \ | ||
52 | xfs_rw.o \ | ||
53 | xfs_utils.o \ | ||
54 | xfs_vnodeops.o \ | ||
55 | kmem.o \ | ||
56 | uuid.o | ||
47 | 57 | ||
58 | # code shared with libxfs | ||
48 | xfs-y += xfs_alloc.o \ | 59 | xfs-y += xfs_alloc.o \ |
49 | xfs_alloc_btree.o \ | 60 | xfs_alloc_btree.o \ |
50 | xfs_attr.o \ | 61 | xfs_attr.o \ |
51 | xfs_attr_leaf.o \ | 62 | xfs_attr_leaf.o \ |
52 | xfs_bit.o \ | ||
53 | xfs_bmap.o \ | 63 | xfs_bmap.o \ |
54 | xfs_bmap_btree.o \ | 64 | xfs_bmap_btree.o \ |
55 | xfs_btree.o \ | 65 | xfs_btree.o \ |
56 | xfs_buf_item.o \ | ||
57 | xfs_da_btree.o \ | 66 | xfs_da_btree.o \ |
58 | xfs_dir2.o \ | 67 | xfs_dir2.o \ |
59 | xfs_dir2_block.o \ | 68 | xfs_dir2_block.o \ |
@@ -61,49 +70,37 @@ xfs-y += xfs_alloc.o \ | |||
61 | xfs_dir2_leaf.o \ | 70 | xfs_dir2_leaf.o \ |
62 | xfs_dir2_node.o \ | 71 | xfs_dir2_node.o \ |
63 | xfs_dir2_sf.o \ | 72 | xfs_dir2_sf.o \ |
64 | xfs_error.o \ | ||
65 | xfs_extfree_item.o \ | ||
66 | xfs_filestream.o \ | ||
67 | xfs_fsops.o \ | ||
68 | xfs_ialloc.o \ | 73 | xfs_ialloc.o \ |
69 | xfs_ialloc_btree.o \ | 74 | xfs_ialloc_btree.o \ |
70 | xfs_iget.o \ | ||
71 | xfs_inode.o \ | 75 | xfs_inode.o \ |
72 | xfs_inode_item.o \ | ||
73 | xfs_iomap.o \ | ||
74 | xfs_itable.o \ | ||
75 | xfs_dfrag.o \ | ||
76 | xfs_log.o \ | ||
77 | xfs_log_cil.o \ | ||
78 | xfs_log_recover.o \ | 76 | xfs_log_recover.o \ |
79 | xfs_mount.o \ | 77 | xfs_mount.o \ |
80 | xfs_mru_cache.o \ | 78 | xfs_trans.o |
81 | xfs_rename.o \ | 79 | |
82 | xfs_trans.o \ | 80 | # low-level transaction/log code |
81 | xfs-y += xfs_log.o \ | ||
82 | xfs_log_cil.o \ | ||
83 | xfs_buf_item.o \ | ||
84 | xfs_extfree_item.o \ | ||
85 | xfs_inode_item.o \ | ||
83 | xfs_trans_ail.o \ | 86 | xfs_trans_ail.o \ |
84 | xfs_trans_buf.o \ | 87 | xfs_trans_buf.o \ |
85 | xfs_trans_extfree.o \ | 88 | xfs_trans_extfree.o \ |
86 | xfs_trans_inode.o \ | 89 | xfs_trans_inode.o \ |
87 | xfs_utils.o \ | ||
88 | xfs_vnodeops.o \ | ||
89 | xfs_rw.o | ||
90 | |||
91 | # Objects in linux/ | ||
92 | xfs-y += $(addprefix $(XFS_LINUX)/, \ | ||
93 | kmem.o \ | ||
94 | xfs_aops.o \ | ||
95 | xfs_buf.o \ | ||
96 | xfs_discard.o \ | ||
97 | xfs_export.o \ | ||
98 | xfs_file.o \ | ||
99 | xfs_fs_subr.o \ | ||
100 | xfs_globals.o \ | ||
101 | xfs_ioctl.o \ | ||
102 | xfs_iops.o \ | ||
103 | xfs_message.o \ | ||
104 | xfs_super.o \ | ||
105 | xfs_sync.o \ | ||
106 | xfs_xattr.o) | ||
107 | 90 | ||
108 | # Objects in support/ | 91 | # optional features |
109 | xfs-y += support/uuid.o | 92 | xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ |
93 | xfs_dquot_item.o \ | ||
94 | xfs_trans_dquot.o \ | ||
95 | xfs_qm_syscalls.o \ | ||
96 | xfs_qm_bhv.o \ | ||
97 | xfs_qm.o \ | ||
98 | xfs_quotaops.o | ||
99 | ifeq ($(CONFIG_XFS_QUOTA),y) | ||
100 | xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o | ||
101 | endif | ||
102 | xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o | ||
103 | xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o | ||
104 | xfs-$(CONFIG_PROC_FS) += xfs_stats.o | ||
105 | xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o | ||
106 | xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o | ||
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/kmem.c index a907de565db3..a907de565db3 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/kmem.c | |||
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/kmem.h index 292eff198030..292eff198030 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/kmem.h | |||
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/mrlock.h index ff6a19873e5c..ff6a19873e5c 100644 --- a/fs/xfs/linux-2.6/mrlock.h +++ b/fs/xfs/mrlock.h | |||
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/time.h index 387e695a184c..387e695a184c 100644 --- a/fs/xfs/linux-2.6/time.h +++ b/fs/xfs/time.h | |||
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/uuid.c index b83f76b6d410..b83f76b6d410 100644 --- a/fs/xfs/support/uuid.c +++ b/fs/xfs/uuid.c | |||
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/uuid.h index 4732d71262cc..4732d71262cc 100644 --- a/fs/xfs/support/uuid.h +++ b/fs/xfs/uuid.h | |||
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 53ec3ea9a625..d8b11b7f94aa 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h | |||
@@ -24,5 +24,6 @@ | |||
24 | #define XFS_BUF_LOCK_TRACKING 1 | 24 | #define XFS_BUF_LOCK_TRACKING 1 |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | #include <linux-2.6/xfs_linux.h> | 27 | #include "xfs_linux.h" |
28 | |||
28 | #endif /* __XFS_H__ */ | 29 | #endif /* __XFS_H__ */ |
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/xfs_acl.c index 44ce51656804..b6c4b3795c4a 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -221,7 +221,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
221 | } | 221 | } |
222 | 222 | ||
223 | static int | 223 | static int |
224 | xfs_set_mode(struct inode *inode, mode_t mode) | 224 | xfs_set_mode(struct inode *inode, umode_t mode) |
225 | { | 225 | { |
226 | int error = 0; | 226 | int error = 0; |
227 | 227 | ||
@@ -267,7 +267,7 @@ posix_acl_default_exists(struct inode *inode) | |||
267 | int | 267 | int |
268 | xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) | 268 | xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) |
269 | { | 269 | { |
270 | mode_t mode = inode->i_mode; | 270 | umode_t mode = inode->i_mode; |
271 | int error = 0, inherit = 0; | 271 | int error = 0, inherit = 0; |
272 | 272 | ||
273 | if (S_ISDIR(inode->i_mode)) { | 273 | if (S_ISDIR(inode->i_mode)) { |
@@ -381,7 +381,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
381 | goto out_release; | 381 | goto out_release; |
382 | 382 | ||
383 | if (type == ACL_TYPE_ACCESS) { | 383 | if (type == ACL_TYPE_ACCESS) { |
384 | mode_t mode = inode->i_mode; | 384 | umode_t mode = inode->i_mode; |
385 | error = posix_acl_equiv_mode(acl, &mode); | 385 | error = posix_acl_equiv_mode(acl, &mode); |
386 | 386 | ||
387 | if (error <= 0) { | 387 | if (error <= 0) { |
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 2c656ef49473..39632d941354 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -51,7 +51,10 @@ extern int posix_acl_default_exists(struct inode *inode); | |||
51 | extern const struct xattr_handler xfs_xattr_acl_access_handler; | 51 | extern const struct xattr_handler xfs_xattr_acl_access_handler; |
52 | extern const struct xattr_handler xfs_xattr_acl_default_handler; | 52 | extern const struct xattr_handler xfs_xattr_acl_default_handler; |
53 | #else | 53 | #else |
54 | # define xfs_get_acl(inode, type) NULL | 54 | static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) |
55 | { | ||
56 | return NULL; | ||
57 | } | ||
55 | # define xfs_inherit_acl(inode, default_acl) 0 | 58 | # define xfs_inherit_acl(inode, default_acl) 0 |
56 | # define xfs_acl_chmod(inode) 0 | 59 | # define xfs_acl_chmod(inode) 0 |
57 | # define posix_acl_access_exists(inode) 0 | 60 | # define posix_acl_access_exists(inode) 0 |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 6530769a999b..4805f009f923 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -103,7 +103,7 @@ typedef struct xfs_agf { | |||
103 | /* disk block (xfs_daddr_t) in the AG */ | 103 | /* disk block (xfs_daddr_t) in the AG */ |
104 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) | 104 | #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) |
105 | #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) | 105 | #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) |
106 | #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) | 106 | #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) |
107 | 107 | ||
108 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, | 108 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, |
109 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); | 109 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); |
@@ -156,7 +156,7 @@ typedef struct xfs_agi { | |||
156 | /* disk block (xfs_daddr_t) in the AG */ | 156 | /* disk block (xfs_daddr_t) in the AG */ |
157 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) | 157 | #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) |
158 | #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) | 158 | #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) |
159 | #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) | 159 | #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) |
160 | 160 | ||
161 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | 161 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, |
162 | xfs_agnumber_t agno, struct xfs_buf **bpp); | 162 | xfs_agnumber_t agno, struct xfs_buf **bpp); |
@@ -168,7 +168,7 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | |||
168 | #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) | 168 | #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) |
169 | #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) | 169 | #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) |
170 | #define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) | 170 | #define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) |
171 | #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) | 171 | #define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) |
172 | 172 | ||
173 | typedef struct xfs_agfl { | 173 | typedef struct xfs_agfl { |
174 | __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ | 174 | __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 1e00b3ef6274..bdd9cb54d63b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -451,8 +451,7 @@ xfs_alloc_read_agfl( | |||
451 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 451 | XFS_FSS_TO_BB(mp, 1), 0, &bp); |
452 | if (error) | 452 | if (error) |
453 | return error; | 453 | return error; |
454 | ASSERT(bp); | 454 | ASSERT(!xfs_buf_geterror(bp)); |
455 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
456 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); | 455 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); |
457 | *bpp = bp; | 456 | *bpp = bp; |
458 | return 0; | 457 | return 0; |
@@ -2116,7 +2115,7 @@ xfs_read_agf( | |||
2116 | if (!*bpp) | 2115 | if (!*bpp) |
2117 | return 0; | 2116 | return 0; |
2118 | 2117 | ||
2119 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | 2118 | ASSERT(!(*bpp)->b_error); |
2120 | agf = XFS_BUF_TO_AGF(*bpp); | 2119 | agf = XFS_BUF_TO_AGF(*bpp); |
2121 | 2120 | ||
2122 | /* | 2121 | /* |
@@ -2168,7 +2167,7 @@ xfs_alloc_read_agf( | |||
2168 | return error; | 2167 | return error; |
2169 | if (!*bpp) | 2168 | if (!*bpp) |
2170 | return 0; | 2169 | return 0; |
2171 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | 2170 | ASSERT(!(*bpp)->b_error); |
2172 | 2171 | ||
2173 | agf = XFS_BUF_TO_AGF(*bpp); | 2172 | agf = XFS_BUF_TO_AGF(*bpp); |
2174 | pag = xfs_perag_get(mp, agno); | 2173 | pag = xfs_perag_get(mp, agno); |
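The xfs_alloc.c and xfs_attr.c hunks collapse the paired ASSERT(bp); ASSERT(!XFS_BUF_GETERROR(bp)); into a single ASSERT(!xfs_buf_geterror(bp)), which only works if the helper folds the NULL check in. Presumably it is a small inline along these lines — a sketch, not the verbatim kernel definition:

    /* Sketch: a NULL buffer is reported as an error (ENOMEM), so one
     * assertion covers both "allocation failed" and "I/O error". */
    static inline int xfs_buf_geterror(struct xfs_buf *bp)
    {
            return bp ? bp->b_error : ENOMEM;
    }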
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/xfs_aops.c index 63e971e2b837..8c37dde4c521 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1300,6 +1300,7 @@ xfs_end_io_direct_write( | |||
1300 | bool is_async) | 1300 | bool is_async) |
1301 | { | 1301 | { |
1302 | struct xfs_ioend *ioend = iocb->private; | 1302 | struct xfs_ioend *ioend = iocb->private; |
1303 | struct inode *inode = ioend->io_inode; | ||
1303 | 1304 | ||
1304 | /* | 1305 | /* |
1305 | * blockdev_direct_IO can return an error even after the I/O | 1306 | * blockdev_direct_IO can return an error even after the I/O |
@@ -1331,7 +1332,7 @@ xfs_end_io_direct_write( | |||
1331 | } | 1332 | } |
1332 | 1333 | ||
1333 | /* XXX: probably should move into the real I/O completion handler */ | 1334 | /* XXX: probably should move into the real I/O completion handler */ |
1334 | inode_dio_done(ioend->io_inode); | 1335 | inode_dio_done(inode); |
1335 | } | 1336 | } |
1336 | 1337 | ||
1337 | STATIC ssize_t | 1338 | STATIC ssize_t |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/xfs_aops.h index 71f721e1a71f..71f721e1a71f 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/xfs_aops.h | |||
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index cbae424fe1ba..160bcdc34a6e 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -2121,8 +2121,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2121 | 2121 | ||
2122 | bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, | 2122 | bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, |
2123 | XBF_LOCK | XBF_DONT_BLOCK); | 2123 | XBF_LOCK | XBF_DONT_BLOCK); |
2124 | ASSERT(bp); | 2124 | ASSERT(!xfs_buf_geterror(bp)); |
2125 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
2126 | 2125 | ||
2127 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : | 2126 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : |
2128 | XFS_BUF_SIZE(bp); | 2127 | XFS_BUF_SIZE(bp); |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c51a3f903633..452a291383ab 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -414,7 +414,7 @@ xfs_bmap_add_attrfork_local( | |||
414 | 414 | ||
415 | if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) | 415 | if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) |
416 | return 0; | 416 | return 0; |
417 | if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { | 417 | if (S_ISDIR(ip->i_d.di_mode)) { |
418 | mp = ip->i_mount; | 418 | mp = ip->i_mount; |
419 | memset(&dargs, 0, sizeof(dargs)); | 419 | memset(&dargs, 0, sizeof(dargs)); |
420 | dargs.dp = ip; | 420 | dargs.dp = ip; |
@@ -3344,8 +3344,7 @@ xfs_bmap_local_to_extents( | |||
3344 | * We don't want to deal with the case of keeping inode data inline yet. | 3344 | * We don't want to deal with the case of keeping inode data inline yet. |
3345 | * So sending the data fork of a regular inode is invalid. | 3345 | * So sending the data fork of a regular inode is invalid. |
3346 | */ | 3346 | */ |
3347 | ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG && | 3347 | ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); |
3348 | whichfork == XFS_DATA_FORK)); | ||
3349 | ifp = XFS_IFORK_PTR(ip, whichfork); | 3348 | ifp = XFS_IFORK_PTR(ip, whichfork); |
3350 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); | 3349 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); |
3351 | flags = 0; | 3350 | flags = 0; |
@@ -3384,8 +3383,7 @@ xfs_bmap_local_to_extents( | |||
3384 | ASSERT(args.len == 1); | 3383 | ASSERT(args.len == 1); |
3385 | *firstblock = args.fsbno; | 3384 | *firstblock = args.fsbno; |
3386 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); | 3385 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); |
3387 | memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data, | 3386 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); |
3388 | ifp->if_bytes); | ||
3389 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); | 3387 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); |
3390 | xfs_bmap_forkoff_reset(args.mp, ip, whichfork); | 3388 | xfs_bmap_forkoff_reset(args.mp, ip, whichfork); |
3391 | xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); | 3389 | xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); |
@@ -4052,7 +4050,7 @@ xfs_bmap_one_block( | |||
4052 | 4050 | ||
4053 | #ifndef DEBUG | 4051 | #ifndef DEBUG |
4054 | if (whichfork == XFS_DATA_FORK) { | 4052 | if (whichfork == XFS_DATA_FORK) { |
4055 | return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ? | 4053 | return S_ISREG(ip->i_d.di_mode) ? |
4056 | (ip->i_size == ip->i_mount->m_sb.sb_blocksize) : | 4054 | (ip->i_size == ip->i_mount->m_sb.sb_blocksize) : |
4057 | (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); | 4055 | (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); |
4058 | } | 4056 | } |
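Note: the S_IFMT rewrites throughout this series are exact equivalents of the <linux/stat.h> predicates. For reference:

        #define S_IFMT   00170000
        #define S_IFREG   0100000
        #define S_IFDIR   0040000
        #define S_ISREG(m)      (((m) & S_IFMT) == S_IFREG)
        #define S_ISDIR(m)      (((m) & S_IFMT) == S_IFDIR)

so "(ip->i_d.di_mode & S_IFMT) == S_IFREG" and "S_ISREG(ip->i_d.di_mode)" compile to the same test; only the spelling changes.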
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cabf4b5604aa..2b9fd385e27d 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -275,8 +275,7 @@ xfs_btree_dup_cursor( | |||
275 | return error; | 275 | return error; |
276 | } | 276 | } |
277 | new->bc_bufs[i] = bp; | 277 | new->bc_bufs[i] = bp; |
278 | ASSERT(bp); | 278 | ASSERT(!xfs_buf_geterror(bp)); |
279 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
280 | } else | 279 | } else |
281 | new->bc_bufs[i] = NULL; | 280 | new->bc_bufs[i] = NULL; |
282 | } | 281 | } |
@@ -467,8 +466,7 @@ xfs_btree_get_bufl( | |||
467 | ASSERT(fsbno != NULLFSBLOCK); | 466 | ASSERT(fsbno != NULLFSBLOCK); |
468 | d = XFS_FSB_TO_DADDR(mp, fsbno); | 467 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
469 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); | 468 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); |
470 | ASSERT(bp); | 469 | ASSERT(!xfs_buf_geterror(bp)); |
471 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
472 | return bp; | 470 | return bp; |
473 | } | 471 | } |
474 | 472 | ||
@@ -491,8 +489,7 @@ xfs_btree_get_bufs( | |||
491 | ASSERT(agbno != NULLAGBLOCK); | 489 | ASSERT(agbno != NULLAGBLOCK); |
492 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | 490 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); |
493 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); | 491 | bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); |
494 | ASSERT(bp); | 492 | ASSERT(!xfs_buf_geterror(bp)); |
495 | ASSERT(!XFS_BUF_GETERROR(bp)); | ||
496 | return bp; | 493 | return bp; |
497 | } | 494 | } |
498 | 495 | ||
@@ -632,7 +629,7 @@ xfs_btree_read_bufl( | |||
632 | mp->m_bsize, lock, &bp))) { | 629 | mp->m_bsize, lock, &bp))) { |
633 | return error; | 630 | return error; |
634 | } | 631 | } |
635 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | 632 | ASSERT(!xfs_buf_geterror(bp)); |
636 | if (bp) | 633 | if (bp) |
637 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | 634 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); |
638 | *bpp = bp; | 635 | *bpp = bp; |
@@ -973,8 +970,7 @@ xfs_btree_get_buf_block( | |||
973 | *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, | 970 | *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, |
974 | mp->m_bsize, flags); | 971 | mp->m_bsize, flags); |
975 | 972 | ||
976 | ASSERT(*bpp); | 973 | ASSERT(!xfs_buf_geterror(*bpp)); |
977 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | ||
978 | 974 | ||
979 | *block = XFS_BUF_TO_BLOCK(*bpp); | 975 | *block = XFS_BUF_TO_BLOCK(*bpp); |
980 | return 0; | 976 | return 0; |
@@ -1006,8 +1002,7 @@ xfs_btree_read_buf_block( | |||
1006 | if (error) | 1002 | if (error) |
1007 | return error; | 1003 | return error; |
1008 | 1004 | ||
1009 | ASSERT(*bpp != NULL); | 1005 | ASSERT(!xfs_buf_geterror(*bpp)); |
1010 | ASSERT(!XFS_BUF_GETERROR(*bpp)); | ||
1011 | 1006 | ||
1012 | xfs_btree_set_refs(cur, *bpp); | 1007 | xfs_btree_set_refs(cur, *bpp); |
1013 | *block = XFS_BUF_TO_BLOCK(*bpp); | 1008 | *block = XFS_BUF_TO_BLOCK(*bpp); |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 8d05a6a46ce3..5b240de104c0 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -262,7 +262,7 @@ typedef struct xfs_btree_cur | |||
262 | /* | 262 | /* |
263 | * Convert from buffer to btree block header. | 263 | * Convert from buffer to btree block header. |
264 | */ | 264 | */ |
265 | #define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp)) | 265 | #define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr)) |
266 | 266 | ||
267 | 267 | ||
268 | /* | 268 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/xfs_buf.c
index b2b411985591..c57836dc778f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -596,7 +596,7 @@ _xfs_buf_read( | |||
596 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); | 596 | bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); |
597 | 597 | ||
598 | status = xfs_buf_iorequest(bp); | 598 | status = xfs_buf_iorequest(bp); |
599 | if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) | 599 | if (status || bp->b_error || (flags & XBF_ASYNC)) |
600 | return status; | 600 | return status; |
601 | return xfs_buf_iowait(bp); | 601 | return xfs_buf_iowait(bp); |
602 | } | 602 | } |
@@ -679,7 +679,6 @@ xfs_buf_read_uncached( | |||
679 | /* set up the buffer for a read IO */ | 679 | /* set up the buffer for a read IO */ |
680 | XFS_BUF_SET_ADDR(bp, daddr); | 680 | XFS_BUF_SET_ADDR(bp, daddr); |
681 | XFS_BUF_READ(bp); | 681 | XFS_BUF_READ(bp); |
682 | XFS_BUF_BUSY(bp); | ||
683 | 682 | ||
684 | xfsbdstrat(mp, bp); | 683 | xfsbdstrat(mp, bp); |
685 | error = xfs_buf_iowait(bp); | 684 | error = xfs_buf_iowait(bp); |
@@ -1069,7 +1068,7 @@ xfs_bioerror( | |||
1069 | /* | 1068 | /* |
1070 | * No need to wait until the buffer is unpinned, we aren't flushing it. | 1069 | * No need to wait until the buffer is unpinned, we aren't flushing it. |
1071 | */ | 1070 | */ |
1072 | XFS_BUF_ERROR(bp, EIO); | 1071 | xfs_buf_ioerror(bp, EIO); |
1073 | 1072 | ||
1074 | /* | 1073 | /* |
1075 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. | 1074 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. |
@@ -1094,7 +1093,7 @@ STATIC int | |||
1094 | xfs_bioerror_relse( | 1093 | xfs_bioerror_relse( |
1095 | struct xfs_buf *bp) | 1094 | struct xfs_buf *bp) |
1096 | { | 1095 | { |
1097 | int64_t fl = XFS_BUF_BFLAGS(bp); | 1096 | int64_t fl = bp->b_flags; |
1098 | /* | 1097 | /* |
1099 | * No need to wait until the buffer is unpinned. | 1098 | * No need to wait until the buffer is unpinned. |
1100 | * We aren't flushing it. | 1099 | * We aren't flushing it. |
@@ -1115,7 +1114,7 @@ xfs_bioerror_relse( | |||
1115 | * There's no reason to mark error for | 1114 | * There's no reason to mark error for |
1116 | * ASYNC buffers. | 1115 | * ASYNC buffers. |
1117 | */ | 1116 | */ |
1118 | XFS_BUF_ERROR(bp, EIO); | 1117 | xfs_buf_ioerror(bp, EIO); |
1119 | XFS_BUF_FINISH_IOWAIT(bp); | 1118 | XFS_BUF_FINISH_IOWAIT(bp); |
1120 | } else { | 1119 | } else { |
1121 | xfs_buf_relse(bp); | 1120 | xfs_buf_relse(bp); |
@@ -1224,6 +1223,9 @@ _xfs_buf_ioapply( | |||
1224 | rw = READ; | 1223 | rw = READ; |
1225 | } | 1224 | } |
1226 | 1225 | ||
1226 | /* we only use the buffer cache for meta-data */ | ||
1227 | rw |= REQ_META; | ||
1228 | |||
1227 | next_chunk: | 1229 | next_chunk: |
1228 | atomic_inc(&bp->b_io_remaining); | 1230 | atomic_inc(&bp->b_io_remaining); |
1229 | nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); | 1231 | nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); |
@@ -1321,7 +1323,7 @@ xfs_buf_offset( | |||
1321 | struct page *page; | 1323 | struct page *page; |
1322 | 1324 | ||
1323 | if (bp->b_flags & XBF_MAPPED) | 1325 | if (bp->b_flags & XBF_MAPPED) |
1324 | return XFS_BUF_PTR(bp) + offset; | 1326 | return bp->b_addr + offset; |
1325 | 1327 | ||
1326 | offset += bp->b_offset; | 1328 | offset += bp->b_offset; |
1327 | page = bp->b_pages[offset >> PAGE_SHIFT]; | 1329 | page = bp->b_pages[offset >> PAGE_SHIFT]; |
@@ -1481,7 +1483,7 @@ xfs_setsize_buftarg_flags( | |||
1481 | if (set_blocksize(btp->bt_bdev, sectorsize)) { | 1483 | if (set_blocksize(btp->bt_bdev, sectorsize)) { |
1482 | xfs_warn(btp->bt_mount, | 1484 | xfs_warn(btp->bt_mount, |
1483 | "Cannot set_blocksize to %u on device %s\n", | 1485 | "Cannot set_blocksize to %u on device %s\n", |
1484 | sectorsize, XFS_BUFTARG_NAME(btp)); | 1486 | sectorsize, xfs_buf_target_name(btp)); |
1485 | return EINVAL; | 1487 | return EINVAL; |
1486 | } | 1488 | } |
1487 | 1489 | ||
@@ -1678,7 +1680,7 @@ xfs_buf_delwri_split( | |||
1678 | list_for_each_entry_safe(bp, n, dwq, b_list) { | 1680 | list_for_each_entry_safe(bp, n, dwq, b_list) { |
1679 | ASSERT(bp->b_flags & XBF_DELWRI); | 1681 | ASSERT(bp->b_flags & XBF_DELWRI); |
1680 | 1682 | ||
1681 | if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) { | 1683 | if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { |
1682 | if (!force && | 1684 | if (!force && |
1683 | time_before(jiffies, bp->b_queuetime + age)) { | 1685 | time_before(jiffies, bp->b_queuetime + age)) { |
1684 | xfs_buf_unlock(bp); | 1686 | xfs_buf_unlock(bp); |
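Note: two separable things happen in the xfs_buf.c hunks. The REQ_META tagging is the one behavioural addition; it marks these requests as metadata for the block layer and I/O schedulers. The deleted XFS_BUF_BUSY() call, by contrast, is pure dead-code removal: as the xfs_buf.h hunk below shows, the BUSY family was already stubbed out before this patch:

        #define XFS_BUF_BUSY(bp)        do { } while (0)
        #define XFS_BUF_UNBUSY(bp)      do { } while (0)
        #define XFS_BUF_ISBUSY(bp)      (1)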
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/xfs_buf.h
index 6a83b46b4bcf..620972b8094d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -228,11 +228,15 @@ extern void xfs_buf_delwri_promote(xfs_buf_t *); | |||
228 | extern int xfs_buf_init(void); | 228 | extern int xfs_buf_init(void); |
229 | extern void xfs_buf_terminate(void); | 229 | extern void xfs_buf_terminate(void); |
230 | 230 | ||
231 | #define xfs_buf_target_name(target) \ | 231 | static inline const char * |
232 | ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) | 232 | xfs_buf_target_name(struct xfs_buftarg *target) |
233 | { | ||
234 | static char __b[BDEVNAME_SIZE]; | ||
235 | |||
236 | return bdevname(target->bt_bdev, __b); | ||
237 | } | ||
233 | 238 | ||
234 | 239 | ||
235 | #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) | ||
236 | #define XFS_BUF_ZEROFLAGS(bp) \ | 240 | #define XFS_BUF_ZEROFLAGS(bp) \ |
237 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ | 241 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ |
238 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) | 242 | XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) |
@@ -251,23 +255,14 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
251 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) | 255 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) |
252 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | 256 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) |
253 | 257 | ||
254 | #define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no) | ||
255 | #define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp) | ||
256 | #define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0) | ||
257 | |||
258 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) | 258 | #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) |
259 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) | 259 | #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) |
260 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) | 260 | #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) |
261 | 261 | ||
262 | #define XFS_BUF_BUSY(bp) do { } while (0) | ||
263 | #define XFS_BUF_UNBUSY(bp) do { } while (0) | ||
264 | #define XFS_BUF_ISBUSY(bp) (1) | ||
265 | |||
266 | #define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) | 262 | #define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) |
267 | #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) | 263 | #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) |
268 | #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) | 264 | #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) |
269 | 265 | ||
270 | #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) | ||
271 | #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) | 266 | #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) |
272 | #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) | 267 | #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) |
273 | #define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) | 268 | #define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) |
@@ -276,10 +271,6 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
276 | #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) | 271 | #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) |
277 | #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) | 272 | #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) |
278 | 273 | ||
279 | #define XFS_BUF_SET_START(bp) do { } while (0) | ||
280 | |||
281 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | ||
282 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | ||
283 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) | 274 | #define XFS_BUF_ADDR(bp) ((bp)->b_bn) |
284 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) | 275 | #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) |
285 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) | 276 | #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) |
@@ -299,14 +290,13 @@ xfs_buf_set_ref( | |||
299 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) | 290 | #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) |
300 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) | 291 | #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) |
301 | 292 | ||
302 | #define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) | 293 | static inline int xfs_buf_ispinned(struct xfs_buf *bp) |
294 | { | ||
295 | return atomic_read(&bp->b_pin_count); | ||
296 | } | ||
303 | 297 | ||
304 | #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); | 298 | #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); |
305 | 299 | ||
306 | #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) | ||
307 | #define XFS_BUF_TARGET(bp) ((bp)->b_target) | ||
308 | #define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) | ||
309 | |||
310 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 300 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
311 | { | 301 | { |
312 | xfs_buf_unlock(bp); | 302 | xfs_buf_unlock(bp); |
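Note: turning xfs_buf_target_name() into an inline function also fixes a lifetime bug, at a small cost. The old macro's buffer was scoped to the statement expression, so the returned pointer dangled as soon as the expression ended; the new static buffer persists, but is shared by all callers. Reading that as a benign race because the name only feeds diagnostic messages is an inference, not something the patch states:

        /* old: __b dies at the end of the ({ ... }) block */
        #define xfs_buf_target_name(t) \
                ({ char __b[BDEVNAME_SIZE]; bdevname((t)->bt_bdev, __b); __b; })

        /* new: static storage outlives the call, but is not reentrant */
        static inline const char *
        xfs_buf_target_name(struct xfs_buftarg *target)
        {
                static char __b[BDEVNAME_SIZE];

                return bdevname(target->bt_bdev, __b);
        }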
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 88492916c3dc..cac2ecfa6746 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -124,9 +124,9 @@ xfs_buf_item_log_check( | |||
124 | 124 | ||
125 | bp = bip->bli_buf; | 125 | bp = bip->bli_buf; |
126 | ASSERT(XFS_BUF_COUNT(bp) > 0); | 126 | ASSERT(XFS_BUF_COUNT(bp) > 0); |
127 | ASSERT(XFS_BUF_PTR(bp) != NULL); | 127 | ASSERT(bp->b_addr != NULL); |
128 | orig = bip->bli_orig; | 128 | orig = bip->bli_orig; |
129 | buffer = XFS_BUF_PTR(bp); | 129 | buffer = bp->b_addr; |
130 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { | 130 | for (x = 0; x < XFS_BUF_COUNT(bp); x++) { |
131 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { | 131 | if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { |
132 | xfs_emerg(bp->b_mount, | 132 | xfs_emerg(bp->b_mount, |
@@ -371,7 +371,6 @@ xfs_buf_item_pin( | |||
371 | { | 371 | { |
372 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 372 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
373 | 373 | ||
374 | ASSERT(XFS_BUF_ISBUSY(bip->bli_buf)); | ||
375 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 374 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
376 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 375 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
377 | (bip->bli_flags & XFS_BLI_STALE)); | 376 | (bip->bli_flags & XFS_BLI_STALE)); |
@@ -479,13 +478,13 @@ xfs_buf_item_trylock( | |||
479 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 478 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
480 | struct xfs_buf *bp = bip->bli_buf; | 479 | struct xfs_buf *bp = bip->bli_buf; |
481 | 480 | ||
482 | if (XFS_BUF_ISPINNED(bp)) | 481 | if (xfs_buf_ispinned(bp)) |
483 | return XFS_ITEM_PINNED; | 482 | return XFS_ITEM_PINNED; |
484 | if (!xfs_buf_trylock(bp)) | 483 | if (!xfs_buf_trylock(bp)) |
485 | return XFS_ITEM_LOCKED; | 484 | return XFS_ITEM_LOCKED; |
486 | 485 | ||
487 | /* take a reference to the buffer. */ | 486 | /* take a reference to the buffer. */ |
488 | XFS_BUF_HOLD(bp); | 487 | xfs_buf_hold(bp); |
489 | 488 | ||
490 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); | 489 | ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); |
491 | trace_xfs_buf_item_trylock(bip); | 490 | trace_xfs_buf_item_trylock(bip); |
@@ -726,7 +725,7 @@ xfs_buf_item_init( | |||
726 | * to have logged. | 725 | * to have logged. |
727 | */ | 726 | */ |
728 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); | 727 | bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); |
729 | memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp)); | 728 | memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp)); |
730 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); | 729 | bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); |
731 | #endif | 730 | #endif |
732 | 731 | ||
@@ -895,7 +894,6 @@ xfs_buf_attach_iodone( | |||
895 | { | 894 | { |
896 | xfs_log_item_t *head_lip; | 895 | xfs_log_item_t *head_lip; |
897 | 896 | ||
898 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
899 | ASSERT(xfs_buf_islocked(bp)); | 897 | ASSERT(xfs_buf_islocked(bp)); |
900 | 898 | ||
901 | lip->li_cb = cb; | 899 | lip->li_cb = cb; |
@@ -960,7 +958,7 @@ xfs_buf_iodone_callbacks( | |||
960 | static ulong lasttime; | 958 | static ulong lasttime; |
961 | static xfs_buftarg_t *lasttarg; | 959 | static xfs_buftarg_t *lasttarg; |
962 | 960 | ||
963 | if (likely(!XFS_BUF_GETERROR(bp))) | 961 | if (likely(!xfs_buf_geterror(bp))) |
964 | goto do_callbacks; | 962 | goto do_callbacks; |
965 | 963 | ||
966 | /* | 964 | /* |
@@ -973,14 +971,14 @@ xfs_buf_iodone_callbacks( | |||
973 | goto do_callbacks; | 971 | goto do_callbacks; |
974 | } | 972 | } |
975 | 973 | ||
976 | if (XFS_BUF_TARGET(bp) != lasttarg || | 974 | if (bp->b_target != lasttarg || |
977 | time_after(jiffies, (lasttime + 5*HZ))) { | 975 | time_after(jiffies, (lasttime + 5*HZ))) { |
978 | lasttime = jiffies; | 976 | lasttime = jiffies; |
979 | xfs_alert(mp, "Device %s: metadata write error block 0x%llx", | 977 | xfs_alert(mp, "Device %s: metadata write error block 0x%llx", |
980 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 978 | xfs_buf_target_name(bp->b_target), |
981 | (__uint64_t)XFS_BUF_ADDR(bp)); | 979 | (__uint64_t)XFS_BUF_ADDR(bp)); |
982 | } | 980 | } |
983 | lasttarg = XFS_BUF_TARGET(bp); | 981 | lasttarg = bp->b_target; |
984 | 982 | ||
985 | /* | 983 | /* |
986 | * If the write was asynchronous then no one will be looking for the | 984 | * If the write was asynchronous then no one will be looking for the |
@@ -991,12 +989,11 @@ xfs_buf_iodone_callbacks( | |||
991 | * around. | 989 | * around. |
992 | */ | 990 | */ |
993 | if (XFS_BUF_ISASYNC(bp)) { | 991 | if (XFS_BUF_ISASYNC(bp)) { |
994 | XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ | 992 | xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ |
995 | 993 | ||
996 | if (!XFS_BUF_ISSTALE(bp)) { | 994 | if (!XFS_BUF_ISSTALE(bp)) { |
997 | XFS_BUF_DELAYWRITE(bp); | 995 | XFS_BUF_DELAYWRITE(bp); |
998 | XFS_BUF_DONE(bp); | 996 | XFS_BUF_DONE(bp); |
999 | XFS_BUF_SET_START(bp); | ||
1000 | } | 997 | } |
1001 | ASSERT(bp->b_iodone != NULL); | 998 | ASSERT(bp->b_iodone != NULL); |
1002 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | 999 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); |
@@ -1013,7 +1010,6 @@ xfs_buf_iodone_callbacks( | |||
1013 | XFS_BUF_UNDELAYWRITE(bp); | 1010 | XFS_BUF_UNDELAYWRITE(bp); |
1014 | 1011 | ||
1015 | trace_xfs_buf_error_relse(bp, _RET_IP_); | 1012 | trace_xfs_buf_error_relse(bp, _RET_IP_); |
1016 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1017 | 1013 | ||
1018 | do_callbacks: | 1014 | do_callbacks: |
1019 | xfs_buf_do_callbacks(bp); | 1015 | xfs_buf_do_callbacks(bp); |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 2925726529f8..ee9d5427fcd4 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -692,6 +692,24 @@ xfs_da_join(xfs_da_state_t *state) | |||
692 | return(error); | 692 | return(error); |
693 | } | 693 | } |
694 | 694 | ||
695 | #ifdef DEBUG | ||
696 | static void | ||
697 | xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) | ||
698 | { | ||
699 | __be16 magic = blkinfo->magic; | ||
700 | |||
701 | if (level == 1) { | ||
702 | ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | ||
703 | magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
704 | } else | ||
705 | ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); | ||
706 | ASSERT(!blkinfo->forw); | ||
707 | ASSERT(!blkinfo->back); | ||
708 | } | ||
709 | #else /* !DEBUG */ | ||
710 | #define xfs_da_blkinfo_onlychild_validate(blkinfo, level) | ||
711 | #endif /* !DEBUG */ | ||
712 | |||
695 | /* | 713 | /* |
696 | * We have only one entry in the root. Copy the only remaining child of | 714 | * We have only one entry in the root. Copy the only remaining child of |
697 | * the old root to block 0 as the new root node. | 715 | * the old root to block 0 as the new root node. |
@@ -700,8 +718,6 @@ STATIC int | |||
700 | xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) | 718 | xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) |
701 | { | 719 | { |
702 | xfs_da_intnode_t *oldroot; | 720 | xfs_da_intnode_t *oldroot; |
703 | /* REFERENCED */ | ||
704 | xfs_da_blkinfo_t *blkinfo; | ||
705 | xfs_da_args_t *args; | 721 | xfs_da_args_t *args; |
706 | xfs_dablk_t child; | 722 | xfs_dablk_t child; |
707 | xfs_dabuf_t *bp; | 723 | xfs_dabuf_t *bp; |
@@ -732,15 +748,9 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) | |||
732 | if (error) | 748 | if (error) |
733 | return(error); | 749 | return(error); |
734 | ASSERT(bp != NULL); | 750 | ASSERT(bp != NULL); |
735 | blkinfo = bp->data; | 751 | xfs_da_blkinfo_onlychild_validate(bp->data, |
736 | if (be16_to_cpu(oldroot->hdr.level) == 1) { | 752 | be16_to_cpu(oldroot->hdr.level)); |
737 | ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || | 753 | |
738 | blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
739 | } else { | ||
740 | ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); | ||
741 | } | ||
742 | ASSERT(!blkinfo->forw); | ||
743 | ASSERT(!blkinfo->back); | ||
744 | memcpy(root_blk->bp->data, bp->data, state->blocksize); | 754 | memcpy(root_blk->bp->data, bp->data, state->blocksize); |
745 | xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); | 755 | xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); |
746 | error = xfs_da_shrink_inode(args, child, bp); | 756 | error = xfs_da_shrink_inode(args, child, bp); |
@@ -2040,7 +2050,7 @@ xfs_da_do_buf( | |||
2040 | case 0: | 2050 | case 0: |
2041 | bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, | 2051 | bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, |
2042 | mappedbno, nmapped, 0); | 2052 | mappedbno, nmapped, 0); |
2043 | error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO); | 2053 | error = bp ? bp->b_error : XFS_ERROR(EIO); |
2044 | break; | 2054 | break; |
2045 | case 1: | 2055 | case 1: |
2046 | case 2: | 2056 | case 2: |
@@ -2258,7 +2268,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) | |||
2258 | dabuf->nbuf = 1; | 2268 | dabuf->nbuf = 1; |
2259 | bp = bps[0]; | 2269 | bp = bps[0]; |
2260 | dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); | 2270 | dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); |
2261 | dabuf->data = XFS_BUF_PTR(bp); | 2271 | dabuf->data = bp->b_addr; |
2262 | dabuf->bps[0] = bp; | 2272 | dabuf->bps[0] = bp; |
2263 | } else { | 2273 | } else { |
2264 | dabuf->nbuf = nbuf; | 2274 | dabuf->nbuf = nbuf; |
@@ -2269,7 +2279,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) | |||
2269 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); | 2279 | dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); |
2270 | for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { | 2280 | for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { |
2271 | bp = bps[i]; | 2281 | bp = bps[i]; |
2272 | memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp), | 2282 | memcpy((char *)dabuf->data + off, bp->b_addr, |
2273 | XFS_BUF_COUNT(bp)); | 2283 | XFS_BUF_COUNT(bp)); |
2274 | } | 2284 | } |
2275 | } | 2285 | } |
@@ -2292,8 +2302,8 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf) | |||
2292 | for (i = off = 0; i < dabuf->nbuf; | 2302 | for (i = off = 0; i < dabuf->nbuf; |
2293 | i++, off += XFS_BUF_COUNT(bp)) { | 2303 | i++, off += XFS_BUF_COUNT(bp)) { |
2294 | bp = dabuf->bps[i]; | 2304 | bp = dabuf->bps[i]; |
2295 | memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off, | 2305 | memcpy(bp->b_addr, dabuf->data + off, |
2296 | XFS_BUF_COUNT(bp)); | 2306 | XFS_BUF_COUNT(bp)); |
2297 | } | 2307 | } |
2298 | } | 2308 | } |
2299 | } | 2309 | } |
@@ -2330,7 +2340,7 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) | |||
2330 | 2340 | ||
2331 | ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); | 2341 | ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); |
2332 | if (dabuf->nbuf == 1) { | 2342 | if (dabuf->nbuf == 1) { |
2333 | ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0])); | 2343 | ASSERT(dabuf->data == dabuf->bps[0]->b_addr); |
2334 | xfs_trans_log_buf(tp, dabuf->bps[0], first, last); | 2344 | xfs_trans_log_buf(tp, dabuf->bps[0], first, last); |
2335 | return; | 2345 | return; |
2336 | } | 2346 | } |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index dffba9ba0db6..a3721633abc8 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt { | |||
148 | be32_to_cpu((dip)->di_nextents) : \ | 148 | be32_to_cpu((dip)->di_nextents) : \ |
149 | be16_to_cpu((dip)->di_anextents)) | 149 | be16_to_cpu((dip)->di_anextents)) |
150 | 150 | ||
151 | #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) | 151 | #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)((bp)->b_addr)) |
152 | 152 | ||
153 | /* | 153 | /* |
154 | * For block and character special files the 32bit dev_t is stored at the | 154 | * For block and character special files the 32bit dev_t is stored at the |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 4580ce00aeb4..a2e27010c7fb 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -121,7 +121,7 @@ xfs_dir_isempty( | |||
121 | { | 121 | { |
122 | xfs_dir2_sf_hdr_t *sfp; | 122 | xfs_dir2_sf_hdr_t *sfp; |
123 | 123 | ||
124 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 124 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
125 | if (dp->i_d.di_size == 0) /* might happen during shutdown. */ | 125 | if (dp->i_d.di_size == 0) /* might happen during shutdown. */ |
126 | return 1; | 126 | return 1; |
127 | if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) | 127 | if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) |
@@ -179,7 +179,7 @@ xfs_dir_init( | |||
179 | memset((char *)&args, 0, sizeof(args)); | 179 | memset((char *)&args, 0, sizeof(args)); |
180 | args.dp = dp; | 180 | args.dp = dp; |
181 | args.trans = tp; | 181 | args.trans = tp; |
182 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 182 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
183 | if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) | 183 | if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) |
184 | return error; | 184 | return error; |
185 | return xfs_dir2_sf_create(&args, pdp->i_ino); | 185 | return xfs_dir2_sf_create(&args, pdp->i_ino); |
@@ -202,7 +202,7 @@ xfs_dir_createname( | |||
202 | int rval; | 202 | int rval; |
203 | int v; /* type-checking value */ | 203 | int v; /* type-checking value */ |
204 | 204 | ||
205 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 205 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
206 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 206 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) |
207 | return rval; | 207 | return rval; |
208 | XFS_STATS_INC(xs_dir_create); | 208 | XFS_STATS_INC(xs_dir_create); |
@@ -278,7 +278,7 @@ xfs_dir_lookup( | |||
278 | int rval; | 278 | int rval; |
279 | int v; /* type-checking value */ | 279 | int v; /* type-checking value */ |
280 | 280 | ||
281 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 281 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
282 | XFS_STATS_INC(xs_dir_lookup); | 282 | XFS_STATS_INC(xs_dir_lookup); |
283 | 283 | ||
284 | memset(&args, 0, sizeof(xfs_da_args_t)); | 284 | memset(&args, 0, sizeof(xfs_da_args_t)); |
@@ -333,7 +333,7 @@ xfs_dir_removename( | |||
333 | int rval; | 333 | int rval; |
334 | int v; /* type-checking value */ | 334 | int v; /* type-checking value */ |
335 | 335 | ||
336 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 336 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
337 | XFS_STATS_INC(xs_dir_remove); | 337 | XFS_STATS_INC(xs_dir_remove); |
338 | 338 | ||
339 | memset(&args, 0, sizeof(xfs_da_args_t)); | 339 | memset(&args, 0, sizeof(xfs_da_args_t)); |
@@ -382,7 +382,7 @@ xfs_readdir( | |||
382 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | 382 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
383 | return XFS_ERROR(EIO); | 383 | return XFS_ERROR(EIO); |
384 | 384 | ||
385 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 385 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
386 | XFS_STATS_INC(xs_dir_getdents); | 386 | XFS_STATS_INC(xs_dir_getdents); |
387 | 387 | ||
388 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 388 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) |
@@ -414,7 +414,7 @@ xfs_dir_replace( | |||
414 | int rval; | 414 | int rval; |
415 | int v; /* type-checking value */ | 415 | int v; /* type-checking value */ |
416 | 416 | ||
417 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 417 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
418 | 418 | ||
419 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 419 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) |
420 | return rval; | 420 | return rval; |
@@ -464,7 +464,7 @@ xfs_dir_canenter( | |||
464 | if (resblks) | 464 | if (resblks) |
465 | return 0; | 465 | return 0; |
466 | 466 | ||
467 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 467 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
468 | 468 | ||
469 | memset(&args, 0, sizeof(xfs_da_args_t)); | 469 | memset(&args, 0, sizeof(xfs_da_args_t)); |
470 | args.name = name->name; | 470 | args.name = name->name; |
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/xfs_discard.c
index 244e797dae32..244e797dae32 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/xfs_discard.h
index 344879aea646..344879aea646 100644
--- a/fs/xfs/linux-2.6/xfs_discard.h
+++ b/fs/xfs/xfs_discard.h
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 837f31158d43..db62959bed13 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -318,10 +318,9 @@ xfs_qm_init_dquot_blk( | |||
318 | int curid, i; | 318 | int curid, i; |
319 | 319 | ||
320 | ASSERT(tp); | 320 | ASSERT(tp); |
321 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
322 | ASSERT(xfs_buf_islocked(bp)); | 321 | ASSERT(xfs_buf_islocked(bp)); |
323 | 322 | ||
324 | d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); | 323 | d = bp->b_addr; |
325 | 324 | ||
326 | /* | 325 | /* |
327 | * ID of the first dquot in the block - id's are zero based. | 326 | * ID of the first dquot in the block - id's are zero based. |
@@ -403,7 +402,7 @@ xfs_qm_dqalloc( | |||
403 | dqp->q_blkno, | 402 | dqp->q_blkno, |
404 | mp->m_quotainfo->qi_dqchunklen, | 403 | mp->m_quotainfo->qi_dqchunklen, |
405 | 0); | 404 | 0); |
406 | if (!bp || (error = XFS_BUF_GETERROR(bp))) | 405 | if (!bp || (error = xfs_buf_geterror(bp))) |
407 | goto error1; | 406 | goto error1; |
408 | /* | 407 | /* |
409 | * Make a chunk of dquots out of this buffer and log | 408 | * Make a chunk of dquots out of this buffer and log |
@@ -534,13 +533,12 @@ xfs_qm_dqtobp( | |||
534 | return XFS_ERROR(error); | 533 | return XFS_ERROR(error); |
535 | } | 534 | } |
536 | 535 | ||
537 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
538 | ASSERT(xfs_buf_islocked(bp)); | 536 | ASSERT(xfs_buf_islocked(bp)); |
539 | 537 | ||
540 | /* | 538 | /* |
541 | * calculate the location of the dquot inside the buffer. | 539 | * calculate the location of the dquot inside the buffer. |
542 | */ | 540 | */ |
543 | ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); | 541 | ddq = bp->b_addr + dqp->q_bufoffset; |
544 | 542 | ||
545 | /* | 543 | /* |
546 | * A simple sanity check in case we got a corrupted dquot... | 544 | * A simple sanity check in case we got a corrupted dquot... |
@@ -553,7 +551,6 @@ xfs_qm_dqtobp( | |||
553 | xfs_trans_brelse(tp, bp); | 551 | xfs_trans_brelse(tp, bp); |
554 | return XFS_ERROR(EIO); | 552 | return XFS_ERROR(EIO); |
555 | } | 553 | } |
556 | XFS_BUF_BUSY(bp); /* We dirtied this */ | ||
557 | } | 554 | } |
558 | 555 | ||
559 | *O_bpp = bp; | 556 | *O_bpp = bp; |
@@ -622,7 +619,6 @@ xfs_qm_dqread( | |||
622 | * this particular dquot was repaired. We still aren't afraid to | 619 | * this particular dquot was repaired. We still aren't afraid to |
623 | * brelse it because we have the changes incore. | 620 | * brelse it because we have the changes incore. |
624 | */ | 621 | */ |
625 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
626 | ASSERT(xfs_buf_islocked(bp)); | 622 | ASSERT(xfs_buf_islocked(bp)); |
627 | xfs_trans_brelse(tp, bp); | 623 | xfs_trans_brelse(tp, bp); |
628 | 624 | ||
@@ -1204,7 +1200,7 @@ xfs_qm_dqflush( | |||
1204 | /* | 1200 | /* |
1205 | * Calculate the location of the dquot inside the buffer. | 1201 | * Calculate the location of the dquot inside the buffer. |
1206 | */ | 1202 | */ |
1207 | ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); | 1203 | ddqp = bp->b_addr + dqp->q_bufoffset; |
1208 | 1204 | ||
1209 | /* | 1205 | /* |
1210 | * A simple sanity check in case we got a corrupted dquot.. | 1206 | * A simple sanity check in case we got a corrupted dquot.. |
@@ -1240,7 +1236,7 @@ xfs_qm_dqflush( | |||
1240 | * If the buffer is pinned then push on the log so we won't | 1236 | * If the buffer is pinned then push on the log so we won't |
1241 | * get stuck waiting in the write for too long. | 1237 | * get stuck waiting in the write for too long. |
1242 | */ | 1238 | */ |
1243 | if (XFS_BUF_ISPINNED(bp)) { | 1239 | if (xfs_buf_ispinned(bp)) { |
1244 | trace_xfs_dqflush_force(dqp); | 1240 | trace_xfs_dqflush_force(dqp); |
1245 | xfs_log_force(mp, 0); | 1241 | xfs_log_force(mp, 0); |
1246 | } | 1242 | } |
@@ -1447,7 +1443,7 @@ xfs_qm_dqflock_pushbuf_wait( | |||
1447 | goto out_lock; | 1443 | goto out_lock; |
1448 | 1444 | ||
1449 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 1445 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
1450 | if (XFS_BUF_ISPINNED(bp)) | 1446 | if (xfs_buf_ispinned(bp)) |
1451 | xfs_log_force(mp, 0); | 1447 | xfs_log_force(mp, 0); |
1452 | xfs_buf_delwri_promote(bp); | 1448 | xfs_buf_delwri_promote(bp); |
1453 | wake_up_process(bp->b_target->bt_task); | 1449 | wake_up_process(bp->b_target->bt_task); |
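Note: the dquot hunks also drop the casts that used to accompany XFS_BUF_PTR(). b_addr is a void *, and the kernel is built as GNU C, where void-pointer arithmetic is a documented extension behaving like char * arithmetic, so

        ddq = bp->b_addr + dqp->q_bufoffset;

computes the same address as the old "(struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset)", with the implicit void-to-typed-pointer conversion doing the final typing.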
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 34b7e945dbfa..34b7e945dbfa 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 9e0e2fa3f2c8..9e0e2fa3f2c8 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index 5acae2ada70b..5acae2ada70b 100644
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/xfs_export.c
index 75e5d322e48f..75e5d322e48f 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/xfs_export.c
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/xfs_export.h
index 3272b6ae7a35..3272b6ae7a35 100644
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ b/fs/xfs/xfs_export.h
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/xfs_file.c
index 825390e1c138..7f7b42469ea7 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -149,7 +149,9 @@ xfs_file_fsync( | |||
149 | 149 | ||
150 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 150 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
151 | 151 | ||
152 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
152 | xfs_ioend_wait(ip); | 153 | xfs_ioend_wait(ip); |
154 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
153 | 155 | ||
154 | if (mp->m_flags & XFS_MOUNT_BARRIER) { | 156 | if (mp->m_flags & XFS_MOUNT_BARRIER) { |
155 | /* | 157 | /* |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9124425b7f2f..3ff3d9e23ded 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -344,9 +344,9 @@ _xfs_filestream_update_ag( | |||
344 | * Either ip is a regular file and pip is a directory, or ip is a | 344 | * Either ip is a regular file and pip is a directory, or ip is a |
345 | * directory and pip is NULL. | 345 | * directory and pip is NULL. |
346 | */ | 346 | */ |
347 | ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip && | 347 | ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && |
348 | (pip->i_d.di_mode & S_IFDIR)) || | 348 | S_ISDIR(pip->i_d.di_mode)) || |
349 | ((ip->i_d.di_mode & S_IFDIR) && !pip))); | 349 | (S_ISDIR(ip->i_d.di_mode) && !pip))); |
350 | 350 | ||
351 | mp = ip->i_mount; | 351 | mp = ip->i_mount; |
352 | cache = mp->m_filestream; | 352 | cache = mp->m_filestream; |
@@ -537,7 +537,7 @@ xfs_filestream_lookup_ag( | |||
537 | xfs_agnumber_t ag; | 537 | xfs_agnumber_t ag; |
538 | int ref; | 538 | int ref; |
539 | 539 | ||
540 | if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) { | 540 | if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { |
541 | ASSERT(0); | 541 | ASSERT(0); |
542 | return NULLAGNUMBER; | 542 | return NULLAGNUMBER; |
543 | } | 543 | } |
@@ -579,9 +579,9 @@ xfs_filestream_associate( | |||
579 | xfs_agnumber_t ag, rotorstep, startag; | 579 | xfs_agnumber_t ag, rotorstep, startag; |
580 | int err = 0; | 580 | int err = 0; |
581 | 581 | ||
582 | ASSERT(pip->i_d.di_mode & S_IFDIR); | 582 | ASSERT(S_ISDIR(pip->i_d.di_mode)); |
583 | ASSERT(ip->i_d.di_mode & S_IFREG); | 583 | ASSERT(S_ISREG(ip->i_d.di_mode)); |
584 | if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG)) | 584 | if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) |
585 | return -EINVAL; | 585 | return -EINVAL; |
586 | 586 | ||
587 | mp = pip->i_mount; | 587 | mp = pip->i_mount; |
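Note: the filestream conversions fix real predicate bugs, not just style. The file-type constants overlap bit-wise, so a plain AND misclassifies other types:

        /* <linux/stat.h>, octal: S_IFDIR = 0040000, S_IFBLK = 0060000,
         * S_IFREG = 0100000, S_IFSOCK = 0140000 */
        mode & S_IFDIR;         /* also true for block devices: 0060000 & 0040000 */
        mode & S_IFREG;         /* also true for sockets: 0140000 & 0100000 */
        S_ISDIR(mode);          /* masks with S_IFMT first, then compares */

so the old ASSERTs could pass, and xfs_filestream_lookup_ag() could accept, inodes of entirely the wrong type.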
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c
index ed88ed16811c..ed88ed16811c 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/xfs_fs_subr.c
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/xfs_globals.c
index 76e81cff70b9..76e81cff70b9 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index dd5628bd8d0b..9f24ec28283b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -202,8 +202,7 @@ xfs_ialloc_inode_init( | |||
202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | 202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, |
203 | mp->m_bsize * blks_per_cluster, | 203 | mp->m_bsize * blks_per_cluster, |
204 | XBF_LOCK); | 204 | XBF_LOCK); |
205 | ASSERT(fbuf); | 205 | ASSERT(!xfs_buf_geterror(fbuf)); |
206 | ASSERT(!XFS_BUF_GETERROR(fbuf)); | ||
207 | 206 | ||
208 | /* | 207 | /* |
209 | * Initialize all inodes in this buffer and then log them. | 208 | * Initialize all inodes in this buffer and then log them. |
@@ -1486,7 +1485,7 @@ xfs_read_agi( | |||
1486 | if (error) | 1485 | if (error) |
1487 | return error; | 1486 | return error; |
1488 | 1487 | ||
1489 | ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); | 1488 | ASSERT(!xfs_buf_geterror(*bpp)); |
1490 | agi = XFS_BUF_TO_AGI(*bpp); | 1489 | agi = XFS_BUF_TO_AGI(*bpp); |
1491 | 1490 | ||
1492 | /* | 1491 | /* |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3cc21ddf9f7e..0239a7c7c886 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -368,7 +368,7 @@ xfs_iformat( | |||
368 | /* | 368 | /* |
369 | * no local regular files yet | 369 | * no local regular files yet |
370 | */ | 370 | */ |
371 | if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { | 371 | if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { |
372 | xfs_warn(ip->i_mount, | 372 | xfs_warn(ip->i_mount, |
373 | "corrupt inode %Lu (local format for regular file).", | 373 | "corrupt inode %Lu (local format for regular file).", |
374 | (unsigned long long) ip->i_ino); | 374 | (unsigned long long) ip->i_ino); |
@@ -1040,7 +1040,7 @@ xfs_ialloc( | |||
1040 | 1040 | ||
1041 | if (pip && XFS_INHERIT_GID(pip)) { | 1041 | if (pip && XFS_INHERIT_GID(pip)) { |
1042 | ip->i_d.di_gid = pip->i_d.di_gid; | 1042 | ip->i_d.di_gid = pip->i_d.di_gid; |
1043 | if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { | 1043 | if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { |
1044 | ip->i_d.di_mode |= S_ISGID; | 1044 | ip->i_d.di_mode |= S_ISGID; |
1045 | } | 1045 | } |
1046 | } | 1046 | } |
@@ -1097,14 +1097,14 @@ xfs_ialloc( | |||
1097 | if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { | 1097 | if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { |
1098 | uint di_flags = 0; | 1098 | uint di_flags = 0; |
1099 | 1099 | ||
1100 | if ((mode & S_IFMT) == S_IFDIR) { | 1100 | if (S_ISDIR(mode)) { |
1101 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) | 1101 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) |
1102 | di_flags |= XFS_DIFLAG_RTINHERIT; | 1102 | di_flags |= XFS_DIFLAG_RTINHERIT; |
1103 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { | 1103 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { |
1104 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; | 1104 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; |
1105 | ip->i_d.di_extsize = pip->i_d.di_extsize; | 1105 | ip->i_d.di_extsize = pip->i_d.di_extsize; |
1106 | } | 1106 | } |
1107 | } else if ((mode & S_IFMT) == S_IFREG) { | 1107 | } else if (S_ISREG(mode)) { |
1108 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) | 1108 | if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) |
1109 | di_flags |= XFS_DIFLAG_REALTIME; | 1109 | di_flags |= XFS_DIFLAG_REALTIME; |
1110 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { | 1110 | if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { |
@@ -1188,7 +1188,7 @@ xfs_isize_check( | |||
1188 | int nimaps; | 1188 | int nimaps; |
1189 | xfs_bmbt_irec_t imaps[2]; | 1189 | xfs_bmbt_irec_t imaps[2]; |
1190 | 1190 | ||
1191 | if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) | 1191 | if (!S_ISREG(ip->i_d.di_mode)) |
1192 | return; | 1192 | return; |
1193 | 1193 | ||
1194 | if (XFS_IS_REALTIME_INODE(ip)) | 1194 | if (XFS_IS_REALTIME_INODE(ip)) |
@@ -1828,7 +1828,7 @@ xfs_ifree( | |||
1828 | ASSERT(ip->i_d.di_nextents == 0); | 1828 | ASSERT(ip->i_d.di_nextents == 0); |
1829 | ASSERT(ip->i_d.di_anextents == 0); | 1829 | ASSERT(ip->i_d.di_anextents == 0); |
1830 | ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || | 1830 | ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || |
1831 | ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); | 1831 | (!S_ISREG(ip->i_d.di_mode))); |
1832 | ASSERT(ip->i_d.di_nblocks == 0); | 1832 | ASSERT(ip->i_d.di_nblocks == 0); |
1833 | 1833 | ||
1834 | /* | 1834 | /* |
@@ -2473,7 +2473,7 @@ cluster_corrupt_out: | |||
2473 | if (bp->b_iodone) { | 2473 | if (bp->b_iodone) { |
2474 | XFS_BUF_UNDONE(bp); | 2474 | XFS_BUF_UNDONE(bp); |
2475 | XFS_BUF_STALE(bp); | 2475 | XFS_BUF_STALE(bp); |
2476 | XFS_BUF_ERROR(bp,EIO); | 2476 | xfs_buf_ioerror(bp, EIO); |
2477 | xfs_buf_ioend(bp, 0); | 2477 | xfs_buf_ioend(bp, 0); |
2478 | } else { | 2478 | } else { |
2479 | XFS_BUF_STALE(bp); | 2479 | XFS_BUF_STALE(bp); |
@@ -2585,7 +2585,7 @@ xfs_iflush( | |||
2585 | * If the buffer is pinned then push on the log now so we won't | 2585 | * If the buffer is pinned then push on the log now so we won't |
2586 | * get stuck waiting in the write for too long. | 2586 | * get stuck waiting in the write for too long. |
2587 | */ | 2587 | */ |
2588 | if (XFS_BUF_ISPINNED(bp)) | 2588 | if (xfs_buf_ispinned(bp)) |
2589 | xfs_log_force(mp, 0); | 2589 | xfs_log_force(mp, 0); |
2590 | 2590 | ||
2591 | /* | 2591 | /* |
@@ -2671,7 +2671,7 @@ xfs_iflush_int( | |||
2671 | __func__, ip->i_ino, ip, ip->i_d.di_magic); | 2671 | __func__, ip->i_ino, ip, ip->i_d.di_magic); |
2672 | goto corrupt_out; | 2672 | goto corrupt_out; |
2673 | } | 2673 | } |
2674 | if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { | 2674 | if (S_ISREG(ip->i_d.di_mode)) { |
2675 | if (XFS_TEST_ERROR( | 2675 | if (XFS_TEST_ERROR( |
2676 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && | 2676 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
2677 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), | 2677 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), |
@@ -2681,7 +2681,7 @@ xfs_iflush_int( | |||
2681 | __func__, ip->i_ino, ip); | 2681 | __func__, ip->i_ino, ip); |
2682 | goto corrupt_out; | 2682 | goto corrupt_out; |
2683 | } | 2683 | } |
2684 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { | 2684 | } else if (S_ISDIR(ip->i_d.di_mode)) { |
2685 | if (XFS_TEST_ERROR( | 2685 | if (XFS_TEST_ERROR( |
2686 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && | 2686 | (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
2687 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && | 2687 | (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a97644ab945a..2380a4bcbece 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -263,7 +263,7 @@ typedef struct xfs_inode { | |||
263 | struct inode i_vnode; /* embedded VFS inode */ | 263 | struct inode i_vnode; /* embedded VFS inode */ |
264 | } xfs_inode_t; | 264 | } xfs_inode_t; |
265 | 265 | ||
266 | #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ | 266 | #define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \ |
267 | (ip)->i_size : (ip)->i_d.di_size; | 267 | (ip)->i_size : (ip)->i_d.di_size; |
268 | 268 | ||
269 | /* Convert from vfs inode to xfs inode */ | 269 | /* Convert from vfs inode to xfs inode */ |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index acca2c5ca3fa..f7ce7debe14c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -265,7 +265,7 @@ xfs_open_by_handle( | |||
265 | return PTR_ERR(filp); | 265 | return PTR_ERR(filp); |
266 | } | 266 | } |
267 | 267 | ||
268 | if (inode->i_mode & S_IFREG) { | 268 | if (S_ISREG(inode->i_mode)) { |
269 | filp->f_flags |= O_NOATIME; | 269 | filp->f_flags |= O_NOATIME; |
270 | filp->f_mode |= FMODE_NOCMTIME; | 270 | filp->f_mode |= FMODE_NOCMTIME; |
271 | } | 271 | } |
@@ -850,14 +850,14 @@ xfs_set_diflags( | |||
850 | di_flags |= XFS_DIFLAG_NODEFRAG; | 850 | di_flags |= XFS_DIFLAG_NODEFRAG; |
851 | if (xflags & XFS_XFLAG_FILESTREAM) | 851 | if (xflags & XFS_XFLAG_FILESTREAM) |
852 | di_flags |= XFS_DIFLAG_FILESTREAM; | 852 | di_flags |= XFS_DIFLAG_FILESTREAM; |
853 | if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { | 853 | if (S_ISDIR(ip->i_d.di_mode)) { |
854 | if (xflags & XFS_XFLAG_RTINHERIT) | 854 | if (xflags & XFS_XFLAG_RTINHERIT) |
855 | di_flags |= XFS_DIFLAG_RTINHERIT; | 855 | di_flags |= XFS_DIFLAG_RTINHERIT; |
856 | if (xflags & XFS_XFLAG_NOSYMLINKS) | 856 | if (xflags & XFS_XFLAG_NOSYMLINKS) |
857 | di_flags |= XFS_DIFLAG_NOSYMLINKS; | 857 | di_flags |= XFS_DIFLAG_NOSYMLINKS; |
858 | if (xflags & XFS_XFLAG_EXTSZINHERIT) | 858 | if (xflags & XFS_XFLAG_EXTSZINHERIT) |
859 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; | 859 | di_flags |= XFS_DIFLAG_EXTSZINHERIT; |
860 | } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { | 860 | } else if (S_ISREG(ip->i_d.di_mode)) { |
861 | if (xflags & XFS_XFLAG_REALTIME) | 861 | if (xflags & XFS_XFLAG_REALTIME) |
862 | di_flags |= XFS_DIFLAG_REALTIME; | 862 | di_flags |= XFS_DIFLAG_REALTIME; |
863 | if (xflags & XFS_XFLAG_EXTSIZE) | 863 | if (xflags & XFS_XFLAG_EXTSIZE) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index d56173b34a2a..d56173b34a2a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 54e623bfbb85..54e623bfbb85 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index 80f4060e8970..80f4060e8970 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/xfs_iops.c
index 6544c3236bc8..673704fab748 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -70,9 +70,8 @@ xfs_synchronize_times( | |||
70 | } | 70 | } |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * If the linux inode is valid, mark it dirty. | 73 | * If the linux inode is valid, mark it dirty, else mark the dirty state |
74 | * Used when committing a dirty inode into a transaction so that | 74 | * in the XFS inode to make sure we pick it up when reclaiming the inode. |
75 | * the inode will get written back by the linux code | ||
76 | */ | 75 | */ |
77 | void | 76 | void |
78 | xfs_mark_inode_dirty_sync( | 77 | xfs_mark_inode_dirty_sync( |
@@ -82,6 +81,10 @@ xfs_mark_inode_dirty_sync( | |||
82 | 81 | ||
83 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) | 82 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) |
84 | mark_inode_dirty_sync(inode); | 83 | mark_inode_dirty_sync(inode); |
84 | else { | ||
85 | barrier(); | ||
86 | ip->i_update_core = 1; | ||
87 | } | ||
85 | } | 88 | } |
86 | 89 | ||
87 | void | 90 | void |
@@ -92,6 +95,11 @@ xfs_mark_inode_dirty( | |||
92 | 95 | ||
93 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) | 96 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) |
94 | mark_inode_dirty(inode); | 97 | mark_inode_dirty(inode); |
98 | else { | ||
99 | barrier(); | ||
100 | ip->i_update_core = 1; | ||
101 | } | ||
102 | |||
95 | } | 103 | } |
96 | 104 | ||
97 | /* | 105 | /* |
@@ -1194,9 +1202,14 @@ xfs_setup_inode( | |||
1194 | break; | 1202 | break; |
1195 | } | 1203 | } |
1196 | 1204 | ||
1197 | /* if there is no attribute fork no ACL can exist on this inode */ | 1205 | /* |
1198 | if (!XFS_IFORK_Q(ip)) | 1206 | * If there is no attribute fork no ACL can exist on this inode, |
1207 | * and it can't have any file capabilities attached to it either. | ||
1208 | */ | ||
1209 | if (!XFS_IFORK_Q(ip)) { | ||
1210 | inode_has_no_xattr(inode); | ||
1199 | cache_no_acl(inode); | 1211 | cache_no_acl(inode); |
1212 | } | ||
1200 | 1213 | ||
1201 | xfs_iflags_clear(ip, XFS_INEW); | 1214 | xfs_iflags_clear(ip, XFS_INEW); |
1202 | barrier(); | 1215 | barrier(); |
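Note: two independent fixes land in xfs_iops.c. First, the dirty-marking helpers now record the dirty state in the XFS inode when the VFS inode is already being freed and can no longer be marked; reading barrier() as ordering the flag store after the caller's earlier inode updates is inferred from the construct, not spelled out in the patch:

        else {
                barrier();              /* keep the store from floating above
                                         * the changes it advertises */
                ip->i_update_core = 1;  /* picked up when the inode is reclaimed */
        }

Second, inode_has_no_xattr() tells the VFS that no extended attributes, and hence no security xattrs, can exist on the inode, which lets it skip per-write SUID/capability stripping work; again, that rationale is an inference from the helper's purpose.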
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/xfs_iops.h
index ef41c92ce66e..ef41c92ce66e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/xfs_linux.h
index d42f814e4d35..1e8a45e74c3e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -32,13 +32,12 @@ | |||
32 | # define XFS_BIG_INUMS 0 | 32 | # define XFS_BIG_INUMS 0 |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | #include <xfs_types.h> | 35 | #include "xfs_types.h" |
36 | 36 | ||
37 | #include <kmem.h> | 37 | #include "kmem.h" |
38 | #include <mrlock.h> | 38 | #include "mrlock.h" |
39 | #include <time.h> | 39 | #include "time.h" |
40 | 40 | #include "uuid.h" | |
41 | #include <support/uuid.h> | ||
42 | 41 | ||
43 | #include <linux/semaphore.h> | 42 | #include <linux/semaphore.h> |
44 | #include <linux/mm.h> | 43 | #include <linux/mm.h> |
@@ -78,14 +77,14 @@ | |||
78 | #include <asm/byteorder.h> | 77 | #include <asm/byteorder.h> |
79 | #include <asm/unaligned.h> | 78 | #include <asm/unaligned.h> |
80 | 79 | ||
81 | #include <xfs_vnode.h> | 80 | #include "xfs_vnode.h" |
82 | #include <xfs_stats.h> | 81 | #include "xfs_stats.h" |
83 | #include <xfs_sysctl.h> | 82 | #include "xfs_sysctl.h" |
84 | #include <xfs_iops.h> | 83 | #include "xfs_iops.h" |
85 | #include <xfs_aops.h> | 84 | #include "xfs_aops.h" |
86 | #include <xfs_super.h> | 85 | #include "xfs_super.h" |
87 | #include <xfs_buf.h> | 86 | #include "xfs_buf.h" |
88 | #include <xfs_message.h> | 87 | #include "xfs_message.h" |
89 | 88 | ||
90 | #ifdef __BIG_ENDIAN | 89 | #ifdef __BIG_ENDIAN |
91 | #define XFS_NATIVE_HOST 1 | 90 | #define XFS_NATIVE_HOST 1 |
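Note: the include-style change goes hand in hand with the directory flattening visible in all the renames in this diff (fs/xfs/linux-2.6/ and fs/xfs/quota/ collapsing into fs/xfs/). Quoted includes are resolved relative to the including file first, so

        #include "xfs_types.h"  /* found next to xfs_linux.h itself */

works without the Makefile include paths the old angle-bracket form depended on (the exact old ccflags are an assumption; this diff does not show the Makefile).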
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 06ff8437ed8e..3a8d4f66d702 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -878,7 +878,7 @@ xlog_iodone(xfs_buf_t *bp) | |||
878 | /* | 878 | /* |
879 | * Race to shutdown the filesystem if we see an error. | 879 | * Race to shutdown the filesystem if we see an error. |
880 | */ | 880 | */ |
881 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, | 881 | if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, |
882 | XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { | 882 | XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { |
883 | xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp)); | 883 | xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp)); |
884 | XFS_BUF_STALE(bp); | 884 | XFS_BUF_STALE(bp); |
@@ -1051,7 +1051,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
 	if (!bp)
 		goto out_free_log;
 	bp->b_iodone = xlog_iodone;
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(xfs_buf_islocked(bp));
 	log->l_xbuf = bp;
 
@@ -1108,7 +1107,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		iclog->ic_callback_tail = &(iclog->ic_callback);
 		iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
 
-		ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
 		ASSERT(xfs_buf_islocked(iclog->ic_bp));
 		init_waitqueue_head(&iclog->ic_force_wait);
 		init_waitqueue_head(&iclog->ic_write_wait);
@@ -1248,7 +1246,7 @@ xlog_bdstrat(
 	struct xlog_in_core	*iclog = bp->b_fspriv;
 
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
-		XFS_BUF_ERROR(bp, EIO);
+		xfs_buf_ioerror(bp, EIO);
 		XFS_BUF_STALE(bp);
 		xfs_buf_ioend(bp, 0);
 		/*
@@ -1355,7 +1353,6 @@ xlog_sync(xlog_t		*log,
 	XFS_BUF_SET_COUNT(bp, count);
 	bp->b_fspriv = iclog;
 	XFS_BUF_ZEROFLAGS(bp);
-	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
 	bp->b_flags |= XBF_SYNCIO;
 
@@ -1398,16 +1395,15 @@ xlog_sync(xlog_t		*log,
 	if (split) {
 		bp = iclog->ic_log->l_xbuf;
 		XFS_BUF_SET_ADDR(bp, 0);	     /* logical 0 */
-		XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
-					(__psint_t)count), split);
+		xfs_buf_associate_memory(bp,
+				(char *)&iclog->ic_header + count, split);
 		bp->b_fspriv = iclog;
 		XFS_BUF_ZEROFLAGS(bp);
-		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
 		bp->b_flags |= XBF_SYNCIO;
 		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
 			bp->b_flags |= XBF_FUA;
-		dptr = XFS_BUF_PTR(bp);
+		dptr = bp->b_addr;
 		/*
 		 * Bump the cycle numbers at the start of each block
 		 * since this part of the buffer is at the start of
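Note: the final hunk covers an iclog write that wraps past the physical end of the log. The first count bytes are written in place; the remaining split bytes are issued as a second I/O at logical block 0 through l_xbuf, which xfs_buf_associate_memory() points at the tail of the in-core header rather than copying it. A toy sketch of the arithmetic, with hypothetical numbers:

	int	log_size  = 1000;		/* sectors in the physical log */
	int	start     = 980;		/* where this iclog write begins */
	int	iclog_len = 32;			/* sectors to write in total */
	int	count     = log_size - start;	/* 20 sectors, first I/O */
	int	split     = iclog_len - count;	/* 12 sectors, wrap to sector 0 */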
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8fe4206de057..a199dbcee7d8 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -147,7 +147,7 @@ xlog_align(
 	xfs_daddr_t	offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
 
 	ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
-	return XFS_BUF_PTR(bp) + BBTOB(offset);
+	return bp->b_addr + BBTOB(offset);
 }
 
 
@@ -178,9 +178,7 @@ xlog_bread_noalign(
 
 	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
 	XFS_BUF_READ(bp);
-	XFS_BUF_BUSY(bp);
 	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
-	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
 
 	xfsbdstrat(log->l_mp, bp);
 	error = xfs_buf_iowait(bp);
@@ -220,18 +218,18 @@ xlog_bread_offset(
 	xfs_buf_t	*bp,
 	xfs_caddr_t	offset)
 {
-	xfs_caddr_t	orig_offset = XFS_BUF_PTR(bp);
+	xfs_caddr_t	orig_offset = bp->b_addr;
 	int		orig_len = bp->b_buffer_length;
 	int		error, error2;
 
-	error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
+	error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
 	if (error)
 		return error;
 
 	error = xlog_bread_noalign(log, blk_no, nbblks, bp);
 
 	/* must reset buffer pointer even on error */
-	error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
+	error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
 	if (error)
 		return error;
 	return error2;
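Note: xlog_bread_offset() above is a save/point/restore pattern: temporarily associate the buffer with caller-supplied memory, perform the read, then restore the original mapping even on failure, with the read error taking precedence. A hypothetical helper of the same shape (the name and io callback are illustrative only):

	static int
	with_borrowed_mapping(
		struct xfs_buf	*bp,
		char		*mem,
		int		len,
		int		(*io)(struct xfs_buf *))
	{
		char	*orig_addr = bp->b_addr;
		int	orig_len = bp->b_buffer_length;
		int	error, error2;

		error = xfs_buf_associate_memory(bp, mem, len);
		if (error)
			return error;
		error = io(bp);
		/* restore unconditionally; report the I/O error first */
		error2 = xfs_buf_associate_memory(bp, orig_addr, orig_len);
		return error ? error : error2;
	}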
@@ -266,11 +264,9 @@ xlog_bwrite(
 
 	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
 	XFS_BUF_ZEROFLAGS(bp);
-	XFS_BUF_BUSY(bp);
-	XFS_BUF_HOLD(bp);
+	xfs_buf_hold(bp);
 	xfs_buf_lock(bp);
 	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
-	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
 
 	if ((error = xfs_bwrite(log->l_mp, bp)))
 		xfs_ioerror_alert("xlog_bwrite", log->l_mp,
@@ -360,7 +356,7 @@ STATIC void
 xlog_recover_iodone(
 	struct xfs_buf	*bp)
 {
-	if (XFS_BUF_GETERROR(bp)) {
+	if (bp->b_error) {
 		/*
 		 * We're not going to bother about retrying
 		 * this during recovery. One strike!
@@ -1262,7 +1258,7 @@ xlog_write_log_records(
 	 */
 	ealign = round_down(end_block, sectbb);
 	if (j == 0 && (start_block + endcount > ealign)) {
-		offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
+		offset = bp->b_addr + BBTOB(ealign - start_block);
 		error = xlog_bread_offset(log, ealign, sectbb,
 						bp, offset);
 		if (error)
@@ -2135,15 +2131,16 @@ xlog_recover_buffer_pass2(
 
 	bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
 			  buf_flags);
-	if (XFS_BUF_ISERROR(bp)) {
+	if (!bp)
+		return XFS_ERROR(ENOMEM);
+	error = bp->b_error;
+	if (error) {
 		xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
 				  bp, buf_f->blf_blkno);
-		error = XFS_BUF_GETERROR(bp);
 		xfs_buf_relse(bp);
 		return error;
 	}
 
-	error = 0;
 	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
 		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
 	} else if (buf_f->blf_flags &
@@ -2227,14 +2224,17 @@ xlog_recover_inode_pass2(
 
 	bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
 			  XBF_LOCK);
-	if (XFS_BUF_ISERROR(bp)) {
+	if (!bp) {
+		error = ENOMEM;
+		goto error;
+	}
+	error = bp->b_error;
+	if (error) {
 		xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
 				  bp, in_f->ilf_blkno);
-		error = XFS_BUF_GETERROR(bp);
 		xfs_buf_relse(bp);
 		goto error;
 	}
-	error = 0;
 	ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
 
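Note: both recovery read paths now separate the two failure modes of xfs_buf_read(): a NULL return means no buffer was obtained at all (reported as ENOMEM), while a returned buffer may still carry an I/O error in b_error and must be released before the error is propagated. Condensed to a sketch (the wrapper name is hypothetical):

	static int
	read_buf_checked(
		struct xfs_buftarg	*target,
		xfs_daddr_t		blkno,
		size_t			len,
		xfs_buf_flags_t		flags,
		struct xfs_buf		**bpp)
	{
		struct xfs_buf	*bp = xfs_buf_read(target, blkno, len, flags);

		if (!bp)			/* allocation failed */
			return ENOMEM;
		if (bp->b_error) {		/* I/O failed: release, report */
			int error = bp->b_error;
			xfs_buf_relse(bp);
			return error;
		}
		*bpp = bp;
		return 0;
	}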
@@ -2283,7 +2283,7 @@ xlog_recover_inode_pass2(
 	/* Take the opportunity to reset the flush iteration count */
 	dicp->di_flushiter = 0;
 
-	if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
+	if (unlikely(S_ISREG(dicp->di_mode))) {
 		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
 			XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
@@ -2296,7 +2296,7 @@ xlog_recover_inode_pass2(
 			error = EFSCORRUPTED;
 			goto error;
 		}
-	} else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
+	} else if (unlikely(S_ISDIR(dicp->di_mode))) {
 		if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
 		    (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
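Note: the S_ISREG()/S_ISDIR() conversions here, and the matching changes in xfs_mount.c, xfs_rename.c, and xfs_vnodeops.c below, are behavior-preserving. The standard mode-test macros expand to exactly the open-coded mask-and-compare being replaced:

	#define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
	#define S_ISDIR(m)	(((m) & S_IFMT) == S_IFDIR)
	#define S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)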
@@ -3437,7 +3437,7 @@ xlog_do_recovery_pass(
 			/*
 			 * Check for header wrapping around physical end-of-log
 			 */
-			offset = XFS_BUF_PTR(hbp);
+			offset = hbp->b_addr;
 			split_hblks = 0;
 			wrapped_hblks = 0;
 			if (blk_no + hblks <= log->l_logBBsize) {
@@ -3497,7 +3497,7 @@ xlog_do_recovery_pass(
 			} else {
 				/* This log record is split across the
 				 * physical end of log */
-				offset = XFS_BUF_PTR(dbp);
+				offset = dbp->b_addr;
 				split_bblks = 0;
 				if (blk_no != log->l_logBBsize) {
 					/* some data is before the physical
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/xfs_message.c
index bd672def95ac..bd672def95ac 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/xfs_message.c
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/xfs_message.h
index 7fb7ea007672..7fb7ea007672 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/xfs_message.h
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7f25245da289..0081657ad985 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1331,7 +1331,7 @@ xfs_mountfs(
 
 	ASSERT(rip != NULL);
 
-	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
+	if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
 		xfs_warn(mp, "corrupted root inode %llu: not a directory",
 			(unsigned long long)rip->i_ino);
 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
@@ -1615,7 +1615,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 	XFS_BUF_UNDELAYWRITE(sbp);
 	XFS_BUF_WRITE(sbp);
 	XFS_BUF_UNASYNC(sbp);
-	ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
+	ASSERT(sbp->b_target == mp->m_ddev_targp);
 	xfsbdstrat(mp, sbp);
 	error = xfs_buf_iowait(sbp);
 	if (error)
@@ -1938,7 +1938,7 @@ xfs_getsb(
 		xfs_buf_lock(bp);
 	}
 
-	XFS_BUF_HOLD(bp);
+	xfs_buf_hold(bp);
 	ASSERT(XFS_BUF_ISDONE(bp));
 	return bp;
 }
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/xfs_qm.c
index 46e54ad9a2dc..9a0aa76facdf 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1240,7 +1240,7 @@ xfs_qm_reset_dqcounts(
 		do_div(j, sizeof(xfs_dqblk_t));
 		ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
 #endif
-	ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
+	ddq = bp->b_addr;
 	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
 		/*
 		 * Do a sanity check, and if needed, repair the dqblk. Don't
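Note: the cast disappears along with the macro. Most XFS_BUF_PTR() conversions in this patch can drop their casts because b_addr is a void pointer, which converts implicitly to any object pointer type. An abridged sketch of the buffer fields these hunks rely on (types and layout simplified; the real struct xfs_buf has many more members):

	struct xfs_buf {
		void			*b_addr;	/* virtual address of buffer */
		int			b_error;	/* error code on I/O */
		unsigned		b_flags;	/* XBF_... status flags */
		struct xfs_buftarg	*b_target;	/* device this buffer targets */
		/* ... */
	};

	xfs_disk_dquot_t	*ddq = bp->b_addr;	/* implicit conversion, no cast */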
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/xfs_qm.h
index 43b9abe1052c..43b9abe1052c 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index a0a829addca9..a0a829addca9 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
index 8671a0b32644..8671a0b32644 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/xfs_qm_stats.c
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
index 5b964fc0dc09..5b964fc0dc09 100644
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ b/fs/xfs/xfs_qm_stats.h
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 609246f42e6c..609246f42e6c 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
index 94a3d927d716..94a3d927d716 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/xfs_quota_priv.h
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 29b9d642e93d..7e76f537abb7 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -25,7 +25,7 @@
 #include "xfs_trans.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
-#include "quota/xfs_qm.h"
+#include "xfs_qm.h"
 #include <linux/quota.h>
 
 
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 77a59891734e..df78c297d1a1 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -116,7 +116,7 @@ xfs_rename(
 	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
 
 	new_parent = (src_dp != target_dp);
-	src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
+	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
 
 	if (src_is_directory) {
 		/*
@@ -226,7 +226,7 @@ xfs_rename(
 		 * target and source are directories and that target can be
 		 * destroyed, or that neither is a directory.
 		 */
-		if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+		if (S_ISDIR(target_ip->i_d.di_mode)) {
 			/*
 			 * Make sure target dir is empty.
 			 */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 8f76fdff4f46..35561a511b57 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -168,7 +168,7 @@ error_cancel:
 			xfs_trans_cancel(tp, cancelflags);
 			goto error;
 		}
-		memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
+		memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
 		xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
 		/*
 		 * Commit the transaction.
@@ -883,7 +883,7 @@ xfs_rtbuf_get(
 	if (error) {
 		return error;
 	}
-	ASSERT(bp && !XFS_BUF_GETERROR(bp));
+	ASSERT(!xfs_buf_geterror(bp));
 	*bpp = bp;
 	return 0;
 }
@@ -943,7 +943,7 @@ xfs_rtcheck_range(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Compute the starting word's address, and starting bit.
 	 */
@@ -994,7 +994,7 @@
 		if (error) {
 			return error;
 		}
-		b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		b = bufp = bp->b_addr;
 		word = 0;
 	} else {
 		/*
@@ -1040,7 +1040,7 @@
 		if (error) {
 			return error;
 		}
-		b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		b = bufp = bp->b_addr;
 		word = 0;
 	} else {
 		/*
@@ -1158,7 +1158,7 @@ xfs_rtfind_back(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Get the first word's index & point to it.
 	 */
@@ -1210,7 +1210,7 @@
 		if (error) {
 			return error;
 		}
-		bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		bufp = bp->b_addr;
 		word = XFS_BLOCKWMASK(mp);
 		b = &bufp[word];
 	} else {
@@ -1256,7 +1256,7 @@
 		if (error) {
 			return error;
 		}
-		bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		bufp = bp->b_addr;
 		word = XFS_BLOCKWMASK(mp);
 		b = &bufp[word];
 	} else {
@@ -1333,7 +1333,7 @@ xfs_rtfind_forw(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Get the first word's index & point to it.
 	 */
@@ -1384,7 +1384,7 @@
 		if (error) {
 			return error;
 		}
-		b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		b = bufp = bp->b_addr;
 		word = 0;
 	} else {
 		/*
@@ -1429,7 +1429,7 @@
 		if (error) {
 			return error;
 		}
-		b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		b = bufp = bp->b_addr;
 		word = 0;
 	} else {
 		/*
@@ -1649,7 +1649,7 @@ xfs_rtmodify_range(
 	if (error) {
 		return error;
 	}
-	bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+	bufp = bp->b_addr;
 	/*
 	 * Compute the starting word's address, and starting bit.
 	 */
@@ -1694,7 +1694,7 @@
 		if (error) {
 			return error;
 		}
-		first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		first = b = bufp = bp->b_addr;
 		word = 0;
 	} else {
 		/*
@@ -1734,7 +1734,7 @@
 		if (error) {
 			return error;
 		}
-		first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+		first = b = bufp = bp->b_addr;
 		word = 0;
 	} else {
 		/*
@@ -1832,8 +1832,8 @@ xfs_rtmodify_summary(
 	 */
 	sp = XFS_SUMPTR(mp, bp, so);
 	*sp += delta;
-	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)),
-		(uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1));
+	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
+		(uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 09e1f4f35e97..f7f3a359c1c5 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -47,7 +47,7 @@ struct xfs_trans;
 #define XFS_SUMOFFSTOBLOCK(mp,s)	\
 	(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
 #define XFS_SUMPTR(mp,bp,so)	\
-	((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \
+	((xfs_suminfo_t *)((bp)->b_addr + \
 		(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
 
 #define XFS_BITTOBLOCK(mp,bi)	((bi) >> (mp)->m_blkbit_log)
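Note: one subtlety in the new XFS_SUMPTR(): the (char *) cast is gone even though the macro still does byte arithmetic on b_addr. That relies on the GNU C extension the kernel is built with, where arithmetic on void * behaves as if the pointee size were 1; strict ISO C would still require the cast. Equivalent forms, with byte_off standing in for the summary-offset expression:

	xfs_suminfo_t	*sp1 = (xfs_suminfo_t *)((bp)->b_addr + byte_off);		/* GNU C */
	xfs_suminfo_t	*sp2 = (xfs_suminfo_t *)((char *)bp->b_addr + byte_off);	/* ISO C */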
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index d6d6fdfe9422..c96a8a05ac03 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -104,9 +104,9 @@ xfs_ioerror_alert(
 	xfs_alert(mp,
 		 "I/O error occurred: meta-data dev %s block 0x%llx"
 		 "       (\"%s\") error %d buf count %zd",
-		XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+		xfs_buf_target_name(bp->b_target),
 		(__uint64_t)blkno, func,
-		XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
+		bp->b_error, XFS_BUF_COUNT(bp));
 }
 
 /*
@@ -137,8 +137,8 @@ xfs_read_buf(
 	bp = xfs_buf_read(target, blkno, len, flags);
 	if (!bp)
 		return XFS_ERROR(EIO);
-	error = XFS_BUF_GETERROR(bp);
-	if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
+	error = bp->b_error;
+	if (!error && !XFS_FORCED_SHUTDOWN(mp)) {
 		*bpp = bp;
 	} else {
 		*bpp = NULL;
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 1eb2ba586814..cb6ae715814a 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -509,7 +509,7 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
 
 #define XFS_SB_DADDR		((xfs_daddr_t)0) /* daddr in filesystem/ag */
 #define	XFS_SB_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
-#define XFS_BUF_TO_SBP(bp)	((xfs_dsb_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_SBP(bp)	((xfs_dsb_t *)((bp)->b_addr))
 
 #define	XFS_HDR_BLOCK(mp,d)	((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
 #define	XFS_DADDR_TO_FSB(mp,d)	XFS_AGB_TO_FSB(mp, \
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/xfs_stats.c
index 76fdc5861932..76fdc5861932 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/xfs_stats.h
index 736854b1ca1a..736854b1ca1a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/xfs_super.c
index 9a72dda58bd0..2366c54cc4fa 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -356,6 +356,8 @@ xfs_parseargs(
 			mp->m_flags |= XFS_MOUNT_DELAYLOG;
 		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
 			mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
+			xfs_warn(mp,
+	"nodelaylog is deprecated and will be removed in Linux 3.3");
 		} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
 			mp->m_flags |= XFS_MOUNT_DISCARD;
 		} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
@@ -877,33 +879,17 @@ xfs_log_inode(
 	struct xfs_trans	*tp;
 	int			error;
 
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
 	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-
 	if (error) {
 		xfs_trans_cancel(tp, 0);
-		/* we need to return with the lock hold shared */
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		return error;
 	}
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Note - it's possible that we might have pushed ourselves out of the
-	 * way during trans_reserve which would flush the inode.  But there's
-	 * no guarantee that the inode buffer has actually gone out yet (it's
-	 * delwri).  Plus the buffer could be pinned anyway if it's part of
-	 * an inode in another recent transaction.  So we play it safe and
-	 * fire off the transaction anyway.
-	 */
-	xfs_trans_ijoin(tp, ip);
+	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	error = xfs_trans_commit(tp, 0);
-	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-
-	return error;
+	return xfs_trans_commit(tp, 0);
 }
 
 STATIC int
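Note: the rewritten xfs_log_inode() no longer juggles the caller's shared ilock; it takes the exclusive lock itself, and, as the hunk suggests, xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL) hands that lock to the transaction so that commit (or cancel) releases it. A sketch of that contract as inferred from the hunk, not a quote of the API documentation:

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);	/* lock now owned by tp */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	error = xfs_trans_commit(tp, 0);		/* drops XFS_ILOCK_EXCL */
	/* no xfs_iunlock() here */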
@@ -918,7 +904,9 @@ xfs_fs_write_inode(
 	trace_xfs_write_inode(ip);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
+		return -XFS_ERROR(EIO);
+	if (!ip->i_update_core)
+		return 0;
 
 	if (wbc->sync_mode == WB_SYNC_ALL) {
 		/*
@@ -929,12 +917,10 @@ xfs_fs_write_inode(
 		 * of synchronous log foces dramatically.
 		 */
 		xfs_ioend_wait(ip);
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		if (ip->i_update_core) {
-			error = xfs_log_inode(ip);
-			if (error)
-				goto out_unlock;
-		}
+		error = xfs_log_inode(ip);
+		if (error)
+			goto out;
+		return 0;
 	} else {
 		/*
 		 * We make this non-blocking if the inode is contended, return
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/xfs_super.h
index 50a3266c999e..50a3266c999e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/xfs_super.h
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/xfs_sync.c
index e4c938afb910..4604f90f86a3 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -332,7 +332,7 @@ xfs_sync_fsdata(
 	 * between there and here.
 	 */
 	bp = xfs_getsb(mp, 0);
-	if (XFS_BUF_ISPINNED(bp))
+	if (xfs_buf_ispinned(bp))
 		xfs_log_force(mp, 0);
 
 	return xfs_bwrite(mp, bp);
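Note: XFS_BUF_ISPINNED() likewise becomes a function. A plausible minimal definition, assuming the pin count is the buffer's atomic b_pin_count (a sketch, not the verbatim header):

	static inline int
	xfs_buf_ispinned(
		struct xfs_buf	*bp)
	{
		return atomic_read(&bp->b_pin_count);
	}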
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/xfs_sync.h
index 941202e7ac6e..941202e7ac6e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index ee2d2adaa438..ee2d2adaa438 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index b9937d450f8e..b9937d450f8e 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/xfs_trace.c
index 88d25d4aa56e..9010ce885e6a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -43,8 +43,8 @@
 #include "xfs_quota.h"
 #include "xfs_iomap.h"
 #include "xfs_aops.h"
-#include "quota/xfs_dquot_item.h"
-#include "quota/xfs_dquot.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 #include "xfs_log_recover.h"
 #include "xfs_inode_item.h"
 
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/xfs_trace.h
index 690fc7a7bd72..690fc7a7bd72 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 43233e92f0f6..c15aa29fa169 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -299,7 +299,7 @@ xfs_trans_ail_cursor_last(
  * Splice the log item list into the AIL at the given LSN. We splice to the
  * tail of the given LSN to maintain insert order for push traversals. The
  * cursor is optional, allowing repeated updates to the same LSN to avoid
- * repeated traversals.
+ * repeated traversals.  This should not be called with an empty list.
  */
 static void
 xfs_ail_splice(
@@ -308,50 +308,39 @@ xfs_ail_splice(
 	struct list_head	*list,
 	xfs_lsn_t		lsn)
 {
-	struct xfs_log_item	*lip = cur ? cur->item : NULL;
-	struct xfs_log_item	*next_lip;
+	struct xfs_log_item	*lip;
+
+	ASSERT(!list_empty(list));
 
 	/*
-	 * Get a new cursor if we don't have a placeholder or the existing one
-	 * has been invalidated.
+	 * Use the cursor to determine the insertion point if one is
+	 * provided.  If not, or if the one we got is not valid,
+	 * find the place in the AIL where the items belong.
 	 */
-	if (!lip || (__psint_t)lip & 1) {
+	lip = cur ? cur->item : NULL;
+	if (!lip || (__psint_t) lip & 1)
 		lip = __xfs_trans_ail_cursor_last(ailp, lsn);
 
-		if (!lip) {
-			/* The list is empty, so just splice and return. */
-			if (cur)
-				cur->item = NULL;
-			list_splice(list, &ailp->xa_ail);
-			return;
-		}
-	}
+	/*
+	 * If a cursor is provided, we know we're processing the AIL
+	 * in lsn order, and future items to be spliced in will
+	 * follow the last one being inserted now.  Update the
+	 * cursor to point to that last item, now while we have a
+	 * reliable pointer to it.
+	 */
+	if (cur)
+		cur->item = list_entry(list->prev, struct xfs_log_item, li_ail);
 
 	/*
-	 * Our cursor points to the item we want to insert _after_, so we have
-	 * to update the cursor to point to the end of the list we are splicing
-	 * in so that it points to the correct location for the next splice.
-	 * i.e. before the splice
-	 *
-	 *  lsn -> lsn -> lsn + x -> lsn + x ...
-	 *          ^
-	 *          | cursor points here
-	 *
-	 * After the splice we have:
-	 *
-	 * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
-	 *   ^                                ^
-	 *   | cursor points here             | needs to move here
-	 *
-	 * So we set the cursor to the last item in the list to be spliced
-	 * before we execute the splice, resulting in the cursor pointing to
-	 * the correct item after the splice occurs.
+	 * Finally perform the splice.  Unless the AIL was empty,
+	 * lip points to the item in the AIL _after_ which the new
+	 * items should go.  If lip is null the AIL was empty, so
+	 * the new items go at the head of the AIL.
 	 */
-	if (cur) {
-		next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
-		cur->item = next_lip;
-	}
-	list_splice(list, &lip->li_ail);
+	if (lip)
+		list_splice(list, &lip->li_ail);
+	else
+		list_splice(list, &ailp->xa_ail);
 }
 
 /*
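Note: list_splice(list, head) inserts the entries of list immediately after head, which is why the rewritten function can express both cases with one primitive: splice after an existing item's li_ail node, or after the AIL list head itself when the AIL is empty. A toy sketch (the variable names are hypothetical):

	LIST_HEAD(batch);			/* items to insert, in LSN order */
	struct xfs_log_item	*after;		/* insertion point found above */

	if (after)				/* non-empty AIL */
		list_splice(&batch, &after->li_ail);
	else					/* empty AIL: batch becomes its head */
		list_splice(&batch, &ailp->xa_ail);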
@@ -682,6 +671,7 @@ xfs_trans_ail_update_bulk(
 	int			i;
 	LIST_HEAD(tmp);
 
+	ASSERT(nr_items > 0);		/* Not required, but true. */
 	mlip = xfs_ail_min(ailp);
 
 	for (i = 0; i < nr_items; i++) {
@@ -701,7 +691,8 @@ xfs_trans_ail_update_bulk(
 		list_add(&lip->li_ail, &tmp);
 	}
 
-	xfs_ail_splice(ailp, cur, &tmp, lsn);
+	if (!list_empty(&tmp))
+		xfs_ail_splice(ailp, cur, &tmp, lsn);
 
 	if (!mlip_changed) {
 		spin_unlock(&ailp->xa_lock);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 15584fc3ed7d..137e2b9e2948 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -54,7 +54,7 @@ xfs_trans_buf_item_match(
 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
 		blip = (struct xfs_buf_log_item *)lidp->lid_item;
 		if (blip->bli_item.li_type == XFS_LI_BUF &&
-		    XFS_BUF_TARGET(blip->bli_buf) == target &&
+		    blip->bli_buf->b_target == target &&
 		    XFS_BUF_ADDR(blip->bli_buf) == blkno &&
 		    XFS_BUF_COUNT(blip->bli_buf) == len)
 			return blip->bli_buf;
@@ -80,7 +80,6 @@ _xfs_trans_bjoin(
 {
 	struct xfs_buf_log_item	*bip;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == NULL);
 
 	/*
@@ -194,7 +193,7 @@ xfs_trans_get_buf(xfs_trans_t	*tp,
 		return NULL;
 	}
 
-	ASSERT(!XFS_BUF_GETERROR(bp));
+	ASSERT(!bp->b_error);
 
 	_xfs_trans_bjoin(tp, bp, 1);
 	trace_xfs_trans_get_buf(bp->b_fspriv);
@@ -293,10 +292,10 @@ xfs_trans_read_buf(
 		return (flags & XBF_TRYLOCK) ?
 					EAGAIN : XFS_ERROR(ENOMEM);
 
-	if (XFS_BUF_GETERROR(bp) != 0) {
+	if (bp->b_error) {
+		error = bp->b_error;
 		xfs_ioerror_alert("xfs_trans_read_buf", mp,
 				  bp, blkno);
-		error = XFS_BUF_GETERROR(bp);
 		xfs_buf_relse(bp);
 		return error;
 	}
@@ -330,7 +329,7 @@ xfs_trans_read_buf(
 		ASSERT(xfs_buf_islocked(bp));
 		ASSERT(bp->b_transp == tp);
 		ASSERT(bp->b_fspriv != NULL);
-		ASSERT((XFS_BUF_ISERROR(bp)) == 0);
+		ASSERT(!bp->b_error);
 		if (!(XFS_BUF_ISDONE(bp))) {
 			trace_xfs_trans_read_buf_io(bp, _RET_IP_);
 			ASSERT(!XFS_BUF_ISASYNC(bp));
@@ -386,10 +385,9 @@ xfs_trans_read_buf(
 		return (flags & XBF_TRYLOCK) ?
 					0 : XFS_ERROR(ENOMEM);
 	}
-	if (XFS_BUF_GETERROR(bp) != 0) {
-		XFS_BUF_SUPER_STALE(bp);
-		error = XFS_BUF_GETERROR(bp);
-
+	if (bp->b_error) {
+		error = bp->b_error;
+		XFS_BUF_SUPER_STALE(bp);
 		xfs_ioerror_alert("xfs_trans_read_buf", mp,
 				  bp, blkno);
 		if (tp->t_flags & XFS_TRANS_DIRTY)
@@ -430,7 +428,7 @@ shutdown_abort:
 	if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
 		xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
 #endif
-	ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
+	ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) !=
 				(XBF_STALE|XBF_DELWRI));
 
 	trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
@@ -581,7 +579,6 @@ xfs_trans_bhold(xfs_trans_t	*tp,
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
@@ -602,7 +599,6 @@ xfs_trans_bhold_release(xfs_trans_t	*tp,
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
@@ -631,7 +627,6 @@ xfs_trans_log_buf(xfs_trans_t	*tp,
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
@@ -702,7 +697,6 @@ xfs_trans_binval(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -774,7 +768,6 @@ xfs_trans_inode_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -798,7 +791,6 @@ xfs_trans_stale_inode_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -823,7 +815,6 @@ xfs_trans_inode_alloc_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -851,7 +842,6 @@ xfs_trans_dquot_buf(
 {
 	xfs_buf_log_item_t	*bip = bp->b_fspriv;
 
-	ASSERT(XFS_BUF_ISBUSY(bp));
 	ASSERT(bp->b_transp == tp);
 	ASSERT(bip != NULL);
 	ASSERT(type == XFS_BLF_UDQUOT_BUF ||
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 4d00ee67792d..4d00ee67792d 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/xfs_vnode.h
index 7c220b4227bc..7c220b4227bc 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/xfs_vnode.h
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 88d121486c52..51fc429527bc 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -83,7 +83,9 @@ xfs_readlink_bmap(
 
 		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
 				  XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
-		error = XFS_BUF_GETERROR(bp);
+		if (!bp)
+			return XFS_ERROR(ENOMEM);
+		error = bp->b_error;
 		if (error) {
 			xfs_ioerror_alert("xfs_readlink",
 				  ip->i_mount, bp, XFS_BUF_ADDR(bp));
@@ -94,7 +96,7 @@ xfs_readlink_bmap(
 			byte_cnt = pathlen;
 		pathlen -= byte_cnt;
 
-		memcpy(link, XFS_BUF_PTR(bp), byte_cnt);
+		memcpy(link, bp->b_addr, byte_cnt);
 		xfs_buf_relse(bp);
 	}
 
@@ -121,7 +123,7 @@ xfs_readlink(
 
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 
-	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+	ASSERT(S_ISLNK(ip->i_d.di_mode));
 	ASSERT(ip->i_d.di_size <= MAXPATHLEN);
 
 	pathlen = ip->i_d.di_size;
@@ -529,7 +531,7 @@ xfs_release(
 	if (ip->i_d.di_nlink == 0)
 		return 0;
 
-	if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+	if ((S_ISREG(ip->i_d.di_mode) &&
 	     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
 	       ip->i_delayed_blks > 0)) &&
 	     (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
@@ -610,7 +612,7 @@ xfs_inactive(
 	truncate = ((ip->i_d.di_nlink == 0) &&
 	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
 	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
-	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
+	    S_ISREG(ip->i_d.di_mode));
 
 	mp = ip->i_mount;
 
@@ -621,7 +623,7 @@ xfs_inactive(
 		goto out;
 
 	if (ip->i_d.di_nlink != 0) {
-		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+		if ((S_ISREG(ip->i_d.di_mode) &&
 		     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
 		       ip->i_delayed_blks > 0)) &&
 		     (ip->i_df.if_flags & XFS_IFEXTENTS) &&
@@ -669,7 +671,7 @@ xfs_inactive(
 			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 			return VN_INACTIVE_CACHE;
 		}
-	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
+	} else if (S_ISLNK(ip->i_d.di_mode)) {
 
 		/*
 		 * If we get an error while cleaning up a
@@ -1648,13 +1650,13 @@ xfs_symlink(
 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
 			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 					       BTOBB(byte_cnt), 0);
-			ASSERT(bp && !XFS_BUF_GETERROR(bp));
+			ASSERT(!xfs_buf_geterror(bp));
 			if (pathlen < byte_cnt) {
 				byte_cnt = pathlen;
 			}
 			pathlen -= byte_cnt;
 
-			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
+			memcpy(bp->b_addr, cur_chunk, byte_cnt);
 			cur_chunk += byte_cnt;
 
 			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
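Note: as the symlink hunk shows (and the xfs_rtalloc.c hunks earlier), xfs_trans_log_buf() takes an inclusive first/last byte range within the buffer, hence the recurring "- 1":

	memcpy(bp->b_addr, cur_chunk, byte_cnt);	/* fill bytes 0 .. byte_cnt-1 */
	xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);	/* log that inclusive range */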
@@ -1999,7 +2001,7 @@ xfs_zero_remaining_bytes(
 					  mp, bp, XFS_BUF_ADDR(bp));
 			break;
 		}
-		memset(XFS_BUF_PTR(bp) +
+		memset(bp->b_addr +
 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
 			0, lastoffset - offset + 1);
 		XFS_BUF_UNDONE(bp);
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 87d3e03878c8..87d3e03878c8 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c