aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig3
-rw-r--r--fs/Kconfig7
-rw-r--r--fs/Makefile1
-rw-r--r--fs/befs/linuxvfs.c14
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/buffer.c10
-rw-r--r--fs/ceph/Kconfig3
-rw-r--r--fs/cifs/Kconfig15
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/crypto/crypto.c1
-rw-r--r--fs/crypto/fname.c140
-rw-r--r--fs/crypto/fscrypt_private.h31
-rw-r--r--fs/crypto/hooks.c158
-rw-r--r--fs/crypto/keyinfo.c17
-rw-r--r--fs/dax.c30
-rw-r--r--fs/dcache.c71
-rw-r--r--fs/devpts/inode.c4
-rw-r--r--fs/direct-io.c24
-rw-r--r--fs/eventfd.c127
-rw-r--r--fs/exofs/super.c7
-rw-r--r--fs/ext2/Kconfig6
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/super.c12
-rw-r--r--fs/ext4/Kconfig3
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/block_validity.c6
-rw-r--r--fs/ext4/ext4.h22
-rw-r--r--fs/ext4/ext4_extents.h14
-rw-r--r--fs/ext4/ext4_jbd2.h5
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/extents_status.h2
-rw-r--r--fs/ext4/file.c10
-rw-r--r--fs/ext4/fsmap.c15
-rw-r--r--fs/ext4/fsmap.h15
-rw-r--r--fs/ext4/hash.c6
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c16
-rw-r--r--fs/ext4/mballoc.c28
-rw-r--r--fs/ext4/mballoc.h2
-rw-r--r--fs/ext4/migrate.c9
-rw-r--r--fs/ext4/move_extent.c10
-rw-r--r--fs/ext4/namei.c64
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c25
-rw-r--r--fs/ext4/symlink.c43
-rw-r--r--fs/ext4/sysfs.c65
-rw-r--r--fs/ext4/truncate.h2
-rw-r--r--fs/ext4/xattr.h2
-rw-r--r--fs/f2fs/Kconfig6
-rw-r--r--fs/f2fs/inode.c2
-rw-r--r--fs/f2fs/namei.c132
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/fhandle.c3
-rw-r--r--fs/file.c5
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/freevxfs/vxfs_super.c8
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/aops.c35
-rw-r--r--fs/gfs2/bmap.c583
-rw-r--r--fs/gfs2/bmap.h1
-rw-r--r--fs/gfs2/dir.c3
-rw-r--r--fs/gfs2/file.c23
-rw-r--r--fs/gfs2/glock.c70
-rw-r--r--fs/gfs2/glops.c19
-rw-r--r--fs/gfs2/incore.h6
-rw-r--r--fs/gfs2/inode.c11
-rw-r--r--fs/gfs2/lock_dlm.c4
-rw-r--r--fs/gfs2/log.c124
-rw-r--r--fs/gfs2/log.h10
-rw-r--r--fs/gfs2/lops.c18
-rw-r--r--fs/gfs2/lops.h3
-rw-r--r--fs/gfs2/main.c90
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/quota.c3
-rw-r--r--fs/gfs2/recovery.c110
-rw-r--r--fs/gfs2/rgrp.c38
-rw-r--r--fs/gfs2/super.c19
-rw-r--r--fs/gfs2/sys.c4
-rw-r--r--fs/gfs2/trace_gfs2.h11
-rw-r--r--fs/gfs2/trans.c4
-rw-r--r--fs/hfsplus/Kconfig3
-rw-r--r--fs/hugetlbfs/inode.c39
-rw-r--r--fs/iomap.c14
-rw-r--r--fs/jbd2/checkpoint.c5
-rw-r--r--fs/jbd2/commit.c5
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/jbd2/recovery.c5
-rw-r--r--fs/jbd2/revoke.c5
-rw-r--r--fs/jbd2/transaction.c10
-rw-r--r--fs/jffs2/Kconfig6
-rw-r--r--fs/jffs2/fs.c1
-rw-r--r--fs/jfs/Kconfig3
-rw-r--r--fs/jfs/super.c8
-rw-r--r--fs/mbcache.c8
-rw-r--r--fs/namei.c71
-rw-r--r--fs/ncpfs/Kconfig108
-rw-r--r--fs/ncpfs/Makefile17
-rw-r--r--fs/ncpfs/dir.c1232
-rw-r--r--fs/ncpfs/file.c263
-rw-r--r--fs/ncpfs/getopt.c76
-rw-r--r--fs/ncpfs/getopt.h17
-rw-r--r--fs/ncpfs/inode.c1066
-rw-r--r--fs/ncpfs/ioctl.c923
-rw-r--r--fs/ncpfs/mmap.c125
-rw-r--r--fs/ncpfs/ncp_fs.h101
-rw-r--r--fs/ncpfs/ncp_fs_i.h31
-rw-r--r--fs/ncpfs/ncp_fs_sb.h174
-rw-r--r--fs/ncpfs/ncplib_kernel.c1322
-rw-r--r--fs/ncpfs/ncplib_kernel.h215
-rw-r--r--fs/ncpfs/ncpsign_kernel.c128
-rw-r--r--fs/ncpfs/ncpsign_kernel.h27
-rw-r--r--fs/ncpfs/sock.c855
-rw-r--r--fs/ncpfs/symlink.c182
-rw-r--r--fs/nfs/nfs4file.c1
-rw-r--r--fs/nsfs.c29
-rw-r--r--fs/ocfs2/acl.c6
-rw-r--r--fs/ocfs2/alloc.c261
-rw-r--r--fs/ocfs2/alloc.h1
-rw-r--r--fs/ocfs2/aops.c10
-rw-r--r--fs/ocfs2/cluster/quorum.c5
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h2
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c7
-rw-r--r--fs/ocfs2/dlmglue.c136
-rw-r--r--fs/ocfs2/dlmglue.h35
-rw-r--r--fs/ocfs2/extent_map.c45
-rw-r--r--fs/ocfs2/extent_map.h3
-rw-r--r--fs/ocfs2/file.c101
-rw-r--r--fs/ocfs2/journal.c23
-rw-r--r--fs/ocfs2/mmap.c2
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/ocfs2/ocfs2_lockid.h5
-rw-r--r--fs/ocfs2/ocfs2_trace.h10
-rw-r--r--fs/ocfs2/suballoc.c8
-rw-r--r--fs/ocfs2/super.c13
-rw-r--r--fs/ocfs2/xattr.c5
-rw-r--r--fs/orangefs/super.c15
-rw-r--r--fs/proc/task_mmu.c19
-rw-r--r--fs/pstore/platform.c1
-rw-r--r--fs/reiserfs/Kconfig6
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/dir.c9
-rw-r--r--fs/sysfs/file.c8
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/sysfs/mount.c5
-rw-r--r--fs/sysfs/symlink.c3
-rw-r--r--fs/sysfs/sysfs.h3
-rw-r--r--fs/ubifs/dir.c63
-rw-r--r--fs/ubifs/file.c36
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--fs/ufs/super.c13
-rw-r--r--fs/userfaultfd.c75
-rw-r--r--fs/xfs/Kconfig3
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c124
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h10
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c47
-rw-r--r--fs/xfs/libxfs/xfs_attr.c4
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c148
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h1
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c104
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c120
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c58
-rw-r--r--fs/xfs/libxfs/xfs_btree.c117
-rw-r--r--fs/xfs/libxfs/xfs_btree.h16
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c70
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h6
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c5
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c39
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c208
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c89
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c89
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h12
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c30
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c174
-rw-r--r--fs/xfs/libxfs/xfs_fs.h7
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c143
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c65
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c128
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h4
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c152
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h14
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c2
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h9
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c19
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h3
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c40
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c67
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h5
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c40
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c21
-rw-r--r--fs/xfs/libxfs/xfs_sb.c113
-rw-r--r--fs/xfs/libxfs/xfs_sb.h4
-rw-r--r--fs/xfs/libxfs/xfs_shared.h4
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c75
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c199
-rw-r--r--fs/xfs/scrub/agheader.c340
-rw-r--r--fs/xfs/scrub/alloc.c81
-rw-r--r--fs/xfs/scrub/bmap.c219
-rw-r--r--fs/xfs/scrub/btree.c184
-rw-r--r--fs/xfs/scrub/btree.h9
-rw-r--r--fs/xfs/scrub/common.c255
-rw-r--r--fs/xfs/scrub/common.h23
-rw-r--r--fs/xfs/scrub/dabtree.c22
-rw-r--r--fs/xfs/scrub/dir.c44
-rw-r--r--fs/xfs/scrub/ialloc.c194
-rw-r--r--fs/xfs/scrub/inode.c178
-rw-r--r--fs/xfs/scrub/parent.c8
-rw-r--r--fs/xfs/scrub/quota.c7
-rw-r--r--fs/xfs/scrub/refcount.c420
-rw-r--r--fs/xfs/scrub/rmap.c123
-rw-r--r--fs/xfs/scrub/rtbitmap.c35
-rw-r--r--fs/xfs/scrub/scrub.c203
-rw-r--r--fs/xfs/scrub/scrub.h37
-rw-r--r--fs/xfs/scrub/trace.h44
-rw-r--r--fs/xfs/xfs_aops.c15
-rw-r--r--fs/xfs/xfs_bmap_util.c4
-rw-r--r--fs/xfs/xfs_buf.c22
-rw-r--r--fs/xfs/xfs_buf.h8
-rw-r--r--fs/xfs/xfs_buf_item.c156
-rw-r--r--fs/xfs/xfs_buf_item.h7
-rw-r--r--fs/xfs/xfs_dir2_readdir.c4
-rw-r--r--fs/xfs/xfs_dquot.c62
-rw-r--r--fs/xfs/xfs_dquot_item.c9
-rw-r--r--fs/xfs/xfs_error.c64
-rw-r--r--fs/xfs/xfs_error.h14
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_fsops.c79
-rw-r--r--fs/xfs/xfs_fsops.h1
-rw-r--r--fs/xfs/xfs_icache.c70
-rw-r--r--fs/xfs/xfs_inode.c104
-rw-r--r--fs/xfs/xfs_inode.h5
-rw-r--r--fs/xfs/xfs_inode_item.c41
-rw-r--r--fs/xfs/xfs_ioctl.c5
-rw-r--r--fs/xfs/xfs_ioctl32.c3
-rw-r--r--fs/xfs/xfs_linux.h14
-rw-r--r--fs/xfs/xfs_log.c17
-rw-r--r--fs/xfs/xfs_log_recover.c58
-rw-r--r--fs/xfs/xfs_mount.c3
-rw-r--r--fs/xfs/xfs_qm.c33
-rw-r--r--fs/xfs/xfs_reflink.c95
-rw-r--r--fs/xfs/xfs_rtalloc.h4
-rw-r--r--fs/xfs/xfs_super.c14
-rw-r--r--fs/xfs/xfs_trace.h68
-rw-r--r--fs/xfs/xfs_trans.c22
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c98
252 files changed, 6676 insertions, 9929 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 6489e1fc1afd..11045d8e356a 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -25,9 +25,6 @@ config 9P_FS_POSIX_ACL
25 POSIX Access Control Lists (ACLs) support permissions for users and 25 POSIX Access Control Lists (ACLs) support permissions for users and
26 groups beyond the owner/group/world scheme. 26 groups beyond the owner/group/world scheme.
27 27
28 To learn more about Access Control Lists, visit the POSIX ACLs for
29 Linux website <http://acl.bestbits.at/>.
30
31 If you don't know what Access Control Lists are, say N 28 If you don't know what Access Control Lists are, say N
32 29
33endif 30endif
diff --git a/fs/Kconfig b/fs/Kconfig
index 7aee6d699fd6..9774588da60e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -167,17 +167,13 @@ config TMPFS_POSIX_ACL
167 files for sound to work properly. In short, if you're not sure, 167 files for sound to work properly. In short, if you're not sure,
168 say Y. 168 say Y.
169 169
170 To learn more about Access Control Lists, visit the POSIX ACLs for
171 Linux website <http://acl.bestbits.at/>.
172
173config TMPFS_XATTR 170config TMPFS_XATTR
174 bool "Tmpfs extended attributes" 171 bool "Tmpfs extended attributes"
175 depends on TMPFS 172 depends on TMPFS
176 default n 173 default n
177 help 174 help
178 Extended attributes are name:value pairs associated with inodes by 175 Extended attributes are name:value pairs associated with inodes by
179 the kernel or by users (see the attr(5) manual page, or visit 176 the kernel or by users (see the attr(5) manual page for details).
180 <http://acl.bestbits.at/> for details).
181 177
182 Currently this enables support for the trusted.* and 178 Currently this enables support for the trusted.* and
183 security.* namespaces. 179 security.* namespaces.
@@ -298,7 +294,6 @@ config NFS_COMMON
298source "net/sunrpc/Kconfig" 294source "net/sunrpc/Kconfig"
299source "fs/ceph/Kconfig" 295source "fs/ceph/Kconfig"
300source "fs/cifs/Kconfig" 296source "fs/cifs/Kconfig"
301source "fs/ncpfs/Kconfig"
302source "fs/coda/Kconfig" 297source "fs/coda/Kconfig"
303source "fs/afs/Kconfig" 298source "fs/afs/Kconfig"
304source "fs/9p/Kconfig" 299source "fs/9p/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index ef772f1eaff8..add789ea270a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -92,7 +92,6 @@ obj-$(CONFIG_LOCKD) += lockd/
92obj-$(CONFIG_NLS) += nls/ 92obj-$(CONFIG_NLS) += nls/
93obj-$(CONFIG_SYSV_FS) += sysv/ 93obj-$(CONFIG_SYSV_FS) += sysv/
94obj-$(CONFIG_CIFS) += cifs/ 94obj-$(CONFIG_CIFS) += cifs/
95obj-$(CONFIG_NCP_FS) += ncpfs/
96obj-$(CONFIG_HPFS_FS) += hpfs/ 95obj-$(CONFIG_HPFS_FS) += hpfs/
97obj-$(CONFIG_NTFS_FS) += ntfs/ 96obj-$(CONFIG_NTFS_FS) += ntfs/
98obj-$(CONFIG_UFS_FS) += ufs/ 97obj-$(CONFIG_UFS_FS) += ufs/
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index ee236231cafa..af2832aaeec5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -444,11 +444,15 @@ unacquire_none:
444static int __init 444static int __init
445befs_init_inodecache(void) 445befs_init_inodecache(void)
446{ 446{
447 befs_inode_cachep = kmem_cache_create("befs_inode_cache", 447 befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache",
448 sizeof (struct befs_inode_info), 448 sizeof(struct befs_inode_info), 0,
449 0, (SLAB_RECLAIM_ACCOUNT| 449 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
450 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 450 SLAB_ACCOUNT),
451 init_once); 451 offsetof(struct befs_inode_info,
452 i_data.symlink),
453 sizeof_field(struct befs_inode_info,
454 i_data.symlink),
455 init_once);
452 if (befs_inode_cachep == NULL) 456 if (befs_inode_cachep == NULL)
453 return -ENOMEM; 457 return -ENOMEM;
454 458
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 2e558227931a..273351ee4c46 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -38,9 +38,6 @@ config BTRFS_FS_POSIX_ACL
38 POSIX Access Control Lists (ACLs) support permissions for users and 38 POSIX Access Control Lists (ACLs) support permissions for users and
39 groups beyond the owner/group/world scheme. 39 groups beyond the owner/group/world scheme.
40 40
41 To learn more about Access Control Lists, visit the POSIX ACLs for
42 Linux website <http://acl.bestbits.at/>.
43
44 If you don't know what Access Control Lists are, say N 41 If you don't know what Access Control Lists are, say N
45 42
46config BTRFS_FS_CHECK_INTEGRITY 43config BTRFS_FS_CHECK_INTEGRITY
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ed095202942f..21f34ad0d411 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,6 +30,7 @@
30#include <linux/ratelimit.h> 30#include <linux/ratelimit.h>
31#include <linux/uuid.h> 31#include <linux/uuid.h>
32#include <linux/semaphore.h> 32#include <linux/semaphore.h>
33#include <linux/error-injection.h>
33#include <asm/unaligned.h> 34#include <asm/unaligned.h>
34#include "ctree.h" 35#include "ctree.h"
35#include "disk-io.h" 36#include "disk-io.h"
@@ -3115,6 +3116,7 @@ recovery_tree_root:
3115 goto fail_block_groups; 3116 goto fail_block_groups;
3116 goto retry_root_backup; 3117 goto retry_root_backup;
3117} 3118}
3119ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
3118 3120
3119static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) 3121static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
3120{ 3122{
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 014f3c090231..a9f22ac50d6a 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -22,6 +22,7 @@
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/math64.h> 23#include <linux/math64.h>
24#include <linux/ratelimit.h> 24#include <linux/ratelimit.h>
25#include <linux/error-injection.h>
25#include "ctree.h" 26#include "ctree.h"
26#include "free-space-cache.h" 27#include "free-space-cache.h"
27#include "transaction.h" 28#include "transaction.h"
@@ -332,6 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
332 333
333 return 0; 334 return 0;
334} 335}
336ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
335 337
336static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 338static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
337{ 339{
diff --git a/fs/buffer.c b/fs/buffer.c
index 8b26295a56fe..9a73924db22f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -53,13 +53,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
53 53
54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) 54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
55 55
56void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
57{
58 bh->b_end_io = handler;
59 bh->b_private = private;
60}
61EXPORT_SYMBOL(init_buffer);
62
63inline void touch_buffer(struct buffer_head *bh) 56inline void touch_buffer(struct buffer_head *bh)
64{ 57{
65 trace_block_touch_buffer(bh); 58 trace_block_touch_buffer(bh);
@@ -922,7 +915,8 @@ init_page_buffers(struct page *page, struct block_device *bdev,
922 915
923 do { 916 do {
924 if (!buffer_mapped(bh)) { 917 if (!buffer_mapped(bh)) {
925 init_buffer(bh, NULL, NULL); 918 bh->b_end_io = NULL;
919 bh->b_private = NULL;
926 bh->b_bdev = bdev; 920 bh->b_bdev = bdev;
927 bh->b_blocknr = block; 921 bh->b_blocknr = block;
928 if (uptodate) 922 if (uptodate)
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 264e9bf83ff3..52095f473464 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -34,7 +34,4 @@ config CEPH_FS_POSIX_ACL
34 POSIX Access Control Lists (ACLs) support permissions for users and 34 POSIX Access Control Lists (ACLs) support permissions for users and
35 groups beyond the owner/group/world scheme. 35 groups beyond the owner/group/world scheme.
36 36
37 To learn more about Access Control Lists, visit the POSIX ACLs for
38 Linux website <http://acl.bestbits.at/>.
39
40 If you don't know what Access Control Lists are, say N 37 If you don't know what Access Control Lists are, say N
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index c71971c01c63..687da62daf4e 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -108,14 +108,13 @@ config CIFS_XATTR
108 depends on CIFS 108 depends on CIFS
109 help 109 help
110 Extended attributes are name:value pairs associated with inodes by 110 Extended attributes are name:value pairs associated with inodes by
111 the kernel or by users (see the attr(5) manual page, or visit 111 the kernel or by users (see the attr(5) manual page for details).
112 <http://acl.bestbits.at/> for details). CIFS maps the name of 112 CIFS maps the name of extended attributes beginning with the user
113 extended attributes beginning with the user namespace prefix 113 namespace prefix to SMB/CIFS EAs. EAs are stored on Windows
114 to SMB/CIFS EAs. EAs are stored on Windows servers without the 114 servers without the user namespace prefix, but their names are
115 user namespace prefix, but their names are seen by Linux cifs clients 115 seen by Linux cifs clients prefaced by the user namespace prefix.
116 prefaced by the user namespace prefix. The system namespace 116 The system namespace (used by some filesystems to store ACLs) is
117 (used by some filesystems to store ACLs) is not supported at 117 not supported at this time.
118 this time.
119 118
120 If unsure, say Y. 119 If unsure, say Y.
121 120
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a7be591d8e18..32cdea67bbfd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1239,9 +1239,11 @@ cifs_init_request_bufs(void)
1239 cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n", 1239 cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n",
1240 CIFSMaxBufSize, CIFSMaxBufSize); 1240 CIFSMaxBufSize, CIFSMaxBufSize);
1241*/ 1241*/
1242 cifs_req_cachep = kmem_cache_create("cifs_request", 1242 cifs_req_cachep = kmem_cache_create_usercopy("cifs_request",
1243 CIFSMaxBufSize + max_hdr_size, 0, 1243 CIFSMaxBufSize + max_hdr_size, 0,
1244 SLAB_HWCACHE_ALIGN, NULL); 1244 SLAB_HWCACHE_ALIGN, 0,
1245 CIFSMaxBufSize + max_hdr_size,
1246 NULL);
1245 if (cifs_req_cachep == NULL) 1247 if (cifs_req_cachep == NULL)
1246 return -ENOMEM; 1248 return -ENOMEM;
1247 1249
@@ -1267,9 +1269,9 @@ cifs_init_request_bufs(void)
1267 more SMBs to use small buffer alloc and is still much more 1269 more SMBs to use small buffer alloc and is still much more
1268 efficient to alloc 1 per page off the slab compared to 17K (5page) 1270 efficient to alloc 1 per page off the slab compared to 17K (5page)
1269 alloc of large cifs buffers even when page debugging is on */ 1271 alloc of large cifs buffers even when page debugging is on */
1270 cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq", 1272 cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq",
1271 MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, 1273 MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN,
1272 NULL); 1274 0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL);
1273 if (cifs_sm_req_cachep == NULL) { 1275 if (cifs_sm_req_cachep == NULL) {
1274 mempool_destroy(cifs_req_poolp); 1276 mempool_destroy(cifs_req_poolp);
1275 kmem_cache_destroy(cifs_req_cachep); 1277 kmem_cache_destroy(cifs_req_cachep);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 732a786cce9d..ce654526c0fb 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -27,6 +27,7 @@
27#include <linux/dcache.h> 27#include <linux/dcache.h>
28#include <linux/namei.h> 28#include <linux/namei.h>
29#include <crypto/aes.h> 29#include <crypto/aes.h>
30#include <crypto/skcipher.h>
30#include "fscrypt_private.h" 31#include "fscrypt_private.h"
31 32
32static unsigned int num_prealloc_crypto_pages = 32; 33static unsigned int num_prealloc_crypto_pages = 32;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 305541bcd108..e33f3d3c5ade 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -13,42 +13,46 @@
13 13
14#include <linux/scatterlist.h> 14#include <linux/scatterlist.h>
15#include <linux/ratelimit.h> 15#include <linux/ratelimit.h>
16#include <crypto/skcipher.h>
16#include "fscrypt_private.h" 17#include "fscrypt_private.h"
17 18
19static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
20{
21 if (str->len == 1 && str->name[0] == '.')
22 return true;
23
24 if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
25 return true;
26
27 return false;
28}
29
18/** 30/**
19 * fname_encrypt() - encrypt a filename 31 * fname_encrypt() - encrypt a filename
20 * 32 *
21 * The caller must have allocated sufficient memory for the @oname string. 33 * The output buffer must be at least as large as the input buffer.
34 * Any extra space is filled with NUL padding before encryption.
22 * 35 *
23 * Return: 0 on success, -errno on failure 36 * Return: 0 on success, -errno on failure
24 */ 37 */
25static int fname_encrypt(struct inode *inode, 38int fname_encrypt(struct inode *inode, const struct qstr *iname,
26 const struct qstr *iname, struct fscrypt_str *oname) 39 u8 *out, unsigned int olen)
27{ 40{
28 struct skcipher_request *req = NULL; 41 struct skcipher_request *req = NULL;
29 DECLARE_CRYPTO_WAIT(wait); 42 DECLARE_CRYPTO_WAIT(wait);
30 struct fscrypt_info *ci = inode->i_crypt_info; 43 struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
31 struct crypto_skcipher *tfm = ci->ci_ctfm;
32 int res = 0; 44 int res = 0;
33 char iv[FS_CRYPTO_BLOCK_SIZE]; 45 char iv[FS_CRYPTO_BLOCK_SIZE];
34 struct scatterlist sg; 46 struct scatterlist sg;
35 int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
36 unsigned int lim;
37 unsigned int cryptlen;
38
39 lim = inode->i_sb->s_cop->max_namelen(inode);
40 if (iname->len <= 0 || iname->len > lim)
41 return -EIO;
42 47
43 /* 48 /*
44 * Copy the filename to the output buffer for encrypting in-place and 49 * Copy the filename to the output buffer for encrypting in-place and
45 * pad it with the needed number of NUL bytes. 50 * pad it with the needed number of NUL bytes.
46 */ 51 */
47 cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); 52 if (WARN_ON(olen < iname->len))
48 cryptlen = round_up(cryptlen, padding); 53 return -ENOBUFS;
49 cryptlen = min(cryptlen, lim); 54 memcpy(out, iname->name, iname->len);
50 memcpy(oname->name, iname->name, iname->len); 55 memset(out + iname->len, 0, olen - iname->len);
51 memset(oname->name + iname->len, 0, cryptlen - iname->len);
52 56
53 /* Initialize the IV */ 57 /* Initialize the IV */
54 memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); 58 memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
@@ -63,8 +67,8 @@ static int fname_encrypt(struct inode *inode,
63 skcipher_request_set_callback(req, 67 skcipher_request_set_callback(req,
64 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, 68 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
65 crypto_req_done, &wait); 69 crypto_req_done, &wait);
66 sg_init_one(&sg, oname->name, cryptlen); 70 sg_init_one(&sg, out, olen);
67 skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); 71 skcipher_request_set_crypt(req, &sg, &sg, olen, iv);
68 72
69 /* Do the encryption */ 73 /* Do the encryption */
70 res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); 74 res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
@@ -75,7 +79,6 @@ static int fname_encrypt(struct inode *inode,
75 return res; 79 return res;
76 } 80 }
77 81
78 oname->len = cryptlen;
79 return 0; 82 return 0;
80} 83}
81 84
@@ -188,50 +191,52 @@ static int digest_decode(const char *src, int len, char *dst)
188 return cp - dst; 191 return cp - dst;
189} 192}
190 193
191u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen) 194bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
195 u32 max_len, u32 *encrypted_len_ret)
192{ 196{
193 int padding = 32; 197 int padding = 4 << (inode->i_crypt_info->ci_flags &
194 struct fscrypt_info *ci = inode->i_crypt_info; 198 FS_POLICY_FLAGS_PAD_MASK);
195 199 u32 encrypted_len;
196 if (ci) 200
197 padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); 201 if (orig_len > max_len)
198 ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE); 202 return false;
199 return round_up(ilen, padding); 203 encrypted_len = max(orig_len, (u32)FS_CRYPTO_BLOCK_SIZE);
204 encrypted_len = round_up(encrypted_len, padding);
205 *encrypted_len_ret = min(encrypted_len, max_len);
206 return true;
200} 207}
201EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
202 208
203/** 209/**
204 * fscrypt_fname_crypto_alloc_obuff() - 210 * fscrypt_fname_alloc_buffer - allocate a buffer for presented filenames
211 *
212 * Allocate a buffer that is large enough to hold any decrypted or encoded
213 * filename (null-terminated), for the given maximum encrypted filename length.
205 * 214 *
206 * Allocates an output buffer that is sufficient for the crypto operation 215 * Return: 0 on success, -errno on failure
207 * specified by the context and the direction.
208 */ 216 */
209int fscrypt_fname_alloc_buffer(const struct inode *inode, 217int fscrypt_fname_alloc_buffer(const struct inode *inode,
210 u32 ilen, struct fscrypt_str *crypto_str) 218 u32 max_encrypted_len,
219 struct fscrypt_str *crypto_str)
211{ 220{
212 u32 olen = fscrypt_fname_encrypted_size(inode, ilen);
213 const u32 max_encoded_len = 221 const u32 max_encoded_len =
214 max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), 222 max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE),
215 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); 223 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name)));
224 u32 max_presented_len;
216 225
217 crypto_str->len = olen; 226 max_presented_len = max(max_encoded_len, max_encrypted_len);
218 olen = max(olen, max_encoded_len);
219 227
220 /* 228 crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS);
221 * Allocated buffer can hold one more character to null-terminate the 229 if (!crypto_str->name)
222 * string
223 */
224 crypto_str->name = kmalloc(olen + 1, GFP_NOFS);
225 if (!(crypto_str->name))
226 return -ENOMEM; 230 return -ENOMEM;
231 crypto_str->len = max_presented_len;
227 return 0; 232 return 0;
228} 233}
229EXPORT_SYMBOL(fscrypt_fname_alloc_buffer); 234EXPORT_SYMBOL(fscrypt_fname_alloc_buffer);
230 235
231/** 236/**
232 * fscrypt_fname_crypto_free_buffer() - 237 * fscrypt_fname_free_buffer - free the buffer for presented filenames
233 * 238 *
234 * Frees the buffer allocated for crypto operation. 239 * Free the buffer allocated by fscrypt_fname_alloc_buffer().
235 */ 240 */
236void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) 241void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
237{ 242{
@@ -298,35 +303,6 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
298EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); 303EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
299 304
300/** 305/**
301 * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk
302 * space
303 *
304 * The caller must have allocated sufficient memory for the @oname string.
305 *
306 * Return: 0 on success, -errno on failure
307 */
308int fscrypt_fname_usr_to_disk(struct inode *inode,
309 const struct qstr *iname,
310 struct fscrypt_str *oname)
311{
312 if (fscrypt_is_dot_dotdot(iname)) {
313 oname->name[0] = '.';
314 oname->name[iname->len - 1] = '.';
315 oname->len = iname->len;
316 return 0;
317 }
318 if (inode->i_crypt_info)
319 return fname_encrypt(inode, iname, oname);
320 /*
321 * Without a proper key, a user is not allowed to modify the filenames
322 * in a directory. Consequently, a user space name cannot be mapped to
323 * a disk-space name
324 */
325 return -ENOKEY;
326}
327EXPORT_SYMBOL(fscrypt_fname_usr_to_disk);
328
329/**
330 * fscrypt_setup_filename() - prepare to search a possibly encrypted directory 306 * fscrypt_setup_filename() - prepare to search a possibly encrypted directory
331 * @dir: the directory that will be searched 307 * @dir: the directory that will be searched
332 * @iname: the user-provided filename being searched for 308 * @iname: the user-provided filename being searched for
@@ -369,11 +345,17 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
369 return ret; 345 return ret;
370 346
371 if (dir->i_crypt_info) { 347 if (dir->i_crypt_info) {
372 ret = fscrypt_fname_alloc_buffer(dir, iname->len, 348 if (!fscrypt_fname_encrypted_size(dir, iname->len,
373 &fname->crypto_buf); 349 dir->i_sb->s_cop->max_namelen(dir),
374 if (ret) 350 &fname->crypto_buf.len))
375 return ret; 351 return -ENAMETOOLONG;
376 ret = fname_encrypt(dir, iname, &fname->crypto_buf); 352 fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
353 GFP_NOFS);
354 if (!fname->crypto_buf.name)
355 return -ENOMEM;
356
357 ret = fname_encrypt(dir, iname, fname->crypto_buf.name,
358 fname->crypto_buf.len);
377 if (ret) 359 if (ret)
378 goto errout; 360 goto errout;
379 fname->disk_name.name = fname->crypto_buf.name; 361 fname->disk_name.name = fname->crypto_buf.name;
@@ -425,7 +407,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
425 return 0; 407 return 0;
426 408
427errout: 409errout:
428 fscrypt_fname_free_buffer(&fname->crypto_buf); 410 kfree(fname->crypto_buf.name);
429 return ret; 411 return ret;
430} 412}
431EXPORT_SYMBOL(fscrypt_setup_filename); 413EXPORT_SYMBOL(fscrypt_setup_filename);
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index c0b4f5597e1a..ad6722bae8b7 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -50,6 +50,15 @@ struct fscrypt_context {
50 50
51#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 51#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
52 52
53/**
54 * For encrypted symlinks, the ciphertext length is stored at the beginning
55 * of the string in little-endian format.
56 */
57struct fscrypt_symlink_data {
58 __le16 len;
59 char encrypted_path[1];
60} __packed;
61
53/* 62/*
54 * A pointer to this structure is stored in the file system's in-core 63 * A pointer to this structure is stored in the file system's in-core
55 * representation of an inode. 64 * representation of an inode.
@@ -71,7 +80,22 @@ typedef enum {
71#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 80#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
72#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 81#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
73 82
83static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
84 u32 filenames_mode)
85{
86 if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
87 filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
88 return true;
89
90 if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
91 filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
92 return true;
93
94 return false;
95}
96
74/* crypto.c */ 97/* crypto.c */
98extern struct kmem_cache *fscrypt_info_cachep;
75extern int fscrypt_initialize(unsigned int cop_flags); 99extern int fscrypt_initialize(unsigned int cop_flags);
76extern struct workqueue_struct *fscrypt_read_workqueue; 100extern struct workqueue_struct *fscrypt_read_workqueue;
77extern int fscrypt_do_page_crypto(const struct inode *inode, 101extern int fscrypt_do_page_crypto(const struct inode *inode,
@@ -83,6 +107,13 @@ extern int fscrypt_do_page_crypto(const struct inode *inode,
83extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, 107extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
84 gfp_t gfp_flags); 108 gfp_t gfp_flags);
85 109
110/* fname.c */
111extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
112 u8 *out, unsigned int olen);
113extern bool fscrypt_fname_encrypted_size(const struct inode *inode,
114 u32 orig_len, u32 max_len,
115 u32 *encrypted_len_ret);
116
86/* keyinfo.c */ 117/* keyinfo.c */
87extern void __exit fscrypt_essiv_cleanup(void); 118extern void __exit fscrypt_essiv_cleanup(void);
88 119
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 9f5fb2eb9cf7..bec06490fb13 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -110,3 +110,161 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry)
110 return 0; 110 return 0;
111} 111}
112EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); 112EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup);
113
114int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len,
115 unsigned int max_len,
116 struct fscrypt_str *disk_link)
117{
118 int err;
119
120 /*
121 * To calculate the size of the encrypted symlink target we need to know
122 * the amount of NUL padding, which is determined by the flags set in
123 * the encryption policy which will be inherited from the directory.
124 * The easiest way to get access to this is to just load the directory's
125 * fscrypt_info, since we'll need it to create the dir_entry anyway.
126 *
127 * Note: in test_dummy_encryption mode, @dir may be unencrypted.
128 */
129 err = fscrypt_get_encryption_info(dir);
130 if (err)
131 return err;
132 if (!fscrypt_has_encryption_key(dir))
133 return -ENOKEY;
134
135 /*
136 * Calculate the size of the encrypted symlink and verify it won't
137 * exceed max_len. Note that for historical reasons, encrypted symlink
138 * targets are prefixed with the ciphertext length, despite this
139 * actually being redundant with i_size. This decreases by 2 bytes the
140 * longest symlink target we can accept.
141 *
142 * We could recover 1 byte by not counting a null terminator, but
143 * counting it (even though it is meaningless for ciphertext) is simpler
144 * for now since filesystems will assume it is there and subtract it.
145 */
146 if (!fscrypt_fname_encrypted_size(dir, len,
147 max_len - sizeof(struct fscrypt_symlink_data),
148 &disk_link->len))
149 return -ENAMETOOLONG;
150 disk_link->len += sizeof(struct fscrypt_symlink_data);
151
152 disk_link->name = NULL;
153 return 0;
154}
155EXPORT_SYMBOL_GPL(__fscrypt_prepare_symlink);
156
157int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
158 unsigned int len, struct fscrypt_str *disk_link)
159{
160 int err;
161 struct qstr iname = QSTR_INIT(target, len);
162 struct fscrypt_symlink_data *sd;
163 unsigned int ciphertext_len;
164
165 err = fscrypt_require_key(inode);
166 if (err)
167 return err;
168
169 if (disk_link->name) {
170 /* filesystem-provided buffer */
171 sd = (struct fscrypt_symlink_data *)disk_link->name;
172 } else {
173 sd = kmalloc(disk_link->len, GFP_NOFS);
174 if (!sd)
175 return -ENOMEM;
176 }
177 ciphertext_len = disk_link->len - sizeof(*sd);
178 sd->len = cpu_to_le16(ciphertext_len);
179
180 err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len);
181 if (err) {
182 if (!disk_link->name)
183 kfree(sd);
184 return err;
185 }
186 /*
187 * Null-terminating the ciphertext doesn't make sense, but we still
188 * count the null terminator in the length, so we might as well
189 * initialize it just in case the filesystem writes it out.
190 */
191 sd->encrypted_path[ciphertext_len] = '\0';
192
193 if (!disk_link->name)
194 disk_link->name = (unsigned char *)sd;
195 return 0;
196}
197EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink);
198
199/**
200 * fscrypt_get_symlink - get the target of an encrypted symlink
201 * @inode: the symlink inode
202 * @caddr: the on-disk contents of the symlink
203 * @max_size: size of @caddr buffer
204 * @done: if successful, will be set up to free the returned target
205 *
206 * If the symlink's encryption key is available, we decrypt its target.
207 * Otherwise, we encode its target for presentation.
208 *
209 * This may sleep, so the filesystem must have dropped out of RCU mode already.
210 *
211 * Return: the presentable symlink target or an ERR_PTR()
212 */
213const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
214 unsigned int max_size,
215 struct delayed_call *done)
216{
217 const struct fscrypt_symlink_data *sd;
218 struct fscrypt_str cstr, pstr;
219 int err;
220
221 /* This is for encrypted symlinks only */
222 if (WARN_ON(!IS_ENCRYPTED(inode)))
223 return ERR_PTR(-EINVAL);
224
225 /*
226 * Try to set up the symlink's encryption key, but we can continue
227 * regardless of whether the key is available or not.
228 */
229 err = fscrypt_get_encryption_info(inode);
230 if (err)
231 return ERR_PTR(err);
232
233 /*
234 * For historical reasons, encrypted symlink targets are prefixed with
235 * the ciphertext length, even though this is redundant with i_size.
236 */
237
238 if (max_size < sizeof(*sd))
239 return ERR_PTR(-EUCLEAN);
240 sd = caddr;
241 cstr.name = (unsigned char *)sd->encrypted_path;
242 cstr.len = le16_to_cpu(sd->len);
243
244 if (cstr.len == 0)
245 return ERR_PTR(-EUCLEAN);
246
247 if (cstr.len + sizeof(*sd) - 1 > max_size)
248 return ERR_PTR(-EUCLEAN);
249
250 err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
251 if (err)
252 return ERR_PTR(err);
253
254 err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
255 if (err)
256 goto err_kfree;
257
258 err = -EUCLEAN;
259 if (pstr.name[0] == '\0')
260 goto err_kfree;
261
262 pstr.name[pstr.len] = '\0';
263 set_delayed_call(done, kfree_link, pstr.name);
264 return pstr.name;
265
266err_kfree:
267 kfree(pstr.name);
268 return ERR_PTR(err);
269}
270EXPORT_SYMBOL_GPL(fscrypt_get_symlink);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 5e6e846f5a24..05f5ee1f0705 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -14,6 +14,7 @@
14#include <linux/ratelimit.h> 14#include <linux/ratelimit.h>
15#include <crypto/aes.h> 15#include <crypto/aes.h>
16#include <crypto/sha.h> 16#include <crypto/sha.h>
17#include <crypto/skcipher.h>
17#include "fscrypt_private.h" 18#include "fscrypt_private.h"
18 19
19static struct crypto_shash *essiv_hash_tfm; 20static struct crypto_shash *essiv_hash_tfm;
@@ -354,19 +355,9 @@ out:
354} 355}
355EXPORT_SYMBOL(fscrypt_get_encryption_info); 356EXPORT_SYMBOL(fscrypt_get_encryption_info);
356 357
357void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) 358void fscrypt_put_encryption_info(struct inode *inode)
358{ 359{
359 struct fscrypt_info *prev; 360 put_crypt_info(inode->i_crypt_info);
360 361 inode->i_crypt_info = NULL;
361 if (ci == NULL)
362 ci = READ_ONCE(inode->i_crypt_info);
363 if (ci == NULL)
364 return;
365
366 prev = cmpxchg(&inode->i_crypt_info, ci, NULL);
367 if (prev != ci)
368 return;
369
370 put_crypt_info(ci);
371} 362}
372EXPORT_SYMBOL(fscrypt_put_encryption_info); 363EXPORT_SYMBOL(fscrypt_put_encryption_info);
diff --git a/fs/dax.c b/fs/dax.c
index 95981591977a..0276df90e86c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -44,6 +44,7 @@
44 44
45/* The 'colour' (ie low bits) within a PMD of a page offset. */ 45/* The 'colour' (ie low bits) within a PMD of a page offset. */
46#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) 46#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
47#define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT)
47 48
48static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES]; 49static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
49 50
@@ -375,8 +376,8 @@ restart:
375 * unmapped. 376 * unmapped.
376 */ 377 */
377 if (pmd_downgrade && dax_is_zero_entry(entry)) 378 if (pmd_downgrade && dax_is_zero_entry(entry))
378 unmap_mapping_range(mapping, 379 unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
379 (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0); 380 PG_PMD_NR, false);
380 381
381 err = radix_tree_preload( 382 err = radix_tree_preload(
382 mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM); 383 mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
@@ -538,12 +539,10 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
538 if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { 539 if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
539 /* we are replacing a zero page with block mapping */ 540 /* we are replacing a zero page with block mapping */
540 if (dax_is_pmd_entry(entry)) 541 if (dax_is_pmd_entry(entry))
541 unmap_mapping_range(mapping, 542 unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
542 (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, 543 PG_PMD_NR, false);
543 PMD_SIZE, 0);
544 else /* pte entry */ 544 else /* pte entry */
545 unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT, 545 unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
546 PAGE_SIZE, 0);
547 } 546 }
548 547
549 spin_lock_irq(&mapping->tree_lock); 548 spin_lock_irq(&mapping->tree_lock);
@@ -636,8 +635,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
636 pmd = pmd_mkclean(pmd); 635 pmd = pmd_mkclean(pmd);
637 set_pmd_at(vma->vm_mm, address, pmdp, pmd); 636 set_pmd_at(vma->vm_mm, address, pmdp, pmd);
638unlock_pmd: 637unlock_pmd:
639 spin_unlock(ptl);
640#endif 638#endif
639 spin_unlock(ptl);
641 } else { 640 } else {
642 if (pfn != pte_pfn(*ptep)) 641 if (pfn != pte_pfn(*ptep))
643 goto unlock_pte; 642 goto unlock_pte;
@@ -1096,7 +1095,7 @@ static bool dax_fault_is_synchronous(unsigned long flags,
1096} 1095}
1097 1096
1098static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, 1097static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1099 const struct iomap_ops *ops) 1098 int *iomap_errp, const struct iomap_ops *ops)
1100{ 1099{
1101 struct vm_area_struct *vma = vmf->vma; 1100 struct vm_area_struct *vma = vmf->vma;
1102 struct address_space *mapping = vma->vm_file->f_mapping; 1101 struct address_space *mapping = vma->vm_file->f_mapping;
@@ -1149,6 +1148,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1149 * that we never have to deal with more than a single extent here. 1148 * that we never have to deal with more than a single extent here.
1150 */ 1149 */
1151 error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); 1150 error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
1151 if (iomap_errp)
1152 *iomap_errp = error;
1152 if (error) { 1153 if (error) {
1153 vmf_ret = dax_fault_return(error); 1154 vmf_ret = dax_fault_return(error);
1154 goto unlock_entry; 1155 goto unlock_entry;
@@ -1269,12 +1270,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1269} 1270}
1270 1271
1271#ifdef CONFIG_FS_DAX_PMD 1272#ifdef CONFIG_FS_DAX_PMD
1272/*
1273 * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
1274 * more often than one might expect in the below functions.
1275 */
1276#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
1277
1278static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, 1273static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
1279 void *entry) 1274 void *entry)
1280{ 1275{
@@ -1488,6 +1483,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1488 * @vmf: The description of the fault 1483 * @vmf: The description of the fault
1489 * @pe_size: Size of the page to fault in 1484 * @pe_size: Size of the page to fault in
1490 * @pfnp: PFN to insert for synchronous faults if fsync is required 1485 * @pfnp: PFN to insert for synchronous faults if fsync is required
1486 * @iomap_errp: Storage for detailed error code in case of error
1491 * @ops: Iomap ops passed from the file system 1487 * @ops: Iomap ops passed from the file system
1492 * 1488 *
1493 * When a page fault occurs, filesystems may call this helper in 1489 * When a page fault occurs, filesystems may call this helper in
@@ -1496,11 +1492,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1496 * successfully. 1492 * successfully.
1497 */ 1493 */
1498int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, 1494int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
1499 pfn_t *pfnp, const struct iomap_ops *ops) 1495 pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
1500{ 1496{
1501 switch (pe_size) { 1497 switch (pe_size) {
1502 case PE_SIZE_PTE: 1498 case PE_SIZE_PTE:
1503 return dax_iomap_pte_fault(vmf, pfnp, ops); 1499 return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
1504 case PE_SIZE_PMD: 1500 case PE_SIZE_PMD:
1505 return dax_iomap_pmd_fault(vmf, pfnp, ops); 1501 return dax_iomap_pmd_fault(vmf, pfnp, ops);
1506 default: 1502 default:
diff --git a/fs/dcache.c b/fs/dcache.c
index 379dce86f001..cca2b377ff0a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,14 +32,11 @@
32#include <linux/swap.h> 32#include <linux/swap.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h> 35#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h> 36#include <linux/rculist_bl.h>
38#include <linux/prefetch.h> 37#include <linux/prefetch.h>
39#include <linux/ratelimit.h> 38#include <linux/ratelimit.h>
40#include <linux/list_lru.h> 39#include <linux/list_lru.h>
41#include <linux/kasan.h>
42
43#include "internal.h" 40#include "internal.h"
44#include "mount.h" 41#include "mount.h"
45 42
@@ -49,8 +46,8 @@
49 * - i_dentry, d_u.d_alias, d_inode of aliases 46 * - i_dentry, d_u.d_alias, d_inode of aliases
50 * dcache_hash_bucket lock protects: 47 * dcache_hash_bucket lock protects:
51 * - the dcache hash table 48 * - the dcache hash table
52 * s_anon bl list spinlock protects: 49 * s_roots bl list spinlock protects:
53 * - the s_anon list (see __d_drop) 50 * - the s_roots list (see __d_drop)
54 * dentry->d_sb->s_dentry_lru_lock protects: 51 * dentry->d_sb->s_dentry_lru_lock protects:
55 * - the dcache lru lists and counters 52 * - the dcache lru lists and counters
56 * d_lock protects: 53 * d_lock protects:
@@ -68,7 +65,7 @@
68 * dentry->d_lock 65 * dentry->d_lock
69 * dentry->d_sb->s_dentry_lru_lock 66 * dentry->d_sb->s_dentry_lru_lock
70 * dcache_hash_bucket lock 67 * dcache_hash_bucket lock
71 * s_anon lock 68 * s_roots lock
72 * 69 *
73 * If there is an ancestor relationship: 70 * If there is an ancestor relationship:
74 * dentry->d_parent->...->d_parent->d_lock 71 * dentry->d_parent->...->d_parent->d_lock
@@ -104,14 +101,13 @@ EXPORT_SYMBOL(slash_name);
104 * information, yet avoid using a prime hash-size or similar. 101 * information, yet avoid using a prime hash-size or similar.
105 */ 102 */
106 103
107static unsigned int d_hash_mask __read_mostly;
108static unsigned int d_hash_shift __read_mostly; 104static unsigned int d_hash_shift __read_mostly;
109 105
110static struct hlist_bl_head *dentry_hashtable __read_mostly; 106static struct hlist_bl_head *dentry_hashtable __read_mostly;
111 107
112static inline struct hlist_bl_head *d_hash(unsigned int hash) 108static inline struct hlist_bl_head *d_hash(unsigned int hash)
113{ 109{
114 return dentry_hashtable + (hash >> (32 - d_hash_shift)); 110 return dentry_hashtable + (hash >> d_hash_shift);
115} 111}
116 112
117#define IN_LOOKUP_SHIFT 10 113#define IN_LOOKUP_SHIFT 10
@@ -195,7 +191,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
195 unsigned long a,b,mask; 191 unsigned long a,b,mask;
196 192
197 for (;;) { 193 for (;;) {
198 a = *(unsigned long *)cs; 194 a = read_word_at_a_time(cs);
199 b = load_unaligned_zeropad(ct); 195 b = load_unaligned_zeropad(ct);
200 if (tcount < sizeof(unsigned long)) 196 if (tcount < sizeof(unsigned long))
201 break; 197 break;
@@ -468,30 +464,37 @@ static void dentry_lru_add(struct dentry *dentry)
468 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some 464 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
469 * reason (NFS timeouts or autofs deletes). 465 * reason (NFS timeouts or autofs deletes).
470 * 466 *
471 * __d_drop requires dentry->d_lock. 467 * __d_drop requires dentry->d_lock
468 * ___d_drop doesn't mark dentry as "unhashed"
469 * (dentry->d_hash.pprev will be LIST_POISON2, not NULL).
472 */ 470 */
473void __d_drop(struct dentry *dentry) 471static void ___d_drop(struct dentry *dentry)
474{ 472{
475 if (!d_unhashed(dentry)) { 473 if (!d_unhashed(dentry)) {
476 struct hlist_bl_head *b; 474 struct hlist_bl_head *b;
477 /* 475 /*
478 * Hashed dentries are normally on the dentry hashtable, 476 * Hashed dentries are normally on the dentry hashtable,
479 * with the exception of those newly allocated by 477 * with the exception of those newly allocated by
480 * d_obtain_alias, which are always IS_ROOT: 478 * d_obtain_root, which are always IS_ROOT:
481 */ 479 */
482 if (unlikely(IS_ROOT(dentry))) 480 if (unlikely(IS_ROOT(dentry)))
483 b = &dentry->d_sb->s_anon; 481 b = &dentry->d_sb->s_roots;
484 else 482 else
485 b = d_hash(dentry->d_name.hash); 483 b = d_hash(dentry->d_name.hash);
486 484
487 hlist_bl_lock(b); 485 hlist_bl_lock(b);
488 __hlist_bl_del(&dentry->d_hash); 486 __hlist_bl_del(&dentry->d_hash);
489 dentry->d_hash.pprev = NULL;
490 hlist_bl_unlock(b); 487 hlist_bl_unlock(b);
491 /* After this call, in-progress rcu-walk path lookup will fail. */ 488 /* After this call, in-progress rcu-walk path lookup will fail. */
492 write_seqcount_invalidate(&dentry->d_seq); 489 write_seqcount_invalidate(&dentry->d_seq);
493 } 490 }
494} 491}
492
493void __d_drop(struct dentry *dentry)
494{
495 ___d_drop(dentry);
496 dentry->d_hash.pprev = NULL;
497}
495EXPORT_SYMBOL(__d_drop); 498EXPORT_SYMBOL(__d_drop);
496 499
497void d_drop(struct dentry *dentry) 500void d_drop(struct dentry *dentry)
@@ -1500,8 +1503,8 @@ void shrink_dcache_for_umount(struct super_block *sb)
1500 sb->s_root = NULL; 1503 sb->s_root = NULL;
1501 do_one_tree(dentry); 1504 do_one_tree(dentry);
1502 1505
1503 while (!hlist_bl_empty(&sb->s_anon)) { 1506 while (!hlist_bl_empty(&sb->s_roots)) {
1504 dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash)); 1507 dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash));
1505 do_one_tree(dentry); 1508 do_one_tree(dentry);
1506 } 1509 }
1507} 1510}
@@ -1623,9 +1626,6 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1623 } 1626 }
1624 atomic_set(&p->u.count, 1); 1627 atomic_set(&p->u.count, 1);
1625 dname = p->name; 1628 dname = p->name;
1626 if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS))
1627 kasan_unpoison_shadow(dname,
1628 round_up(name->len + 1, sizeof(unsigned long)));
1629 } else { 1629 } else {
1630 dname = dentry->d_iname; 1630 dname = dentry->d_iname;
1631 } 1631 }
@@ -1964,9 +1964,11 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
1964 spin_lock(&tmp->d_lock); 1964 spin_lock(&tmp->d_lock);
1965 __d_set_inode_and_type(tmp, inode, add_flags); 1965 __d_set_inode_and_type(tmp, inode, add_flags);
1966 hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); 1966 hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
1967 hlist_bl_lock(&tmp->d_sb->s_anon); 1967 if (!disconnected) {
1968 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1968 hlist_bl_lock(&tmp->d_sb->s_roots);
1969 hlist_bl_unlock(&tmp->d_sb->s_anon); 1969 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_roots);
1970 hlist_bl_unlock(&tmp->d_sb->s_roots);
1971 }
1970 spin_unlock(&tmp->d_lock); 1972 spin_unlock(&tmp->d_lock);
1971 spin_unlock(&inode->i_lock); 1973 spin_unlock(&inode->i_lock);
1972 1974
@@ -2380,7 +2382,7 @@ EXPORT_SYMBOL(d_delete);
2380static void __d_rehash(struct dentry *entry) 2382static void __d_rehash(struct dentry *entry)
2381{ 2383{
2382 struct hlist_bl_head *b = d_hash(entry->d_name.hash); 2384 struct hlist_bl_head *b = d_hash(entry->d_name.hash);
2383 BUG_ON(!d_unhashed(entry)); 2385
2384 hlist_bl_lock(b); 2386 hlist_bl_lock(b);
2385 hlist_bl_add_head_rcu(&entry->d_hash, b); 2387 hlist_bl_add_head_rcu(&entry->d_hash, b);
2386 hlist_bl_unlock(b); 2388 hlist_bl_unlock(b);
@@ -2815,9 +2817,9 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
2815 write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); 2817 write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
2816 2818
2817 /* unhash both */ 2819 /* unhash both */
2818 /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ 2820 /* ___d_drop does write_seqcount_barrier, but they're OK to nest. */
2819 __d_drop(dentry); 2821 ___d_drop(dentry);
2820 __d_drop(target); 2822 ___d_drop(target);
2821 2823
2822 /* Switch the names.. */ 2824 /* Switch the names.. */
2823 if (exchange) 2825 if (exchange)
@@ -2829,6 +2831,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
2829 __d_rehash(dentry); 2831 __d_rehash(dentry);
2830 if (exchange) 2832 if (exchange)
2831 __d_rehash(target); 2833 __d_rehash(target);
2834 else
2835 target->d_hash.pprev = NULL;
2832 2836
2833 /* ... and switch them in the tree */ 2837 /* ... and switch them in the tree */
2834 if (IS_ROOT(dentry)) { 2838 if (IS_ROOT(dentry)) {
@@ -3585,9 +3589,10 @@ static void __init dcache_init_early(void)
3585 13, 3589 13,
3586 HASH_EARLY | HASH_ZERO, 3590 HASH_EARLY | HASH_ZERO,
3587 &d_hash_shift, 3591 &d_hash_shift,
3588 &d_hash_mask, 3592 NULL,
3589 0, 3593 0,
3590 0); 3594 0);
3595 d_hash_shift = 32 - d_hash_shift;
3591} 3596}
3592 3597
3593static void __init dcache_init(void) 3598static void __init dcache_init(void)
@@ -3597,8 +3602,9 @@ static void __init dcache_init(void)
3597 * but it is probably not worth it because of the cache nature 3602 * but it is probably not worth it because of the cache nature
3598 * of the dcache. 3603 * of the dcache.
3599 */ 3604 */
3600 dentry_cache = KMEM_CACHE(dentry, 3605 dentry_cache = KMEM_CACHE_USERCOPY(dentry,
3601 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT); 3606 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
3607 d_iname);
3602 3608
3603 /* Hash may have been set up in dcache_init_early */ 3609 /* Hash may have been set up in dcache_init_early */
3604 if (!hashdist) 3610 if (!hashdist)
@@ -3611,9 +3617,10 @@ static void __init dcache_init(void)
3611 13, 3617 13,
3612 HASH_ZERO, 3618 HASH_ZERO,
3613 &d_hash_shift, 3619 &d_hash_shift,
3614 &d_hash_mask, 3620 NULL,
3615 0, 3621 0,
3616 0); 3622 0);
3623 d_hash_shift = 32 - d_hash_shift;
3617} 3624}
3618 3625
3619/* SLAB cache for __getname() consumers */ 3626/* SLAB cache for __getname() consumers */
@@ -3635,8 +3642,8 @@ void __init vfs_caches_init_early(void)
3635 3642
3636void __init vfs_caches_init(void) 3643void __init vfs_caches_init(void)
3637{ 3644{
3638 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 3645 names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
3639 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 3646 SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);
3640 3647
3641 dcache_init(); 3648 dcache_init();
3642 inode_init(); 3649 inode_init();
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 7eae33ffa3fc..e31d6ed3ec32 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -168,11 +168,11 @@ struct vfsmount *devpts_mntget(struct file *filp, struct pts_fs_info *fsi)
168 dput(path.dentry); 168 dput(path.dentry);
169 if (err) { 169 if (err) {
170 mntput(path.mnt); 170 mntput(path.mnt);
171 path.mnt = ERR_PTR(err); 171 return ERR_PTR(err);
172 } 172 }
173 if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) { 173 if (DEVPTS_SB(path.mnt->mnt_sb) != fsi) {
174 mntput(path.mnt); 174 mntput(path.mnt);
175 path.mnt = ERR_PTR(-ENODEV); 175 return ERR_PTR(-ENODEV);
176 } 176 }
177 return path.mnt; 177 return path.mnt;
178} 178}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3aafb3343a65..a0ca9e48e993 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -219,6 +219,27 @@ static inline struct page *dio_get_page(struct dio *dio,
219 return dio->pages[sdio->head]; 219 return dio->pages[sdio->head];
220} 220}
221 221
222/*
223 * Warn about a page cache invalidation failure during a direct io write.
224 */
225void dio_warn_stale_pagecache(struct file *filp)
226{
227 static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
228 char pathname[128];
229 struct inode *inode = file_inode(filp);
230 char *path;
231
232 errseq_set(&inode->i_mapping->wb_err, -EIO);
233 if (__ratelimit(&_rs)) {
234 path = file_path(filp, pathname, sizeof(pathname));
235 if (IS_ERR(path))
236 path = "(unknown)";
237 pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n");
238 pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
239 current->comm);
240 }
241}
242
222/** 243/**
223 * dio_complete() - called when all DIO BIO I/O has been completed 244 * dio_complete() - called when all DIO BIO I/O has been completed
224 * @offset: the byte offset in the file of the completed operation 245 * @offset: the byte offset in the file of the completed operation
@@ -290,7 +311,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
290 err = invalidate_inode_pages2_range(dio->inode->i_mapping, 311 err = invalidate_inode_pages2_range(dio->inode->i_mapping,
291 offset >> PAGE_SHIFT, 312 offset >> PAGE_SHIFT,
292 (offset + ret - 1) >> PAGE_SHIFT); 313 (offset + ret - 1) >> PAGE_SHIFT);
293 WARN_ON_ONCE(err); 314 if (err)
315 dio_warn_stale_pagecache(dio->iocb->ki_filp);
294 } 316 }
295 317
296 if (!(dio->flags & DIO_SKIP_DIO_COUNT)) 318 if (!(dio->flags & DIO_SKIP_DIO_COUNT))
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 6318a9b57e53..04fd824142a1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -80,24 +80,11 @@ static void eventfd_free(struct kref *kref)
80} 80}
81 81
82/** 82/**
83 * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
84 * @ctx: [in] Pointer to the eventfd context.
85 *
86 * Returns: In case of success, returns a pointer to the eventfd context.
87 */
88struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
89{
90 kref_get(&ctx->kref);
91 return ctx;
92}
93EXPORT_SYMBOL_GPL(eventfd_ctx_get);
94
95/**
96 * eventfd_ctx_put - Releases a reference to the internal eventfd context. 83 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
97 * @ctx: [in] Pointer to eventfd context. 84 * @ctx: [in] Pointer to eventfd context.
98 * 85 *
99 * The eventfd context reference must have been previously acquired either 86 * The eventfd context reference must have been previously acquired either
100 * with eventfd_ctx_get() or eventfd_ctx_fdget(). 87 * with eventfd_ctx_fdget() or eventfd_ctx_fileget().
101 */ 88 */
102void eventfd_ctx_put(struct eventfd_ctx *ctx) 89void eventfd_ctx_put(struct eventfd_ctx *ctx)
103{ 90{
@@ -207,36 +194,27 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
207} 194}
208EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); 195EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
209 196
210/** 197static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
211 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. 198 loff_t *ppos)
212 * @ctx: [in] Pointer to eventfd context.
213 * @no_wait: [in] Different from zero if the operation should not block.
214 * @cnt: [out] Pointer to the 64-bit counter value.
215 *
216 * Returns %0 if successful, or the following error codes:
217 *
218 * - -EAGAIN : The operation would have blocked but @no_wait was non-zero.
219 * - -ERESTARTSYS : A signal interrupted the wait operation.
220 *
221 * If @no_wait is zero, the function might sleep until the eventfd internal
222 * counter becomes greater than zero.
223 */
224ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
225{ 199{
200 struct eventfd_ctx *ctx = file->private_data;
226 ssize_t res; 201 ssize_t res;
202 __u64 ucnt = 0;
227 DECLARE_WAITQUEUE(wait, current); 203 DECLARE_WAITQUEUE(wait, current);
228 204
205 if (count < sizeof(ucnt))
206 return -EINVAL;
207
229 spin_lock_irq(&ctx->wqh.lock); 208 spin_lock_irq(&ctx->wqh.lock);
230 *cnt = 0;
231 res = -EAGAIN; 209 res = -EAGAIN;
232 if (ctx->count > 0) 210 if (ctx->count > 0)
233 res = 0; 211 res = sizeof(ucnt);
234 else if (!no_wait) { 212 else if (!(file->f_flags & O_NONBLOCK)) {
235 __add_wait_queue(&ctx->wqh, &wait); 213 __add_wait_queue(&ctx->wqh, &wait);
236 for (;;) { 214 for (;;) {
237 set_current_state(TASK_INTERRUPTIBLE); 215 set_current_state(TASK_INTERRUPTIBLE);
238 if (ctx->count > 0) { 216 if (ctx->count > 0) {
239 res = 0; 217 res = sizeof(ucnt);
240 break; 218 break;
241 } 219 }
242 if (signal_pending(current)) { 220 if (signal_pending(current)) {
@@ -250,31 +228,17 @@ ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
250 __remove_wait_queue(&ctx->wqh, &wait); 228 __remove_wait_queue(&ctx->wqh, &wait);
251 __set_current_state(TASK_RUNNING); 229 __set_current_state(TASK_RUNNING);
252 } 230 }
253 if (likely(res == 0)) { 231 if (likely(res > 0)) {
254 eventfd_ctx_do_read(ctx, cnt); 232 eventfd_ctx_do_read(ctx, &ucnt);
255 if (waitqueue_active(&ctx->wqh)) 233 if (waitqueue_active(&ctx->wqh))
256 wake_up_locked_poll(&ctx->wqh, POLLOUT); 234 wake_up_locked_poll(&ctx->wqh, POLLOUT);
257 } 235 }
258 spin_unlock_irq(&ctx->wqh.lock); 236 spin_unlock_irq(&ctx->wqh.lock);
259 237
260 return res; 238 if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
261} 239 return -EFAULT;
262EXPORT_SYMBOL_GPL(eventfd_ctx_read);
263
264static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
265 loff_t *ppos)
266{
267 struct eventfd_ctx *ctx = file->private_data;
268 ssize_t res;
269 __u64 cnt;
270
271 if (count < sizeof(cnt))
272 return -EINVAL;
273 res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
274 if (res < 0)
275 return res;
276 240
277 return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt); 241 return res;
278} 242}
279 243
280static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, 244static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
@@ -405,79 +369,44 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
405 */ 369 */
406struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) 370struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
407{ 371{
372 struct eventfd_ctx *ctx;
373
408 if (file->f_op != &eventfd_fops) 374 if (file->f_op != &eventfd_fops)
409 return ERR_PTR(-EINVAL); 375 return ERR_PTR(-EINVAL);
410 376
411 return eventfd_ctx_get(file->private_data); 377 ctx = file->private_data;
378 kref_get(&ctx->kref);
379 return ctx;
412} 380}
413EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); 381EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
414 382
415/** 383SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
416 * eventfd_file_create - Creates an eventfd file pointer.
417 * @count: Initial eventfd counter value.
418 * @flags: Flags for the eventfd file.
419 *
420 * This function creates an eventfd file pointer, w/out installing it into
421 * the fd table. This is useful when the eventfd file is used during the
422 * initialization of data structures that require extra setup after the eventfd
423 * creation. So the eventfd creation is split into the file pointer creation
424 * phase, and the file descriptor installation phase.
425 * In this way races with userspace closing the newly installed file descriptor
426 * can be avoided.
427 * Returns an eventfd file pointer, or a proper error pointer.
428 */
429struct file *eventfd_file_create(unsigned int count, int flags)
430{ 384{
431 struct file *file;
432 struct eventfd_ctx *ctx; 385 struct eventfd_ctx *ctx;
386 int fd;
433 387
434 /* Check the EFD_* constants for consistency. */ 388 /* Check the EFD_* constants for consistency. */
435 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); 389 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
436 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); 390 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
437 391
438 if (flags & ~EFD_FLAGS_SET) 392 if (flags & ~EFD_FLAGS_SET)
439 return ERR_PTR(-EINVAL); 393 return -EINVAL;
440 394
441 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 395 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
442 if (!ctx) 396 if (!ctx)
443 return ERR_PTR(-ENOMEM); 397 return -ENOMEM;
444 398
445 kref_init(&ctx->kref); 399 kref_init(&ctx->kref);
446 init_waitqueue_head(&ctx->wqh); 400 init_waitqueue_head(&ctx->wqh);
447 ctx->count = count; 401 ctx->count = count;
448 ctx->flags = flags; 402 ctx->flags = flags;
449 403
450 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, 404 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
451 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS)); 405 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
452 if (IS_ERR(file)) 406 if (fd < 0)
453 eventfd_free_ctx(ctx); 407 eventfd_free_ctx(ctx);
454 408
455 return file;
456}
457
458SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
459{
460 int fd, error;
461 struct file *file;
462
463 error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
464 if (error < 0)
465 return error;
466 fd = error;
467
468 file = eventfd_file_create(count, flags);
469 if (IS_ERR(file)) {
470 error = PTR_ERR(file);
471 goto err_put_unused_fd;
472 }
473 fd_install(fd, file);
474
475 return fd; 409 return fd;
476
477err_put_unused_fd:
478 put_unused_fd(fd);
479
480 return error;
481} 410}
482 411
483SYSCALL_DEFINE1(eventfd, unsigned int, count) 412SYSCALL_DEFINE1(eventfd, unsigned int, count)
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 7e244093c0e5..179cd5c2f52a 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -193,10 +193,13 @@ static void exofs_init_once(void *foo)
193 */ 193 */
194static int init_inodecache(void) 194static int init_inodecache(void)
195{ 195{
196 exofs_inode_cachep = kmem_cache_create("exofs_inode_cache", 196 exofs_inode_cachep = kmem_cache_create_usercopy("exofs_inode_cache",
197 sizeof(struct exofs_i_info), 0, 197 sizeof(struct exofs_i_info), 0,
198 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | 198 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
199 SLAB_ACCOUNT, exofs_init_once); 199 SLAB_ACCOUNT,
200 offsetof(struct exofs_i_info, i_data),
201 sizeof_field(struct exofs_i_info, i_data),
202 exofs_init_once);
200 if (exofs_inode_cachep == NULL) 203 if (exofs_inode_cachep == NULL)
201 return -ENOMEM; 204 return -ENOMEM;
202 return 0; 205 return 0;
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e12d9..894e4c53d1d2 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -13,8 +13,7 @@ config EXT2_FS_XATTR
13 depends on EXT2_FS 13 depends on EXT2_FS
14 help 14 help
15 Extended attributes are name:value pairs associated with inodes by 15 Extended attributes are name:value pairs associated with inodes by
16 the kernel or by users (see the attr(5) manual page, or visit 16 the kernel or by users (see the attr(5) manual page for details).
17 <http://acl.bestbits.at/> for details).
18 17
19 If unsure, say N. 18 If unsure, say N.
20 19
@@ -26,9 +25,6 @@ config EXT2_FS_POSIX_ACL
26 Posix Access Control Lists (ACLs) support permissions for users and 25 Posix Access Control Lists (ACLs) support permissions for users and
27 groups beyond the owner/group/world scheme. 26 groups beyond the owner/group/world scheme.
28 27
29 To learn more about Access Control Lists, visit the Posix ACLs for
30 Linux website <http://acl.bestbits.at/>.
31
32 If you don't know what Access Control Lists are, say N 28 If you don't know what Access Control Lists are, say N
33 29
34config EXT2_FS_SECURITY 30config EXT2_FS_SECURITY
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2da67699dc33..09640220fda8 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf)
100 } 100 }
101 down_read(&ei->dax_sem); 101 down_read(&ei->dax_sem);
102 102
103 ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops); 103 ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
104 104
105 up_read(&ei->dax_sem); 105 up_read(&ei->dax_sem);
106 if (vmf->flags & FAULT_FLAG_WRITE) 106 if (vmf->flags & FAULT_FLAG_WRITE)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 554c98b8a93a..db5f9daa7780 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -221,11 +221,13 @@ static void init_once(void *foo)
221 221
222static int __init init_inodecache(void) 222static int __init init_inodecache(void)
223{ 223{
224 ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", 224 ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache",
225 sizeof(struct ext2_inode_info), 225 sizeof(struct ext2_inode_info), 0,
226 0, (SLAB_RECLAIM_ACCOUNT| 226 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
227 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 227 SLAB_ACCOUNT),
228 init_once); 228 offsetof(struct ext2_inode_info, i_data),
229 sizeof_field(struct ext2_inode_info, i_data),
230 init_once);
229 if (ext2_inode_cachep == NULL) 231 if (ext2_inode_cachep == NULL)
230 return -ENOMEM; 232 return -ENOMEM;
231 return 0; 233 return 0;
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 73b850f5659c..a453cc87082b 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -82,9 +82,6 @@ config EXT4_FS_POSIX_ACL
82 POSIX Access Control Lists (ACLs) support permissions for users and 82 POSIX Access Control Lists (ACLs) support permissions for users and
83 groups beyond the owner/group/world scheme. 83 groups beyond the owner/group/world scheme.
84 84
85 To learn more about Access Control Lists, visit the POSIX ACLs for
86 Linux website <http://acl.bestbits.at/>.
87
88 If you don't know what Access Control Lists are, say N 85 If you don't know what Access Control Lists are, say N
89 86
90config EXT4_FS_SECURITY 87config EXT4_FS_SECURITY
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index a48fc5ae2701..9b63f5416a2f 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 File: fs/ext4/acl.h 3 File: fs/ext4/acl.h
4 4
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index a943e568292e..f9b3e0a83526 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -355,10 +355,10 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
355 blk = ext4_inode_table(sb, desc); 355 blk = ext4_inode_table(sb, desc);
356 offset = blk - group_first_block; 356 offset = blk - group_first_block;
357 next_zero_bit = ext4_find_next_zero_bit(bh->b_data, 357 next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
358 EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group), 358 EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
359 EXT4_B2C(sbi, offset)); 359 EXT4_B2C(sbi, offset));
360 if (next_zero_bit < 360 if (next_zero_bit <
361 EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group)) 361 EXT4_B2C(sbi, offset + sbi->s_itb_per_group))
362 /* bad bitmap for inode tables */ 362 /* bad bitmap for inode tables */
363 return blk; 363 return blk;
364 return 0; 364 return 0;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index bee888e0e2db..913061c0de1b 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -147,11 +147,11 @@ int ext4_setup_system_zone(struct super_block *sb)
147 int ret; 147 int ret;
148 148
149 if (!test_opt(sb, BLOCK_VALIDITY)) { 149 if (!test_opt(sb, BLOCK_VALIDITY)) {
150 if (EXT4_SB(sb)->system_blks.rb_node) 150 if (sbi->system_blks.rb_node)
151 ext4_release_system_zone(sb); 151 ext4_release_system_zone(sb);
152 return 0; 152 return 0;
153 } 153 }
154 if (EXT4_SB(sb)->system_blks.rb_node) 154 if (sbi->system_blks.rb_node)
155 return 0; 155 return 0;
156 156
157 for (i=0; i < ngroups; i++) { 157 for (i=0; i < ngroups; i++) {
@@ -173,7 +173,7 @@ int ext4_setup_system_zone(struct super_block *sb)
173 } 173 }
174 174
175 if (test_opt(sb, DEBUG)) 175 if (test_opt(sb, DEBUG))
176 debug_print_tree(EXT4_SB(sb)); 176 debug_print_tree(sbi);
177 return 0; 177 return 0;
178} 178}
179 179
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4e091eae38b1..3241475a1733 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * ext4.h 3 * ext4.h
4 * 4 *
@@ -611,10 +611,10 @@ enum {
611/* 611/*
612 * Flags used by ext4_free_blocks 612 * Flags used by ext4_free_blocks
613 */ 613 */
614#define EXT4_FREE_BLOCKS_METADATA 0x0001 614#define EXT4_FREE_BLOCKS_METADATA 0x0001
615#define EXT4_FREE_BLOCKS_FORGET 0x0002 615#define EXT4_FREE_BLOCKS_FORGET 0x0002
616#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 616#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
617#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 617#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
618#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 618#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
619#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 619#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
620 620
@@ -1986,10 +1986,10 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
1986 1986
1987/* Legal values for the dx_root hash_version field: */ 1987/* Legal values for the dx_root hash_version field: */
1988 1988
1989#define DX_HASH_LEGACY 0 1989#define DX_HASH_LEGACY 0
1990#define DX_HASH_HALF_MD4 1 1990#define DX_HASH_HALF_MD4 1
1991#define DX_HASH_TEA 2 1991#define DX_HASH_TEA 2
1992#define DX_HASH_LEGACY_UNSIGNED 3 1992#define DX_HASH_LEGACY_UNSIGNED 3
1993#define DX_HASH_HALF_MD4_UNSIGNED 4 1993#define DX_HASH_HALF_MD4_UNSIGNED 4
1994#define DX_HASH_TEA_UNSIGNED 5 1994#define DX_HASH_TEA_UNSIGNED 5
1995 1995
@@ -2000,7 +2000,6 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
2000 struct shash_desc shash; 2000 struct shash_desc shash;
2001 char ctx[4]; 2001 char ctx[4];
2002 } desc; 2002 } desc;
2003 int err;
2004 2003
2005 BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx)); 2004 BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx));
2006 2005
@@ -2008,8 +2007,7 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
2008 desc.shash.flags = 0; 2007 desc.shash.flags = 0;
2009 *(u32 *)desc.ctx = crc; 2008 *(u32 *)desc.ctx = crc;
2010 2009
2011 err = crypto_shash_update(&desc.shash, address, length); 2010 BUG_ON(crypto_shash_update(&desc.shash, address, length));
2012 BUG_ON(err);
2013 2011
2014 return *(u32 *)desc.ctx; 2012 return *(u32 *)desc.ctx;
2015} 2013}
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 8ecf84b8f5a1..98fb0c119c68 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -1,19 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com 3 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com> 4 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
17 */ 5 */
18 6
19#ifndef _EXT4_EXTENTS 7#ifndef _EXT4_EXTENTS
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 48143e32411c..15b6dd733780 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * ext4_jbd2.h 3 * ext4_jbd2.h
3 * 4 *
@@ -5,10 +6,6 @@
5 * 6 *
6 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved 7 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
7 * 8 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Ext4-specific journaling extensions. 9 * Ext4-specific journaling extensions.
13 */ 10 */
14 11
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c941251ac0c0..054416e9d827 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com 3 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com> 4 * Written by Alex Tomas <alex@clusterfs.com>
@@ -5,19 +6,6 @@
5 * Architecture independence: 6 * Architecture independence:
6 * Copyright (c) 2005, Bull S.A. 7 * Copyright (c) 2005, Bull S.A.
7 * Written by Pierre Peiffer <pierre.peiffer@bull.net> 8 * Written by Pierre Peiffer <pierre.peiffer@bull.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
21 */ 9 */
22 10
23/* 11/*
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index ca90fc96f47e..8efdeb903d6b 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * fs/ext4/extents_status.h 3 * fs/ext4/extents_status.h
4 * 4 *
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a0ae27b1bc66..fb6f023622fe 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -280,7 +280,8 @@ out:
280static int ext4_dax_huge_fault(struct vm_fault *vmf, 280static int ext4_dax_huge_fault(struct vm_fault *vmf,
281 enum page_entry_size pe_size) 281 enum page_entry_size pe_size)
282{ 282{
283 int result; 283 int result, error = 0;
284 int retries = 0;
284 handle_t *handle = NULL; 285 handle_t *handle = NULL;
285 struct inode *inode = file_inode(vmf->vma->vm_file); 286 struct inode *inode = file_inode(vmf->vma->vm_file);
286 struct super_block *sb = inode->i_sb; 287 struct super_block *sb = inode->i_sb;
@@ -304,6 +305,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
304 sb_start_pagefault(sb); 305 sb_start_pagefault(sb);
305 file_update_time(vmf->vma->vm_file); 306 file_update_time(vmf->vma->vm_file);
306 down_read(&EXT4_I(inode)->i_mmap_sem); 307 down_read(&EXT4_I(inode)->i_mmap_sem);
308retry:
307 handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, 309 handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
308 EXT4_DATA_TRANS_BLOCKS(sb)); 310 EXT4_DATA_TRANS_BLOCKS(sb));
309 if (IS_ERR(handle)) { 311 if (IS_ERR(handle)) {
@@ -314,9 +316,13 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
314 } else { 316 } else {
315 down_read(&EXT4_I(inode)->i_mmap_sem); 317 down_read(&EXT4_I(inode)->i_mmap_sem);
316 } 318 }
317 result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops); 319 result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
318 if (write) { 320 if (write) {
319 ext4_journal_stop(handle); 321 ext4_journal_stop(handle);
322
323 if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
324 ext4_should_retry_alloc(sb, &retries))
325 goto retry;
320 /* Handling synchronous page fault? */ 326 /* Handling synchronous page fault? */
321 if (result & VM_FAULT_NEEDDSYNC) 327 if (result & VM_FAULT_NEEDDSYNC)
322 result = dax_finish_sync_fault(vmf, pe_size, pfn); 328 result = dax_finish_sync_fault(vmf, pe_size, pfn);
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 7ec340898598..e871c4bf18e9 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -1,21 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 * 4 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */ 6 */
20#include "ext4.h" 7#include "ext4.h"
21#include <linux/fsmap.h> 8#include <linux/fsmap.h>
diff --git a/fs/ext4/fsmap.h b/fs/ext4/fsmap.h
index 9a2cd367cc66..68c8001fee85 100644
--- a/fs/ext4/fsmap.h
+++ b/fs/ext4/fsmap.h
@@ -1,21 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 * 4 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */ 6 */
20#ifndef __EXT4_FSMAP_H__ 7#ifndef __EXT4_FSMAP_H__
21#define __EXT4_FSMAP_H__ 8#define __EXT4_FSMAP_H__
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 00c6dd29e621..e22dcfab308b 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -1,12 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * linux/fs/ext4/hash.c 3 * linux/fs/ext4/hash.c
3 * 4 *
4 * Copyright (C) 2002 by Theodore Ts'o 5 * Copyright (C) 2002 by Theodore Ts'o
5 *
6 * This file is released under the GPL v2.
7 *
8 * This file may be redistributed under the terms of the GNU Public
9 * License.
10 */ 6 */
11 7
12#include <linux/fs.h> 8#include <linux/fs.h>
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b32cf263750d..7830d28df331 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -303,7 +303,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
303 /* Do this BEFORE marking the inode not in use or returning an error */ 303 /* Do this BEFORE marking the inode not in use or returning an error */
304 ext4_clear_inode(inode); 304 ext4_clear_inode(inode);
305 305
306 es = EXT4_SB(sb)->s_es; 306 es = sbi->s_es;
307 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 307 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
308 ext4_error(sb, "reserved or nonexistent inode %lu", ino); 308 ext4_error(sb, "reserved or nonexistent inode %lu", ino);
309 goto error_return; 309 goto error_return;
@@ -1157,7 +1157,7 @@ got:
1157 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 1157 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
1158 ext4_set_inode_state(inode, EXT4_STATE_NEW); 1158 ext4_set_inode_state(inode, EXT4_STATE_NEW);
1159 1159
1160 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 1160 ei->i_extra_isize = sbi->s_want_extra_isize;
1161 ei->i_inline_off = 0; 1161 ei->i_inline_off = 0;
1162 if (ext4_has_feature_inline_data(sb)) 1162 if (ext4_has_feature_inline_data(sb))
1163 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1163 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index a8b987b71173..7c4165b88505 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1,15 +1,7 @@
1// SPDX-License-Identifier: LGPL-2.1
1/* 2/*
2 * Copyright (c) 2012 Taobao. 3 * Copyright (c) 2012 Taobao.
3 * Written by Tao Ma <boyu.mt@taobao.com> 4 * Written by Tao Ma <boyu.mt@taobao.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */ 5 */
14 6
15#include <linux/iomap.h> 7#include <linux/iomap.h>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0eff5b761c6e..c94780075b04 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3768,10 +3768,18 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
3768 /* Credits for sb + inode write */ 3768 /* Credits for sb + inode write */
3769 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 3769 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3770 if (IS_ERR(handle)) { 3770 if (IS_ERR(handle)) {
3771 /* This is really bad luck. We've written the data 3771 /*
3772 * but cannot extend i_size. Bail out and pretend 3772 * We wrote the data but cannot extend
3773 * the write failed... */ 3773 * i_size. Bail out. In async io case, we do
3774 ret = PTR_ERR(handle); 3774 * not return error here because we have
3775 * already submmitted the corresponding
3776 * bio. Returning error here makes the caller
3777 * think that this IO is done and failed
3778 * resulting in race with bio's completion
3779 * handler.
3780 */
3781 if (!ret)
3782 ret = PTR_ERR(handle);
3775 if (inode->i_nlink) 3783 if (inode->i_nlink)
3776 ext4_orphan_del(NULL, inode); 3784 ext4_orphan_del(NULL, inode);
3777 3785
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d9f8b90a93ed..769a62708b1c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1,19 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com 3 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com> 4 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
17 */ 5 */
18 6
19 7
@@ -769,10 +757,10 @@ void ext4_mb_generate_buddy(struct super_block *sb,
769 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); 757 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
770 758
771 period = get_cycles() - period; 759 period = get_cycles() - period;
772 spin_lock(&EXT4_SB(sb)->s_bal_lock); 760 spin_lock(&sbi->s_bal_lock);
773 EXT4_SB(sb)->s_mb_buddies_generated++; 761 sbi->s_mb_buddies_generated++;
774 EXT4_SB(sb)->s_mb_generation_time += period; 762 sbi->s_mb_generation_time += period;
775 spin_unlock(&EXT4_SB(sb)->s_bal_lock); 763 spin_unlock(&sbi->s_bal_lock);
776} 764}
777 765
778static void mb_regenerate_buddy(struct ext4_buddy *e4b) 766static void mb_regenerate_buddy(struct ext4_buddy *e4b)
@@ -1459,7 +1447,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1459 ext4_fsblk_t blocknr; 1447 ext4_fsblk_t blocknr;
1460 1448
1461 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1449 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1462 blocknr += EXT4_C2B(EXT4_SB(sb), block); 1450 blocknr += EXT4_C2B(sbi, block);
1463 ext4_grp_locked_error(sb, e4b->bd_group, 1451 ext4_grp_locked_error(sb, e4b->bd_group,
1464 inode ? inode->i_ino : 0, 1452 inode ? inode->i_ino : 0,
1465 blocknr, 1453 blocknr,
@@ -4850,9 +4838,9 @@ do_more:
4850 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4838 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4851 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4839 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4852 in_range(block, ext4_inode_table(sb, gdp), 4840 in_range(block, ext4_inode_table(sb, gdp),
4853 EXT4_SB(sb)->s_itb_per_group) || 4841 sbi->s_itb_per_group) ||
4854 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4842 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4855 EXT4_SB(sb)->s_itb_per_group)) { 4843 sbi->s_itb_per_group)) {
4856 4844
4857 ext4_error(sb, "Freeing blocks in system zone - " 4845 ext4_error(sb, "Freeing blocks in system zone - "
4858 "Block = %llu, count = %lu", block, count); 4846 "Block = %llu, count = %lu", block, count);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dcf52540f379..88c98f17e3d9 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * fs/ext4/mballoc.h 3 * fs/ext4/mballoc.h
4 * 4 *
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index cf5181b62df1..61a9d1927817 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -1,15 +1,8 @@
1// SPDX-License-Identifier: LGPL-2.1
1/* 2/*
2 * Copyright IBM Corporation, 2007 3 * Copyright IBM Corporation, 2007
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */ 6 */
14 7
15#include <linux/slab.h> 8#include <linux/slab.h>
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 9bb36909ec92..b96e4bd3b3ec 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1,16 +1,8 @@
1// SPDX-License-Identifier: LGPL-2.1
1/* 2/*
2 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. 3 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
3 * Written by Takashi Sato <t-sato@yk.jp.nec.com> 4 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
4 * Akira Fujita <a-fujita@rs.jp.nec.com> 5 * Akira Fujita <a-fujita@rs.jp.nec.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */ 6 */
15 7
16#include <linux/fs.h> 8#include <linux/fs.h>
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6660686e505a..b1f21e3a0763 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3057,39 +3057,19 @@ static int ext4_symlink(struct inode *dir,
3057 struct inode *inode; 3057 struct inode *inode;
3058 int err, len = strlen(symname); 3058 int err, len = strlen(symname);
3059 int credits; 3059 int credits;
3060 bool encryption_required;
3061 struct fscrypt_str disk_link; 3060 struct fscrypt_str disk_link;
3062 struct fscrypt_symlink_data *sd = NULL;
3063 3061
3064 if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) 3062 if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3065 return -EIO; 3063 return -EIO;
3066 3064
3067 disk_link.len = len + 1; 3065 err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
3068 disk_link.name = (char *) symname; 3066 &disk_link);
3069 3067 if (err)
3070 encryption_required = (ext4_encrypted_inode(dir) || 3068 return err;
3071 DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)));
3072 if (encryption_required) {
3073 err = fscrypt_get_encryption_info(dir);
3074 if (err)
3075 return err;
3076 if (!fscrypt_has_encryption_key(dir))
3077 return -ENOKEY;
3078 disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
3079 sizeof(struct fscrypt_symlink_data));
3080 sd = kzalloc(disk_link.len, GFP_KERNEL);
3081 if (!sd)
3082 return -ENOMEM;
3083 }
3084
3085 if (disk_link.len > dir->i_sb->s_blocksize) {
3086 err = -ENAMETOOLONG;
3087 goto err_free_sd;
3088 }
3089 3069
3090 err = dquot_initialize(dir); 3070 err = dquot_initialize(dir);
3091 if (err) 3071 if (err)
3092 goto err_free_sd; 3072 return err;
3093 3073
3094 if ((disk_link.len > EXT4_N_BLOCKS * 4)) { 3074 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3095 /* 3075 /*
@@ -3118,27 +3098,18 @@ static int ext4_symlink(struct inode *dir,
3118 if (IS_ERR(inode)) { 3098 if (IS_ERR(inode)) {
3119 if (handle) 3099 if (handle)
3120 ext4_journal_stop(handle); 3100 ext4_journal_stop(handle);
3121 err = PTR_ERR(inode); 3101 return PTR_ERR(inode);
3122 goto err_free_sd;
3123 } 3102 }
3124 3103
3125 if (encryption_required) { 3104 if (IS_ENCRYPTED(inode)) {
3126 struct qstr istr; 3105 err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
3127 struct fscrypt_str ostr =
3128 FSTR_INIT(sd->encrypted_path, disk_link.len);
3129
3130 istr.name = (const unsigned char *) symname;
3131 istr.len = len;
3132 err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
3133 if (err) 3106 if (err)
3134 goto err_drop_inode; 3107 goto err_drop_inode;
3135 sd->len = cpu_to_le16(ostr.len);
3136 disk_link.name = (char *) sd;
3137 inode->i_op = &ext4_encrypted_symlink_inode_operations; 3108 inode->i_op = &ext4_encrypted_symlink_inode_operations;
3138 } 3109 }
3139 3110
3140 if ((disk_link.len > EXT4_N_BLOCKS * 4)) { 3111 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3141 if (!encryption_required) 3112 if (!IS_ENCRYPTED(inode))
3142 inode->i_op = &ext4_symlink_inode_operations; 3113 inode->i_op = &ext4_symlink_inode_operations;
3143 inode_nohighmem(inode); 3114 inode_nohighmem(inode);
3144 ext4_set_aops(inode); 3115 ext4_set_aops(inode);
@@ -3180,7 +3151,7 @@ static int ext4_symlink(struct inode *dir,
3180 } else { 3151 } else {
3181 /* clear the extent format for fast symlink */ 3152 /* clear the extent format for fast symlink */
3182 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 3153 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
3183 if (!encryption_required) { 3154 if (!IS_ENCRYPTED(inode)) {
3184 inode->i_op = &ext4_fast_symlink_inode_operations; 3155 inode->i_op = &ext4_fast_symlink_inode_operations;
3185 inode->i_link = (char *)&EXT4_I(inode)->i_data; 3156 inode->i_link = (char *)&EXT4_I(inode)->i_data;
3186 } 3157 }
@@ -3195,16 +3166,17 @@ static int ext4_symlink(struct inode *dir,
3195 3166
3196 if (handle) 3167 if (handle)
3197 ext4_journal_stop(handle); 3168 ext4_journal_stop(handle);
3198 kfree(sd); 3169 goto out_free_encrypted_link;
3199 return err; 3170
3200err_drop_inode: 3171err_drop_inode:
3201 if (handle) 3172 if (handle)
3202 ext4_journal_stop(handle); 3173 ext4_journal_stop(handle);
3203 clear_nlink(inode); 3174 clear_nlink(inode);
3204 unlock_new_inode(inode); 3175 unlock_new_inode(inode);
3205 iput(inode); 3176 iput(inode);
3206err_free_sd: 3177out_free_encrypted_link:
3207 kfree(sd); 3178 if (disk_link.name != (unsigned char *)symname)
3179 kfree(disk_link.name);
3208 return err; 3180 return err;
3209} 3181}
3210 3182
@@ -3222,9 +3194,9 @@ static int ext4_link(struct dentry *old_dentry,
3222 if (err) 3194 if (err)
3223 return err; 3195 return err;
3224 3196
3225 if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && 3197 if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
3226 (!projid_eq(EXT4_I(dir)->i_projid, 3198 (!projid_eq(EXT4_I(dir)->i_projid,
3227 EXT4_I(old_dentry->d_inode)->i_projid))) 3199 EXT4_I(old_dentry->d_inode)->i_projid)))
3228 return -EXDEV; 3200 return -EXDEV;
3229 3201
3230 err = dquot_initialize(dir); 3202 err = dquot_initialize(dir);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 50443bda8e98..b6bec270a8e4 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1477,7 +1477,7 @@ static int ext4_flex_group_add(struct super_block *sb,
1477 goto exit_journal; 1477 goto exit_journal;
1478 1478
1479 group = flex_gd->groups[0].group; 1479 group = flex_gd->groups[0].group;
1480 BUG_ON(group != EXT4_SB(sb)->s_groups_count); 1480 BUG_ON(group != sbi->s_groups_count);
1481 err = ext4_add_new_descs(handle, sb, group, 1481 err = ext4_add_new_descs(handle, sb, group,
1482 resize_inode, flex_gd->count); 1482 resize_inode, flex_gd->count);
1483 if (err) 1483 if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5de959fb0244..421222ec3509 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * linux/fs/ext4/super.c 3 * linux/fs/ext4/super.c
3 * 4 *
@@ -743,6 +744,7 @@ __acquires(bitlock)
743 } 744 }
744 745
745 ext4_unlock_group(sb, grp); 746 ext4_unlock_group(sb, grp);
747 ext4_commit_super(sb, 1);
746 ext4_handle_error(sb); 748 ext4_handle_error(sb);
747 /* 749 /*
748 * We only get here in the ERRORS_RO case; relocking the group 750 * We only get here in the ERRORS_RO case; relocking the group
@@ -871,7 +873,6 @@ static void ext4_put_super(struct super_block *sb)
871 ext4_unregister_li_request(sb); 873 ext4_unregister_li_request(sb);
872 ext4_quota_off_umount(sb); 874 ext4_quota_off_umount(sb);
873 875
874 flush_workqueue(sbi->rsv_conversion_wq);
875 destroy_workqueue(sbi->rsv_conversion_wq); 876 destroy_workqueue(sbi->rsv_conversion_wq);
876 877
877 if (sbi->s_journal) { 878 if (sbi->s_journal) {
@@ -1037,11 +1038,13 @@ static void init_once(void *foo)
1037 1038
1038static int __init init_inodecache(void) 1039static int __init init_inodecache(void)
1039{ 1040{
1040 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 1041 ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1041 sizeof(struct ext4_inode_info), 1042 sizeof(struct ext4_inode_info), 0,
1042 0, (SLAB_RECLAIM_ACCOUNT| 1043 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1043 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1044 SLAB_ACCOUNT),
1044 init_once); 1045 offsetof(struct ext4_inode_info, i_data),
1046 sizeof_field(struct ext4_inode_info, i_data),
1047 init_once);
1045 if (ext4_inode_cachep == NULL) 1048 if (ext4_inode_cachep == NULL)
1046 return -ENOMEM; 1049 return -ENOMEM;
1047 return 0; 1050 return 0;
@@ -1070,9 +1073,7 @@ void ext4_clear_inode(struct inode *inode)
1070 jbd2_free_inode(EXT4_I(inode)->jinode); 1073 jbd2_free_inode(EXT4_I(inode)->jinode);
1071 EXT4_I(inode)->jinode = NULL; 1074 EXT4_I(inode)->jinode = NULL;
1072 } 1075 }
1073#ifdef CONFIG_EXT4_FS_ENCRYPTION 1076 fscrypt_put_encryption_info(inode);
1074 fscrypt_put_encryption_info(inode, NULL);
1075#endif
1076} 1077}
1077 1078
1078static struct inode *ext4_nfs_get_inode(struct super_block *sb, 1079static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -2677,7 +2678,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2677 * compensate. 2678 * compensate.
2678 */ 2679 */
2679 if (sb->s_blocksize == 1024 && nr == 0 && 2680 if (sb->s_blocksize == 1024 && nr == 0 &&
2680 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) 2681 le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
2681 has_super++; 2682 has_super++;
2682 2683
2683 return (has_super + ext4_group_first_block_no(sb, bg)); 2684 return (has_super + ext4_group_first_block_no(sb, bg));
@@ -3122,7 +3123,7 @@ int ext4_register_li_request(struct super_block *sb,
3122{ 3123{
3123 struct ext4_sb_info *sbi = EXT4_SB(sb); 3124 struct ext4_sb_info *sbi = EXT4_SB(sb);
3124 struct ext4_li_request *elr = NULL; 3125 struct ext4_li_request *elr = NULL;
3125 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 3126 ext4_group_t ngroups = sbi->s_groups_count;
3126 int ret = 0; 3127 int ret = 0;
3127 3128
3128 mutex_lock(&ext4_li_mtx); 3129 mutex_lock(&ext4_li_mtx);
@@ -4837,7 +4838,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4837 bool needs_barrier = false; 4838 bool needs_barrier = false;
4838 struct ext4_sb_info *sbi = EXT4_SB(sb); 4839 struct ext4_sb_info *sbi = EXT4_SB(sb);
4839 4840
4840 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) 4841 if (unlikely(ext4_forced_shutdown(sbi)))
4841 return 0; 4842 return 0;
4842 4843
4843 trace_ext4_sync_fs(sb, wait); 4844 trace_ext4_sync_fs(sb, wait);
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index a2006c9af1d9..dd05af983092 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -28,59 +28,28 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
28 struct delayed_call *done) 28 struct delayed_call *done)
29{ 29{
30 struct page *cpage = NULL; 30 struct page *cpage = NULL;
31 char *caddr, *paddr = NULL; 31 const void *caddr;
32 struct fscrypt_str cstr, pstr; 32 unsigned int max_size;
33 struct fscrypt_symlink_data *sd; 33 const char *paddr;
34 int res;
35 u32 max_size = inode->i_sb->s_blocksize;
36 34
37 if (!dentry) 35 if (!dentry)
38 return ERR_PTR(-ECHILD); 36 return ERR_PTR(-ECHILD);
39 37
40 res = fscrypt_get_encryption_info(inode);
41 if (res)
42 return ERR_PTR(res);
43
44 if (ext4_inode_is_fast_symlink(inode)) { 38 if (ext4_inode_is_fast_symlink(inode)) {
45 caddr = (char *) EXT4_I(inode)->i_data; 39 caddr = EXT4_I(inode)->i_data;
46 max_size = sizeof(EXT4_I(inode)->i_data); 40 max_size = sizeof(EXT4_I(inode)->i_data);
47 } else { 41 } else {
48 cpage = read_mapping_page(inode->i_mapping, 0, NULL); 42 cpage = read_mapping_page(inode->i_mapping, 0, NULL);
49 if (IS_ERR(cpage)) 43 if (IS_ERR(cpage))
50 return ERR_CAST(cpage); 44 return ERR_CAST(cpage);
51 caddr = page_address(cpage); 45 caddr = page_address(cpage);
46 max_size = inode->i_sb->s_blocksize;
52 } 47 }
53 48
54 /* Symlink is encrypted */ 49 paddr = fscrypt_get_symlink(inode, caddr, max_size, done);
55 sd = (struct fscrypt_symlink_data *)caddr;
56 cstr.name = sd->encrypted_path;
57 cstr.len = le16_to_cpu(sd->len);
58 if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) {
59 /* Symlink data on the disk is corrupted */
60 res = -EFSCORRUPTED;
61 goto errout;
62 }
63
64 res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
65 if (res)
66 goto errout;
67 paddr = pstr.name;
68
69 res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
70 if (res)
71 goto errout;
72
73 /* Null-terminate the name */
74 paddr[pstr.len] = '\0';
75 if (cpage) 50 if (cpage)
76 put_page(cpage); 51 put_page(cpage);
77 set_delayed_call(done, kfree_link, paddr);
78 return paddr; 52 return paddr;
79errout:
80 if (cpage)
81 put_page(cpage);
82 kfree(paddr);
83 return ERR_PTR(res);
84} 53}
85 54
86const struct inode_operations ext4_encrypted_symlink_inode_operations = { 55const struct inode_operations ext4_encrypted_symlink_inode_operations = {
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index e21afd52e7d7..1205261f130c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -11,6 +11,7 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/slab.h>
14#include <linux/proc_fs.h> 15#include <linux/proc_fs.h>
15 16
16#include "ext4.h" 17#include "ext4.h"
@@ -329,6 +330,13 @@ static void ext4_sb_release(struct kobject *kobj)
329 complete(&sbi->s_kobj_unregister); 330 complete(&sbi->s_kobj_unregister);
330} 331}
331 332
333static void ext4_kset_release(struct kobject *kobj)
334{
335 struct kset *kset = container_of(kobj, struct kset, kobj);
336
337 kfree(kset);
338}
339
332static const struct sysfs_ops ext4_attr_ops = { 340static const struct sysfs_ops ext4_attr_ops = {
333 .show = ext4_attr_show, 341 .show = ext4_attr_show,
334 .store = ext4_attr_store, 342 .store = ext4_attr_store,
@@ -342,20 +350,18 @@ static struct kobj_type ext4_sb_ktype = {
342 350
343static struct kobj_type ext4_ktype = { 351static struct kobj_type ext4_ktype = {
344 .sysfs_ops = &ext4_attr_ops, 352 .sysfs_ops = &ext4_attr_ops,
353 .release = ext4_kset_release,
345}; 354};
346 355
347static struct kset ext4_kset = { 356static struct kset *ext4_kset;
348 .kobj = {.ktype = &ext4_ktype},
349};
350 357
351static struct kobj_type ext4_feat_ktype = { 358static struct kobj_type ext4_feat_ktype = {
352 .default_attrs = ext4_feat_attrs, 359 .default_attrs = ext4_feat_attrs,
353 .sysfs_ops = &ext4_attr_ops, 360 .sysfs_ops = &ext4_attr_ops,
361 .release = (void (*)(struct kobject *))kfree,
354}; 362};
355 363
356static struct kobject ext4_feat = { 364static struct kobject *ext4_feat;
357 .kset = &ext4_kset,
358};
359 365
360#define PROC_FILE_SHOW_DEFN(name) \ 366#define PROC_FILE_SHOW_DEFN(name) \
361static int name##_open(struct inode *inode, struct file *file) \ 367static int name##_open(struct inode *inode, struct file *file) \
@@ -392,12 +398,15 @@ int ext4_register_sysfs(struct super_block *sb)
392 const struct ext4_proc_files *p; 398 const struct ext4_proc_files *p;
393 int err; 399 int err;
394 400
395 sbi->s_kobj.kset = &ext4_kset; 401 sbi->s_kobj.kset = ext4_kset;
396 init_completion(&sbi->s_kobj_unregister); 402 init_completion(&sbi->s_kobj_unregister);
397 err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, 403 err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL,
398 "%s", sb->s_id); 404 "%s", sb->s_id);
399 if (err) 405 if (err) {
406 kobject_put(&sbi->s_kobj);
407 wait_for_completion(&sbi->s_kobj_unregister);
400 return err; 408 return err;
409 }
401 410
402 if (ext4_proc_root) 411 if (ext4_proc_root)
403 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 412 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
@@ -427,25 +436,45 @@ int __init ext4_init_sysfs(void)
427{ 436{
428 int ret; 437 int ret;
429 438
430 kobject_set_name(&ext4_kset.kobj, "ext4"); 439 ext4_kset = kzalloc(sizeof(*ext4_kset), GFP_KERNEL);
431 ext4_kset.kobj.parent = fs_kobj; 440 if (!ext4_kset)
432 ret = kset_register(&ext4_kset); 441 return -ENOMEM;
442
443 kobject_set_name(&ext4_kset->kobj, "ext4");
444 ext4_kset->kobj.parent = fs_kobj;
445 ext4_kset->kobj.ktype = &ext4_ktype;
446 ret = kset_register(ext4_kset);
433 if (ret) 447 if (ret)
434 return ret; 448 goto kset_err;
449
450 ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL);
451 if (!ext4_feat) {
452 ret = -ENOMEM;
453 goto kset_err;
454 }
435 455
436 ret = kobject_init_and_add(&ext4_feat, &ext4_feat_ktype, 456 ext4_feat->kset = ext4_kset;
457 ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype,
437 NULL, "features"); 458 NULL, "features");
438 if (ret) 459 if (ret)
439 kset_unregister(&ext4_kset); 460 goto feat_err;
440 else 461
441 ext4_proc_root = proc_mkdir(proc_dirname, NULL); 462 ext4_proc_root = proc_mkdir(proc_dirname, NULL);
463 return ret;
464
465feat_err:
466 kobject_put(ext4_feat);
467kset_err:
468 kset_unregister(ext4_kset);
469 ext4_kset = NULL;
442 return ret; 470 return ret;
443} 471}
444 472
445void ext4_exit_sysfs(void) 473void ext4_exit_sysfs(void)
446{ 474{
447 kobject_put(&ext4_feat); 475 kobject_put(ext4_feat);
448 kset_unregister(&ext4_kset); 476 kset_unregister(ext4_kset);
477 ext4_kset = NULL;
449 remove_proc_entry(proc_dirname, NULL); 478 remove_proc_entry(proc_dirname, NULL);
450 ext4_proc_root = NULL; 479 ext4_proc_root = NULL;
451} 480}
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index b64a9fa0ff41..0cb13badf473 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * linux/fs/ext4/truncate.h 3 * linux/fs/ext4/truncate.h
4 * 4 *
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index f8cc07588ac9..dd54c4f995c8 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -1,4 +1,4 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 File: fs/ext4/xattr.h 3 File: fs/ext4/xattr.h
4 4
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 378c221d68a9..9a20ef42fadd 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -35,8 +35,7 @@ config F2FS_FS_XATTR
35 default y 35 default y
36 help 36 help
37 Extended attributes are name:value pairs associated with inodes by 37 Extended attributes are name:value pairs associated with inodes by
38 the kernel or by users (see the attr(5) manual page, or visit 38 the kernel or by users (see the attr(5) manual page for details).
39 <http://acl.bestbits.at/> for details).
40 39
41 If unsure, say N. 40 If unsure, say N.
42 41
@@ -49,9 +48,6 @@ config F2FS_FS_POSIX_ACL
49 Posix Access Control Lists (ACLs) support permissions for users and 48 Posix Access Control Lists (ACLs) support permissions for users and
50 groups beyond the owner/group/world scheme. 49 groups beyond the owner/group/world scheme.
51 50
52 To learn more about Access Control Lists, visit the POSIX ACLs for
53 Linux website <http://acl.bestbits.at/>.
54
55 If you don't know what Access Control Lists are, say N 51 If you don't know what Access Control Lists are, say N
56 52
57config F2FS_FS_SECURITY 53config F2FS_FS_SECURITY
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 89c838bfb067..205add3d0f3a 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -585,7 +585,7 @@ no_delete:
585 !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); 585 !exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
586 } 586 }
587out_clear: 587out_clear:
588 fscrypt_put_encryption_info(inode, NULL); 588 fscrypt_put_encryption_info(inode);
589 clear_inode(inode); 589 clear_inode(inode);
590} 590}
591 591
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index c4c94c7e9f4f..b68e7b03959f 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -486,27 +486,16 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
486 struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 486 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
487 struct inode *inode; 487 struct inode *inode;
488 size_t len = strlen(symname); 488 size_t len = strlen(symname);
489 struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); 489 struct fscrypt_str disk_link;
490 struct fscrypt_symlink_data *sd = NULL;
491 int err; 490 int err;
492 491
493 if (unlikely(f2fs_cp_error(sbi))) 492 if (unlikely(f2fs_cp_error(sbi)))
494 return -EIO; 493 return -EIO;
495 494
496 if (f2fs_encrypted_inode(dir)) { 495 err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
497 err = fscrypt_get_encryption_info(dir); 496 &disk_link);
498 if (err) 497 if (err)
499 return err; 498 return err;
500
501 if (!fscrypt_has_encryption_key(dir))
502 return -ENOKEY;
503
504 disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
505 sizeof(struct fscrypt_symlink_data));
506 }
507
508 if (disk_link.len > dir->i_sb->s_blocksize)
509 return -ENAMETOOLONG;
510 499
511 err = dquot_initialize(dir); 500 err = dquot_initialize(dir);
512 if (err) 501 if (err)
@@ -516,7 +505,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
516 if (IS_ERR(inode)) 505 if (IS_ERR(inode))
517 return PTR_ERR(inode); 506 return PTR_ERR(inode);
518 507
519 if (f2fs_encrypted_inode(inode)) 508 if (IS_ENCRYPTED(inode))
520 inode->i_op = &f2fs_encrypted_symlink_inode_operations; 509 inode->i_op = &f2fs_encrypted_symlink_inode_operations;
521 else 510 else
522 inode->i_op = &f2fs_symlink_inode_operations; 511 inode->i_op = &f2fs_symlink_inode_operations;
@@ -526,38 +515,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
526 f2fs_lock_op(sbi); 515 f2fs_lock_op(sbi);
527 err = f2fs_add_link(dentry, inode); 516 err = f2fs_add_link(dentry, inode);
528 if (err) 517 if (err)
529 goto out; 518 goto out_handle_failed_inode;
530 f2fs_unlock_op(sbi); 519 f2fs_unlock_op(sbi);
531 alloc_nid_done(sbi, inode->i_ino); 520 alloc_nid_done(sbi, inode->i_ino);
532 521
533 if (f2fs_encrypted_inode(inode)) { 522 err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
534 struct qstr istr = QSTR_INIT(symname, len); 523 if (err)
535 struct fscrypt_str ostr; 524 goto err_out;
536
537 sd = f2fs_kzalloc(sbi, disk_link.len, GFP_NOFS);
538 if (!sd) {
539 err = -ENOMEM;
540 goto err_out;
541 }
542
543 err = fscrypt_get_encryption_info(inode);
544 if (err)
545 goto err_out;
546
547 if (!fscrypt_has_encryption_key(inode)) {
548 err = -ENOKEY;
549 goto err_out;
550 }
551
552 ostr.name = sd->encrypted_path;
553 ostr.len = disk_link.len;
554 err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
555 if (err)
556 goto err_out;
557
558 sd->len = cpu_to_le16(ostr.len);
559 disk_link.name = (char *)sd;
560 }
561 525
562 err = page_symlink(inode, disk_link.name, disk_link.len); 526 err = page_symlink(inode, disk_link.name, disk_link.len);
563 527
@@ -584,12 +548,14 @@ err_out:
584 f2fs_unlink(dir, dentry); 548 f2fs_unlink(dir, dentry);
585 } 549 }
586 550
587 kfree(sd);
588
589 f2fs_balance_fs(sbi, true); 551 f2fs_balance_fs(sbi, true);
590 return err; 552 goto out_free_encrypted_link;
591out: 553
554out_handle_failed_inode:
592 handle_failed_inode(inode); 555 handle_failed_inode(inode);
556out_free_encrypted_link:
557 if (disk_link.name != (unsigned char *)symname)
558 kfree(disk_link.name);
593 return err; 559 return err;
594} 560}
595 561
@@ -1148,68 +1114,20 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
1148 struct inode *inode, 1114 struct inode *inode,
1149 struct delayed_call *done) 1115 struct delayed_call *done)
1150{ 1116{
1151 struct page *cpage = NULL; 1117 struct page *page;
1152 char *caddr, *paddr = NULL; 1118 const char *target;
1153 struct fscrypt_str cstr = FSTR_INIT(NULL, 0);
1154 struct fscrypt_str pstr = FSTR_INIT(NULL, 0);
1155 struct fscrypt_symlink_data *sd;
1156 u32 max_size = inode->i_sb->s_blocksize;
1157 int res;
1158 1119
1159 if (!dentry) 1120 if (!dentry)
1160 return ERR_PTR(-ECHILD); 1121 return ERR_PTR(-ECHILD);
1161 1122
1162 res = fscrypt_get_encryption_info(inode); 1123 page = read_mapping_page(inode->i_mapping, 0, NULL);
1163 if (res) 1124 if (IS_ERR(page))
1164 return ERR_PTR(res); 1125 return ERR_CAST(page);
1165
1166 cpage = read_mapping_page(inode->i_mapping, 0, NULL);
1167 if (IS_ERR(cpage))
1168 return ERR_CAST(cpage);
1169 caddr = page_address(cpage);
1170
1171 /* Symlink is encrypted */
1172 sd = (struct fscrypt_symlink_data *)caddr;
1173 cstr.name = sd->encrypted_path;
1174 cstr.len = le16_to_cpu(sd->len);
1175
1176 /* this is broken symlink case */
1177 if (unlikely(cstr.len == 0)) {
1178 res = -ENOENT;
1179 goto errout;
1180 }
1181
1182 if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) {
1183 /* Symlink data on the disk is corrupted */
1184 res = -EIO;
1185 goto errout;
1186 }
1187 res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
1188 if (res)
1189 goto errout;
1190
1191 res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
1192 if (res)
1193 goto errout;
1194
1195 /* this is broken symlink case */
1196 if (unlikely(pstr.name[0] == 0)) {
1197 res = -ENOENT;
1198 goto errout;
1199 }
1200
1201 paddr = pstr.name;
1202
1203 /* Null-terminate the name */
1204 paddr[pstr.len] = '\0';
1205 1126
1206 put_page(cpage); 1127 target = fscrypt_get_symlink(inode, page_address(page),
1207 set_delayed_call(done, kfree_link, paddr); 1128 inode->i_sb->s_blocksize, done);
1208 return paddr; 1129 put_page(page);
1209errout: 1130 return target;
1210 fscrypt_fname_free_buffer(&pstr);
1211 put_page(cpage);
1212 return ERR_PTR(res);
1213} 1131}
1214 1132
1215const struct inode_operations f2fs_encrypted_symlink_inode_operations = { 1133const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c7b9e0948107..e95fa0a352ea 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -418,7 +418,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
418 break; 418 break;
419 case F_ADD_SEALS: 419 case F_ADD_SEALS:
420 case F_GET_SEALS: 420 case F_GET_SEALS:
421 err = shmem_fcntl(filp, cmd, arg); 421 err = memfd_fcntl(filp, cmd, arg);
422 break; 422 break;
423 case F_GET_RW_HINT: 423 case F_GET_RW_HINT:
424 case F_SET_RW_HINT: 424 case F_SET_RW_HINT:
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 0ace128f5d23..0ee727485615 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -69,8 +69,7 @@ static long do_sys_name_to_handle(struct path *path,
69 } else 69 } else
70 retval = 0; 70 retval = 0;
71 /* copy the mount id */ 71 /* copy the mount id */
72 if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id, 72 if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
73 sizeof(*mnt_id)) ||
74 copy_to_user(ufh, handle, 73 copy_to_user(ufh, handle,
75 sizeof(struct file_handle) + handle_bytes)) 74 sizeof(struct file_handle) + handle_bytes))
76 retval = -EFAULT; 75 retval = -EFAULT;
diff --git a/fs/file.c b/fs/file.c
index fc0eeb812e2c..42f0db4bd0fb 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -11,18 +11,13 @@
11#include <linux/export.h> 11#include <linux/export.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/mmzone.h>
15#include <linux/time.h>
16#include <linux/sched/signal.h> 14#include <linux/sched/signal.h>
17#include <linux/slab.h> 15#include <linux/slab.h>
18#include <linux/vmalloc.h>
19#include <linux/file.h> 16#include <linux/file.h>
20#include <linux/fdtable.h> 17#include <linux/fdtable.h>
21#include <linux/bitops.h> 18#include <linux/bitops.h>
22#include <linux/interrupt.h>
23#include <linux/spinlock.h> 19#include <linux/spinlock.h>
24#include <linux/rcupdate.h> 20#include <linux/rcupdate.h>
25#include <linux/workqueue.h>
26 21
27unsigned int sysctl_nr_open __read_mostly = 1024*1024; 22unsigned int sysctl_nr_open __read_mostly = 1024*1024;
28unsigned int sysctl_nr_open_min = BITS_PER_LONG; 23unsigned int sysctl_nr_open_min = BITS_PER_LONG;
diff --git a/fs/file_table.c b/fs/file_table.c
index 2dc9f38bd195..7ec0b3e5f05d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -23,7 +23,6 @@
23#include <linux/sysctl.h> 23#include <linux/sysctl.h>
24#include <linux/percpu_counter.h> 24#include <linux/percpu_counter.h>
25#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/hardirq.h>
27#include <linux/task_work.h> 26#include <linux/task_work.h>
28#include <linux/ima.h> 27#include <linux/ima.h>
29#include <linux/swap.h> 28#include <linux/swap.h>
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index f989efa051a0..48b24bb50d02 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -332,9 +332,13 @@ vxfs_init(void)
332{ 332{
333 int rv; 333 int rv;
334 334
335 vxfs_inode_cachep = kmem_cache_create("vxfs_inode", 335 vxfs_inode_cachep = kmem_cache_create_usercopy("vxfs_inode",
336 sizeof(struct vxfs_inode_info), 0, 336 sizeof(struct vxfs_inode_info), 0,
337 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 337 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
338 offsetof(struct vxfs_inode_info, vii_immed.vi_immed),
339 sizeof_field(struct vxfs_inode_info,
340 vii_immed.vi_immed),
341 NULL);
338 if (!vxfs_inode_cachep) 342 if (!vxfs_inode_cachep)
339 return -ENOMEM; 343 return -ENOMEM;
340 rv = register_filesystem(&vxfs_fs_type); 344 rv = register_filesystem(&vxfs_fs_type);
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 43c827a7cce5..3ed2b088dcfd 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -3,6 +3,7 @@ config GFS2_FS
3 depends on (64BIT || LBDAF) 3 depends on (64BIT || LBDAF)
4 select FS_POSIX_ACL 4 select FS_POSIX_ACL
5 select CRC32 5 select CRC32
6 select LIBCRC32C
6 select QUOTACTL 7 select QUOTACTL
7 select FS_IOMAP 8 select FS_IOMAP
8 help 9 help
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1daf15a1f00c..2f725b4a386b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -39,18 +39,21 @@
39 39
40 40
41static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, 41static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
42 unsigned int from, unsigned int to) 42 unsigned int from, unsigned int len)
43{ 43{
44 struct buffer_head *head = page_buffers(page); 44 struct buffer_head *head = page_buffers(page);
45 unsigned int bsize = head->b_size; 45 unsigned int bsize = head->b_size;
46 struct buffer_head *bh; 46 struct buffer_head *bh;
47 unsigned int to = from + len;
47 unsigned int start, end; 48 unsigned int start, end;
48 49
49 for (bh = head, start = 0; bh != head || !start; 50 for (bh = head, start = 0; bh != head || !start;
50 bh = bh->b_this_page, start = end) { 51 bh = bh->b_this_page, start = end) {
51 end = start + bsize; 52 end = start + bsize;
52 if (end <= from || start >= to) 53 if (end <= from)
53 continue; 54 continue;
55 if (start >= to)
56 break;
54 if (gfs2_is_jdata(ip)) 57 if (gfs2_is_jdata(ip))
55 set_buffer_uptodate(bh); 58 set_buffer_uptodate(bh);
56 gfs2_trans_add_data(ip->i_gl, bh); 59 gfs2_trans_add_data(ip->i_gl, bh);
@@ -189,7 +192,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
189 create_empty_buffers(page, inode->i_sb->s_blocksize, 192 create_empty_buffers(page, inode->i_sb->s_blocksize,
190 BIT(BH_Dirty)|BIT(BH_Uptodate)); 193 BIT(BH_Dirty)|BIT(BH_Uptodate));
191 } 194 }
192 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); 195 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
193 } 196 }
194 return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc); 197 return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
195} 198}
@@ -255,7 +258,6 @@ static int gfs2_writepages(struct address_space *mapping,
255 * @wbc: The writeback control 258 * @wbc: The writeback control
256 * @pvec: The vector of pages 259 * @pvec: The vector of pages
257 * @nr_pages: The number of pages to write 260 * @nr_pages: The number of pages to write
258 * @end: End position
259 * @done_index: Page index 261 * @done_index: Page index
260 * 262 *
261 * Returns: non-zero if loop should terminate, zero otherwise 263 * Returns: non-zero if loop should terminate, zero otherwise
@@ -264,7 +266,7 @@ static int gfs2_writepages(struct address_space *mapping,
264static int gfs2_write_jdata_pagevec(struct address_space *mapping, 266static int gfs2_write_jdata_pagevec(struct address_space *mapping,
265 struct writeback_control *wbc, 267 struct writeback_control *wbc,
266 struct pagevec *pvec, 268 struct pagevec *pvec,
267 int nr_pages, pgoff_t end, 269 int nr_pages,
268 pgoff_t *done_index) 270 pgoff_t *done_index)
269{ 271{
270 struct inode *inode = mapping->host; 272 struct inode *inode = mapping->host;
@@ -402,7 +404,7 @@ retry:
402 if (nr_pages == 0) 404 if (nr_pages == 0)
403 break; 405 break;
404 406
405 ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index); 407 ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, &done_index);
406 if (ret) 408 if (ret)
407 done = 1; 409 done = 1;
408 if (ret > 0) 410 if (ret > 0)
@@ -446,7 +448,8 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
446 448
447 ret = gfs2_write_cache_jdata(mapping, wbc); 449 ret = gfs2_write_cache_jdata(mapping, wbc);
448 if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { 450 if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
449 gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); 451 gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
452 GFS2_LFC_JDATA_WPAGES);
450 ret = gfs2_write_cache_jdata(mapping, wbc); 453 ret = gfs2_write_cache_jdata(mapping, wbc);
451 } 454 }
452 return ret; 455 return ret;
@@ -483,8 +486,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
483 return error; 486 return error;
484 487
485 kaddr = kmap_atomic(page); 488 kaddr = kmap_atomic(page);
486 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode))) 489 if (dsize > gfs2_max_stuffed_size(ip))
487 dsize = (dibh->b_size - sizeof(struct gfs2_dinode)); 490 dsize = gfs2_max_stuffed_size(ip);
488 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); 491 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
489 memset(kaddr + dsize, 0, PAGE_SIZE - dsize); 492 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
490 kunmap_atomic(kaddr); 493 kunmap_atomic(kaddr);
@@ -501,10 +504,9 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
501 * @file: The file to read a page for 504 * @file: The file to read a page for
502 * @page: The page to read 505 * @page: The page to read
503 * 506 *
504 * This is the core of gfs2's readpage. Its used by the internal file 507 * This is the core of gfs2's readpage. It's used by the internal file
505 * reading code as in that case we already hold the glock. Also its 508 * reading code as in that case we already hold the glock. Also it's
506 * called by gfs2_readpage() once the required lock has been granted. 509 * called by gfs2_readpage() once the required lock has been granted.
507 *
508 */ 510 */
509 511
510static int __gfs2_readpage(void *file, struct page *page) 512static int __gfs2_readpage(void *file, struct page *page)
@@ -725,7 +727,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
725 727
726 if (gfs2_is_stuffed(ip)) { 728 if (gfs2_is_stuffed(ip)) {
727 error = 0; 729 error = 0;
728 if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { 730 if (pos + len > gfs2_max_stuffed_size(ip)) {
729 error = gfs2_unstuff_dinode(ip, page); 731 error = gfs2_unstuff_dinode(ip, page);
730 if (error == 0) 732 if (error == 0)
731 goto prepare_write; 733 goto prepare_write;
@@ -832,7 +834,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
832 void *kaddr; 834 void *kaddr;
833 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); 835 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
834 836
835 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); 837 BUG_ON(pos + len > gfs2_max_stuffed_size(ip));
838
836 kaddr = kmap_atomic(page); 839 kaddr = kmap_atomic(page);
837 memcpy(buf + pos, kaddr + pos, copied); 840 memcpy(buf + pos, kaddr + pos, copied);
838 flush_dcache_page(page); 841 flush_dcache_page(page);
@@ -890,8 +893,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
890 struct gfs2_sbd *sdp = GFS2_SB(inode); 893 struct gfs2_sbd *sdp = GFS2_SB(inode);
891 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 894 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
892 struct buffer_head *dibh; 895 struct buffer_head *dibh;
893 unsigned int from = pos & (PAGE_SIZE - 1);
894 unsigned int to = from + len;
895 int ret; 896 int ret;
896 struct gfs2_trans *tr = current->journal_info; 897 struct gfs2_trans *tr = current->journal_info;
897 BUG_ON(!tr); 898 BUG_ON(!tr);
@@ -909,7 +910,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
909 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 910 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
910 911
911 if (!gfs2_is_writeback(ip)) 912 if (!gfs2_is_writeback(ip))
912 gfs2_page_add_databufs(ip, page, from, to); 913 gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
913 914
914 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); 915 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
915 if (tr->tr_num_buf_new) 916 if (tr->tr_num_buf_new)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d5f0d96169c5..86863792f36a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -69,8 +69,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
69 void *kaddr = kmap(page); 69 void *kaddr = kmap(page);
70 u64 dsize = i_size_read(inode); 70 u64 dsize = i_size_read(inode);
71 71
72 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode))) 72 if (dsize > gfs2_max_stuffed_size(ip))
73 dsize = dibh->b_size - sizeof(struct gfs2_dinode); 73 dsize = gfs2_max_stuffed_size(ip);
74 74
75 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); 75 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
76 memset(kaddr + dsize, 0, PAGE_SIZE - dsize); 76 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
@@ -279,14 +279,13 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp
279 return p + mp->mp_list[height]; 279 return p + mp->mp_list[height];
280} 280}
281 281
282static void gfs2_metapath_ra(struct gfs2_glock *gl, 282static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
283 const struct buffer_head *bh, const __be64 *pos)
284{ 283{
285 struct buffer_head *rabh;
286 const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
287 const __be64 *t; 284 const __be64 *t;
288 285
289 for (t = pos; t < endp; t++) { 286 for (t = start; t < end; t++) {
287 struct buffer_head *rabh;
288
290 if (!*t) 289 if (!*t)
291 continue; 290 continue;
292 291
@@ -305,21 +304,22 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
305 } 304 }
306} 305}
307 306
308/** 307static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
309 * lookup_mp_height - helper function for lookup_metapath 308 unsigned int x, unsigned int h)
310 * @ip: the inode
311 * @mp: the metapath
312 * @h: the height which needs looking up
313 */
314static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h)
315{ 309{
316 __be64 *ptr = metapointer(h, mp); 310 for (; x < h; x++) {
317 u64 dblock = be64_to_cpu(*ptr); 311 __be64 *ptr = metapointer(x, mp);
318 312 u64 dblock = be64_to_cpu(*ptr);
319 if (!dblock) 313 int ret;
320 return h + 1;
321 314
322 return gfs2_meta_indirect_buffer(ip, h + 1, dblock, &mp->mp_bh[h + 1]); 315 if (!dblock)
316 break;
317 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
318 if (ret)
319 return ret;
320 }
321 mp->mp_aheight = x + 1;
322 return 0;
323} 323}
324 324
325/** 325/**
@@ -336,25 +336,12 @@ static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h)
336 * at which it found the unallocated block. Blocks which are found are 336 * at which it found the unallocated block. Blocks which are found are
337 * added to the mp->mp_bh[] list. 337 * added to the mp->mp_bh[] list.
338 * 338 *
339 * Returns: error or height of metadata tree 339 * Returns: error
340 */ 340 */
341 341
342static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) 342static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
343{ 343{
344 unsigned int end_of_metadata = ip->i_height - 1; 344 return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
345 unsigned int x;
346 int ret;
347
348 for (x = 0; x < end_of_metadata; x++) {
349 ret = lookup_mp_height(ip, mp, x);
350 if (ret)
351 goto out;
352 }
353
354 ret = ip->i_height;
355out:
356 mp->mp_aheight = ret;
357 return ret;
358} 345}
359 346
360/** 347/**
@@ -365,25 +352,25 @@ out:
365 * 352 *
366 * Similar to lookup_metapath, but does lookups for a range of heights 353 * Similar to lookup_metapath, but does lookups for a range of heights
367 * 354 *
368 * Returns: error or height of metadata tree 355 * Returns: error or the number of buffers filled
369 */ 356 */
370 357
371static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) 358static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
372{ 359{
373 unsigned int start_h = h - 1; 360 unsigned int x = 0;
374 int ret; 361 int ret;
375 362
376 if (h) { 363 if (h) {
377 /* find the first buffer we need to look up. */ 364 /* find the first buffer we need to look up. */
378 while (start_h > 0 && mp->mp_bh[start_h] == NULL) 365 for (x = h - 1; x > 0; x--) {
379 start_h--; 366 if (mp->mp_bh[x])
380 for (; start_h < h; start_h++) { 367 break;
381 ret = lookup_mp_height(ip, mp, start_h);
382 if (ret)
383 return ret;
384 } 368 }
385 } 369 }
386 return ip->i_height; 370 ret = __fillup_metapath(ip, mp, x, h);
371 if (ret)
372 return ret;
373 return mp->mp_aheight - x - 1;
387} 374}
388 375
389static inline void release_metapath(struct metapath *mp) 376static inline void release_metapath(struct metapath *mp)
@@ -474,13 +461,6 @@ enum alloc_state {
474 /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ 461 /* ALLOC_UNSTUFF = 3, TBD and rather complicated */
475}; 462};
476 463
477static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
478{
479 if (hgt)
480 return sdp->sd_inptrs;
481 return sdp->sd_diptrs;
482}
483
484/** 464/**
485 * gfs2_bmap_alloc - Build a metadata tree of the requested height 465 * gfs2_bmap_alloc - Build a metadata tree of the requested height
486 * @inode: The GFS2 inode 466 * @inode: The GFS2 inode
@@ -788,7 +768,7 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
788 goto do_alloc; 768 goto do_alloc;
789 769
790 ret = lookup_metapath(ip, &mp); 770 ret = lookup_metapath(ip, &mp);
791 if (ret < 0) 771 if (ret)
792 goto out_release; 772 goto out_release;
793 773
794 if (mp.mp_aheight != ip->i_height) 774 if (mp.mp_aheight != ip->i_height)
@@ -913,17 +893,18 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
913} 893}
914 894
915/** 895/**
916 * gfs2_block_truncate_page - Deal with zeroing out data for truncate 896 * gfs2_block_zero_range - Deal with zeroing out data
917 * 897 *
918 * This is partly borrowed from ext3. 898 * This is partly borrowed from ext3.
919 */ 899 */
920static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) 900static int gfs2_block_zero_range(struct inode *inode, loff_t from,
901 unsigned int length)
921{ 902{
922 struct inode *inode = mapping->host; 903 struct address_space *mapping = inode->i_mapping;
923 struct gfs2_inode *ip = GFS2_I(inode); 904 struct gfs2_inode *ip = GFS2_I(inode);
924 unsigned long index = from >> PAGE_SHIFT; 905 unsigned long index = from >> PAGE_SHIFT;
925 unsigned offset = from & (PAGE_SIZE-1); 906 unsigned offset = from & (PAGE_SIZE-1);
926 unsigned blocksize, iblock, length, pos; 907 unsigned blocksize, iblock, pos;
927 struct buffer_head *bh; 908 struct buffer_head *bh;
928 struct page *page; 909 struct page *page;
929 int err; 910 int err;
@@ -933,7 +914,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
933 return 0; 914 return 0;
934 915
935 blocksize = inode->i_sb->s_blocksize; 916 blocksize = inode->i_sb->s_blocksize;
936 length = blocksize - (offset & (blocksize - 1));
937 iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); 917 iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
938 918
939 if (!page_has_buffers(page)) 919 if (!page_has_buffers(page))
@@ -1003,11 +983,24 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize
1003 int error; 983 int error;
1004 984
1005 while (oldsize != newsize) { 985 while (oldsize != newsize) {
986 struct gfs2_trans *tr;
987 unsigned int offs;
988
1006 chunk = oldsize - newsize; 989 chunk = oldsize - newsize;
1007 if (chunk > max_chunk) 990 if (chunk > max_chunk)
1008 chunk = max_chunk; 991 chunk = max_chunk;
992
993 offs = oldsize & ~PAGE_MASK;
994 if (offs && chunk > PAGE_SIZE)
995 chunk = offs + ((chunk - offs) & PAGE_MASK);
996
1009 truncate_pagecache(inode, oldsize - chunk); 997 truncate_pagecache(inode, oldsize - chunk);
1010 oldsize -= chunk; 998 oldsize -= chunk;
999
1000 tr = current->journal_info;
1001 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1002 continue;
1003
1011 gfs2_trans_end(sdp); 1004 gfs2_trans_end(sdp);
1012 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 1005 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1013 if (error) 1006 if (error)
@@ -1017,13 +1010,13 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize
1017 return 0; 1010 return 0;
1018} 1011}
1019 1012
1020static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) 1013static int trunc_start(struct inode *inode, u64 newsize)
1021{ 1014{
1022 struct gfs2_inode *ip = GFS2_I(inode); 1015 struct gfs2_inode *ip = GFS2_I(inode);
1023 struct gfs2_sbd *sdp = GFS2_SB(inode); 1016 struct gfs2_sbd *sdp = GFS2_SB(inode);
1024 struct address_space *mapping = inode->i_mapping; 1017 struct buffer_head *dibh = NULL;
1025 struct buffer_head *dibh;
1026 int journaled = gfs2_is_jdata(ip); 1018 int journaled = gfs2_is_jdata(ip);
1019 u64 oldsize = inode->i_size;
1027 int error; 1020 int error;
1028 1021
1029 if (journaled) 1022 if (journaled)
@@ -1042,10 +1035,13 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1042 if (gfs2_is_stuffed(ip)) { 1035 if (gfs2_is_stuffed(ip)) {
1043 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1036 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
1044 } else { 1037 } else {
1045 if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) { 1038 unsigned int blocksize = i_blocksize(inode);
1046 error = gfs2_block_truncate_page(mapping, newsize); 1039 unsigned int offs = newsize & (blocksize - 1);
1040 if (offs) {
1041 error = gfs2_block_zero_range(inode, newsize,
1042 blocksize - offs);
1047 if (error) 1043 if (error)
1048 goto out_brelse; 1044 goto out;
1049 } 1045 }
1050 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; 1046 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
1051 } 1047 }
@@ -1059,15 +1055,10 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1059 else 1055 else
1060 truncate_pagecache(inode, newsize); 1056 truncate_pagecache(inode, newsize);
1061 1057
1062 if (error) {
1063 brelse(dibh);
1064 return error;
1065 }
1066
1067out_brelse:
1068 brelse(dibh);
1069out: 1058out:
1070 gfs2_trans_end(sdp); 1059 brelse(dibh);
1060 if (current->journal_info)
1061 gfs2_trans_end(sdp);
1071 return error; 1062 return error;
1072} 1063}
1073 1064
@@ -1075,10 +1066,11 @@ out:
1075 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein 1066 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1076 * @ip: inode 1067 * @ip: inode
1077 * @rg_gh: holder of resource group glock 1068 * @rg_gh: holder of resource group glock
1078 * @mp: current metapath fully populated with buffers 1069 * @bh: buffer head to sweep
1070 * @start: starting point in bh
1071 * @end: end point in bh
1072 * @meta: true if bh points to metadata (rather than data)
1079 * @btotal: place to keep count of total blocks freed 1073 * @btotal: place to keep count of total blocks freed
1080 * @hgt: height we're processing
1081 * @first: true if this is the first call to this function for this height
1082 * 1074 *
1083 * We sweep a metadata buffer (provided by the metapath) for blocks we need to 1075 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1084 * free, and free them all. However, we do it one rgrp at a time. If this 1076 * free, and free them all. However, we do it one rgrp at a time. If this
@@ -1093,47 +1085,46 @@ out:
1093 * *btotal has the total number of blocks freed 1085 * *btotal has the total number of blocks freed
1094 */ 1086 */
1095static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, 1087static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1096 const struct metapath *mp, u32 *btotal, int hgt, 1088 struct buffer_head *bh, __be64 *start, __be64 *end,
1097 bool preserve1) 1089 bool meta, u32 *btotal)
1098{ 1090{
1099 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1091 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1100 struct gfs2_rgrpd *rgd; 1092 struct gfs2_rgrpd *rgd;
1101 struct gfs2_trans *tr; 1093 struct gfs2_trans *tr;
1102 struct buffer_head *bh = mp->mp_bh[hgt]; 1094 __be64 *p;
1103 __be64 *top, *bottom, *p;
1104 int blks_outside_rgrp; 1095 int blks_outside_rgrp;
1105 u64 bn, bstart, isize_blks; 1096 u64 bn, bstart, isize_blks;
1106 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */ 1097 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1107 int meta = ((hgt != ip->i_height - 1) ? 1 : 0);
1108 int ret = 0; 1098 int ret = 0;
1109 bool buf_in_tr = false; /* buffer was added to transaction */ 1099 bool buf_in_tr = false; /* buffer was added to transaction */
1110 1100
1111 if (gfs2_metatype_check(sdp, bh,
1112 (hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)))
1113 return -EIO;
1114
1115more_rgrps: 1101more_rgrps:
1102 rgd = NULL;
1103 if (gfs2_holder_initialized(rd_gh)) {
1104 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1105 gfs2_assert_withdraw(sdp,
1106 gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1107 }
1116 blks_outside_rgrp = 0; 1108 blks_outside_rgrp = 0;
1117 bstart = 0; 1109 bstart = 0;
1118 blen = 0; 1110 blen = 0;
1119 top = metapointer(hgt, mp); /* first ptr from metapath */
1120 /* If we're keeping some data at the truncation point, we've got to
1121 preserve the metadata tree by adding 1 to the starting metapath. */
1122 if (preserve1)
1123 top++;
1124 1111
1125 bottom = (__be64 *)(bh->b_data + bh->b_size); 1112 for (p = start; p < end; p++) {
1126
1127 for (p = top; p < bottom; p++) {
1128 if (!*p) 1113 if (!*p)
1129 continue; 1114 continue;
1130 bn = be64_to_cpu(*p); 1115 bn = be64_to_cpu(*p);
1131 if (gfs2_holder_initialized(rd_gh)) { 1116
1132 rgd = gfs2_glock2rgrp(rd_gh->gh_gl); 1117 if (rgd) {
1133 gfs2_assert_withdraw(sdp, 1118 if (!rgrp_contains_block(rgd, bn)) {
1134 gfs2_glock_is_locked_by_me(rd_gh->gh_gl)); 1119 blks_outside_rgrp++;
1120 continue;
1121 }
1135 } else { 1122 } else {
1136 rgd = gfs2_blk2rgrpd(sdp, bn, false); 1123 rgd = gfs2_blk2rgrpd(sdp, bn, true);
1124 if (unlikely(!rgd)) {
1125 ret = -EIO;
1126 goto out;
1127 }
1137 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1128 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1138 0, rd_gh); 1129 0, rd_gh);
1139 if (ret) 1130 if (ret)
@@ -1145,11 +1136,6 @@ more_rgrps:
1145 gfs2_rs_deltree(&ip->i_res); 1136 gfs2_rs_deltree(&ip->i_res);
1146 } 1137 }
1147 1138
1148 if (!rgrp_contains_block(rgd, bn)) {
1149 blks_outside_rgrp++;
1150 continue;
1151 }
1152
1153 /* The size of our transactions will be unknown until we 1139 /* The size of our transactions will be unknown until we
1154 actually process all the metadata blocks that relate to 1140 actually process all the metadata blocks that relate to
1155 the rgrp. So we estimate. We know it can't be more than 1141 the rgrp. So we estimate. We know it can't be more than
@@ -1168,7 +1154,7 @@ more_rgrps:
1168 jblocks_rqsted += isize_blks; 1154 jblocks_rqsted += isize_blks;
1169 revokes = jblocks_rqsted; 1155 revokes = jblocks_rqsted;
1170 if (meta) 1156 if (meta)
1171 revokes += hptrs(sdp, hgt); 1157 revokes += end - start;
1172 else if (ip->i_depth) 1158 else if (ip->i_depth)
1173 revokes += sdp->sd_inptrs; 1159 revokes += sdp->sd_inptrs;
1174 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); 1160 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
@@ -1226,7 +1212,11 @@ out_unlock:
1226 outside the rgrp we just processed, 1212 outside the rgrp we just processed,
1227 do it all over again. */ 1213 do it all over again. */
1228 if (current->journal_info) { 1214 if (current->journal_info) {
1229 struct buffer_head *dibh = mp->mp_bh[0]; 1215 struct buffer_head *dibh;
1216
1217 ret = gfs2_meta_inode_buffer(ip, &dibh);
1218 if (ret)
1219 goto out;
1230 1220
1231 /* Every transaction boundary, we rewrite the dinode 1221 /* Every transaction boundary, we rewrite the dinode
1232 to keep its di_blocks current in case of failure. */ 1222 to keep its di_blocks current in case of failure. */
@@ -1234,6 +1224,7 @@ out_unlock:
1234 current_time(&ip->i_inode); 1224 current_time(&ip->i_inode);
1235 gfs2_trans_add_meta(ip->i_gl, dibh); 1225 gfs2_trans_add_meta(ip->i_gl, dibh);
1236 gfs2_dinode_out(ip, dibh->b_data); 1226 gfs2_dinode_out(ip, dibh->b_data);
1227 brelse(dibh);
1237 up_write(&ip->i_rw_mutex); 1228 up_write(&ip->i_rw_mutex);
1238 gfs2_trans_end(sdp); 1229 gfs2_trans_end(sdp);
1239 } 1230 }
@@ -1245,38 +1236,48 @@ out:
1245 return ret; 1236 return ret;
1246} 1237}
1247 1238
1239static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1240{
1241 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1242 return false;
1243 return true;
1244}
1245
1248/** 1246/**
1249 * find_nonnull_ptr - find a non-null pointer given a metapath and height 1247 * find_nonnull_ptr - find a non-null pointer given a metapath and height
1250 * assumes the metapath is valid (with buffers) out to height h
1251 * @mp: starting metapath 1248 * @mp: starting metapath
1252 * @h: desired height to search 1249 * @h: desired height to search
1253 * 1250 *
1251 * Assumes the metapath is valid (with buffers) out to height h.
1254 * Returns: true if a non-null pointer was found in the metapath buffer 1252 * Returns: true if a non-null pointer was found in the metapath buffer
1255 * false if all remaining pointers are NULL in the buffer 1253 * false if all remaining pointers are NULL in the buffer
1256 */ 1254 */
1257static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp, 1255static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1258 unsigned int h) 1256 unsigned int h,
1257 __u16 *end_list, unsigned int end_aligned)
1259{ 1258{
1260 __be64 *ptr; 1259 struct buffer_head *bh = mp->mp_bh[h];
1261 unsigned int ptrs = hptrs(sdp, h) - 1; 1260 __be64 *first, *ptr, *end;
1261
1262 first = metaptr1(h, mp);
1263 ptr = first + mp->mp_list[h];
1264 end = (__be64 *)(bh->b_data + bh->b_size);
1265 if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1266 bool keep_end = h < end_aligned;
1267 end = first + end_list[h] + keep_end;
1268 }
1262 1269
1263 while (true) { 1270 while (ptr < end) {
1264 ptr = metapointer(h, mp);
1265 if (*ptr) { /* if we have a non-null pointer */ 1271 if (*ptr) { /* if we have a non-null pointer */
1266 /* Now zero the metapath after the current height. */ 1272 mp->mp_list[h] = ptr - first;
1267 h++; 1273 h++;
1268 if (h < GFS2_MAX_META_HEIGHT) 1274 if (h < GFS2_MAX_META_HEIGHT)
1269 memset(&mp->mp_list[h], 0, 1275 mp->mp_list[h] = 0;
1270 (GFS2_MAX_META_HEIGHT - h) *
1271 sizeof(mp->mp_list[0]));
1272 return true; 1276 return true;
1273 } 1277 }
1274 1278 ptr++;
1275 if (mp->mp_list[h] < ptrs)
1276 mp->mp_list[h]++;
1277 else
1278 return false; /* no more pointers in this buffer */
1279 } 1279 }
1280 return false;
1280} 1281}
1281 1282
1282enum dealloc_states { 1283enum dealloc_states {
@@ -1286,49 +1287,126 @@ enum dealloc_states {
1286 DEALLOC_DONE = 3, /* process complete */ 1287 DEALLOC_DONE = 3, /* process complete */
1287}; 1288};
1288 1289
1289static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h) 1290static inline void
1291metapointer_range(struct metapath *mp, int height,
1292 __u16 *start_list, unsigned int start_aligned,
1293 __u16 *end_list, unsigned int end_aligned,
1294 __be64 **start, __be64 **end)
1290{ 1295{
1291 if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0]))) 1296 struct buffer_head *bh = mp->mp_bh[height];
1292 return false; 1297 __be64 *first;
1293 return true; 1298
1299 first = metaptr1(height, mp);
1300 *start = first;
1301 if (mp_eq_to_hgt(mp, start_list, height)) {
1302 bool keep_start = height < start_aligned;
1303 *start = first + start_list[height] + keep_start;
1304 }
1305 *end = (__be64 *)(bh->b_data + bh->b_size);
1306 if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1307 bool keep_end = height < end_aligned;
1308 *end = first + end_list[height] + keep_end;
1309 }
1310}
1311
1312static inline bool walk_done(struct gfs2_sbd *sdp,
1313 struct metapath *mp, int height,
1314 __u16 *end_list, unsigned int end_aligned)
1315{
1316 __u16 end;
1317
1318 if (end_list) {
1319 bool keep_end = height < end_aligned;
1320 if (!mp_eq_to_hgt(mp, end_list, height))
1321 return false;
1322 end = end_list[height] + keep_end;
1323 } else
1324 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1325 return mp->mp_list[height] >= end;
1294} 1326}
1295 1327
1296/** 1328/**
1297 * trunc_dealloc - truncate a file down to a desired size 1329 * punch_hole - deallocate blocks in a file
1298 * @ip: inode to truncate 1330 * @ip: inode to truncate
1299 * @newsize: The desired size of the file 1331 * @offset: the start of the hole
1332 * @length: the size of the hole (or 0 for truncate)
1333 *
1334 * Punch a hole into a file or truncate a file at a given position. This
1335 * function operates in whole blocks (@offset and @length are rounded
1336 * accordingly); partially filled blocks must be cleared otherwise.
1300 * 1337 *
1301 * This function truncates a file to newsize. It works from the 1338 * This function works from the bottom up, and from the right to the left. In
1302 * bottom up, and from the right to the left. In other words, it strips off 1339 * other words, it strips off the highest layer (data) before stripping any of
1303 * the highest layer (data) before stripping any of the metadata. Doing it 1340 * the metadata. Doing it this way is best in case the operation is interrupted
1304 * this way is best in case the operation is interrupted by power failure, etc. 1341 * by power failure, etc. The dinode is rewritten in every transaction to
1305 * The dinode is rewritten in every transaction to guarantee integrity. 1342 * guarantee integrity.
1306 */ 1343 */
1307static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) 1344static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1308{ 1345{
1309 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1346 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1310 struct metapath mp; 1347 struct metapath mp = {};
1311 struct buffer_head *dibh, *bh; 1348 struct buffer_head *dibh, *bh;
1312 struct gfs2_holder rd_gh; 1349 struct gfs2_holder rd_gh;
1313 u64 lblock; 1350 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1314 __u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */ 1351 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1352 __u16 start_list[GFS2_MAX_META_HEIGHT];
1353 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1354 unsigned int start_aligned, uninitialized_var(end_aligned);
1315 unsigned int strip_h = ip->i_height - 1; 1355 unsigned int strip_h = ip->i_height - 1;
1316 u32 btotal = 0; 1356 u32 btotal = 0;
1317 int ret, state; 1357 int ret, state;
1318 int mp_h; /* metapath buffers are read in to this height */ 1358 int mp_h; /* metapath buffers are read in to this height */
1319 sector_t last_ra = 0;
1320 u64 prev_bnr = 0; 1359 u64 prev_bnr = 0;
1321 bool preserve1; /* need to preserve the first meta pointer? */ 1360 __be64 *start, *end;
1322 1361
1323 if (!newsize) 1362 /*
1324 lblock = 0; 1363 * The start position of the hole is defined by lblock, start_list, and
1325 else 1364 * start_aligned. The end position of the hole is defined by lend,
1326 lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift; 1365 * end_list, and end_aligned.
1366 *
1367 * start_aligned and end_aligned define down to which height the start
1368 * and end positions are aligned to the metadata tree (i.e., the
1369 * position is a multiple of the metadata granularity at the height
1370 * above). This determines at which heights additional meta pointers
1371 * needs to be preserved for the remaining data.
1372 */
1373
1374 if (length) {
1375 u64 maxsize = sdp->sd_heightsize[ip->i_height];
1376 u64 end_offset = offset + length;
1377 u64 lend;
1378
1379 /*
1380 * Clip the end at the maximum file size for the given height:
1381 * that's how far the metadata goes; files bigger than that
1382 * will have additional layers of indirection.
1383 */
1384 if (end_offset > maxsize)
1385 end_offset = maxsize;
1386 lend = end_offset >> bsize_shift;
1387
1388 if (lblock >= lend)
1389 return 0;
1390
1391 find_metapath(sdp, lend, &mp, ip->i_height);
1392 end_list = __end_list;
1393 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1394
1395 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1396 if (end_list[mp_h])
1397 break;
1398 }
1399 end_aligned = mp_h;
1400 }
1327 1401
1328 memset(&mp, 0, sizeof(mp));
1329 find_metapath(sdp, lblock, &mp, ip->i_height); 1402 find_metapath(sdp, lblock, &mp, ip->i_height);
1403 memcpy(start_list, mp.mp_list, sizeof(start_list));
1330 1404
1331 memcpy(&nbof, &mp.mp_list, sizeof(nbof)); 1405 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1406 if (start_list[mp_h])
1407 break;
1408 }
1409 start_aligned = mp_h;
1332 1410
1333 ret = gfs2_meta_inode_buffer(ip, &dibh); 1411 ret = gfs2_meta_inode_buffer(ip, &dibh);
1334 if (ret) 1412 if (ret)
@@ -1336,7 +1414,17 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1336 1414
1337 mp.mp_bh[0] = dibh; 1415 mp.mp_bh[0] = dibh;
1338 ret = lookup_metapath(ip, &mp); 1416 ret = lookup_metapath(ip, &mp);
1339 if (ret == ip->i_height) 1417 if (ret)
1418 goto out_metapath;
1419
1420 /* issue read-ahead on metadata */
1421 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1422 metapointer_range(&mp, mp_h, start_list, start_aligned,
1423 end_list, end_aligned, &start, &end);
1424 gfs2_metapath_ra(ip->i_gl, start, end);
1425 }
1426
1427 if (mp.mp_aheight == ip->i_height)
1340 state = DEALLOC_MP_FULL; /* We have a complete metapath */ 1428 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1341 else 1429 else
1342 state = DEALLOC_FILL_MP; /* deal with partial metapath */ 1430 state = DEALLOC_FILL_MP; /* deal with partial metapath */
@@ -1357,20 +1445,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1357 /* Truncate a full metapath at the given strip height. 1445 /* Truncate a full metapath at the given strip height.
1358 * Note that strip_h == mp_h in order to be in this state. */ 1446 * Note that strip_h == mp_h in order to be in this state. */
1359 case DEALLOC_MP_FULL: 1447 case DEALLOC_MP_FULL:
1360 if (mp_h > 0) { /* issue read-ahead on metadata */
1361 __be64 *top;
1362
1363 bh = mp.mp_bh[mp_h - 1];
1364 if (bh->b_blocknr != last_ra) {
1365 last_ra = bh->b_blocknr;
1366 top = metaptr1(mp_h - 1, &mp);
1367 gfs2_metapath_ra(ip->i_gl, bh, top);
1368 }
1369 }
1370 /* If we're truncating to a non-zero size and the mp is
1371 at the beginning of file for the strip height, we
1372 need to preserve the first metadata pointer. */
1373 preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
1374 bh = mp.mp_bh[mp_h]; 1448 bh = mp.mp_bh[mp_h];
1375 gfs2_assert_withdraw(sdp, bh); 1449 gfs2_assert_withdraw(sdp, bh);
1376 if (gfs2_assert_withdraw(sdp, 1450 if (gfs2_assert_withdraw(sdp,
@@ -1382,8 +1456,28 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1382 prev_bnr, ip->i_height, strip_h, mp_h); 1456 prev_bnr, ip->i_height, strip_h, mp_h);
1383 } 1457 }
1384 prev_bnr = bh->b_blocknr; 1458 prev_bnr = bh->b_blocknr;
1385 ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal, 1459
1386 mp_h, preserve1); 1460 if (gfs2_metatype_check(sdp, bh,
1461 (mp_h ? GFS2_METATYPE_IN :
1462 GFS2_METATYPE_DI))) {
1463 ret = -EIO;
1464 goto out;
1465 }
1466
1467 /*
1468 * Below, passing end_aligned as 0 gives us the
1469 * metapointer range excluding the end point: the end
1470 * point is the first metapath we must not deallocate!
1471 */
1472
1473 metapointer_range(&mp, mp_h, start_list, start_aligned,
1474 end_list, 0 /* end_aligned */,
1475 &start, &end);
1476 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1477 start, end,
1478 mp_h != ip->i_height - 1,
1479 &btotal);
1480
1387 /* If we hit an error or just swept dinode buffer, 1481 /* If we hit an error or just swept dinode buffer,
1388 just exit. */ 1482 just exit. */
1389 if (ret || !mp_h) { 1483 if (ret || !mp_h) {
@@ -1407,20 +1501,20 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1407 stripping the previous level of metadata. */ 1501 stripping the previous level of metadata. */
1408 if (mp_h == 0) { 1502 if (mp_h == 0) {
1409 strip_h--; 1503 strip_h--;
1410 memcpy(&mp.mp_list, &nbof, sizeof(nbof)); 1504 memcpy(mp.mp_list, start_list, sizeof(start_list));
1411 mp_h = strip_h; 1505 mp_h = strip_h;
1412 state = DEALLOC_FILL_MP; 1506 state = DEALLOC_FILL_MP;
1413 break; 1507 break;
1414 } 1508 }
1415 mp.mp_list[mp_h] = 0; 1509 mp.mp_list[mp_h] = 0;
1416 mp_h--; /* search one metadata height down */ 1510 mp_h--; /* search one metadata height down */
1417 if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1)
1418 break; /* loop around in the same state */
1419 mp.mp_list[mp_h]++; 1511 mp.mp_list[mp_h]++;
1512 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1513 break;
1420 /* Here we've found a part of the metapath that is not 1514 /* Here we've found a part of the metapath that is not
1421 * allocated. We need to search at that height for the 1515 * allocated. We need to search at that height for the
1422 * next non-null pointer. */ 1516 * next non-null pointer. */
1423 if (find_nonnull_ptr(sdp, &mp, mp_h)) { 1517 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1424 state = DEALLOC_FILL_MP; 1518 state = DEALLOC_FILL_MP;
1425 mp_h++; 1519 mp_h++;
1426 } 1520 }
@@ -1435,18 +1529,29 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1435 if (ret < 0) 1529 if (ret < 0)
1436 goto out; 1530 goto out;
1437 1531
1532 /* issue read-ahead on metadata */
1533 if (mp.mp_aheight > 1) {
1534 for (; ret > 1; ret--) {
1535 metapointer_range(&mp, mp.mp_aheight - ret,
1536 start_list, start_aligned,
1537 end_list, end_aligned,
1538 &start, &end);
1539 gfs2_metapath_ra(ip->i_gl, start, end);
1540 }
1541 }
1542
1438 /* If buffers found for the entire strip height */ 1543 /* If buffers found for the entire strip height */
1439 if ((ret == ip->i_height) && (mp_h == strip_h)) { 1544 if (mp.mp_aheight - 1 == strip_h) {
1440 state = DEALLOC_MP_FULL; 1545 state = DEALLOC_MP_FULL;
1441 break; 1546 break;
1442 } 1547 }
1443 if (ret < ip->i_height) /* We have a partial height */ 1548 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1444 mp_h = ret - 1; 1549 mp_h = mp.mp_aheight - 1;
1445 1550
1446 /* If we find a non-null block pointer, crawl a bit 1551 /* If we find a non-null block pointer, crawl a bit
1447 higher up in the metapath and try again, otherwise 1552 higher up in the metapath and try again, otherwise
1448 we need to look lower for a new starting point. */ 1553 we need to look lower for a new starting point. */
1449 if (find_nonnull_ptr(sdp, &mp, mp_h)) 1554 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1450 mp_h++; 1555 mp_h++;
1451 else 1556 else
1452 state = DEALLOC_MP_LOWER; 1557 state = DEALLOC_MP_LOWER;
@@ -1524,7 +1629,6 @@ out:
1524/** 1629/**
1525 * do_shrink - make a file smaller 1630 * do_shrink - make a file smaller
1526 * @inode: the inode 1631 * @inode: the inode
1527 * @oldsize: the current inode size
1528 * @newsize: the size to make the file 1632 * @newsize: the size to make the file
1529 * 1633 *
1530 * Called with an exclusive lock on @inode. The @size must 1634 * Called with an exclusive lock on @inode. The @size must
@@ -1533,18 +1637,18 @@ out:
1533 * Returns: errno 1637 * Returns: errno
1534 */ 1638 */
1535 1639
1536static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) 1640static int do_shrink(struct inode *inode, u64 newsize)
1537{ 1641{
1538 struct gfs2_inode *ip = GFS2_I(inode); 1642 struct gfs2_inode *ip = GFS2_I(inode);
1539 int error; 1643 int error;
1540 1644
1541 error = trunc_start(inode, oldsize, newsize); 1645 error = trunc_start(inode, newsize);
1542 if (error < 0) 1646 if (error < 0)
1543 return error; 1647 return error;
1544 if (gfs2_is_stuffed(ip)) 1648 if (gfs2_is_stuffed(ip))
1545 return 0; 1649 return 0;
1546 1650
1547 error = trunc_dealloc(ip, newsize); 1651 error = punch_hole(ip, newsize, 0);
1548 if (error == 0) 1652 if (error == 0)
1549 error = trunc_end(ip); 1653 error = trunc_end(ip);
1550 1654
@@ -1553,10 +1657,9 @@ static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize)
1553 1657
1554void gfs2_trim_blocks(struct inode *inode) 1658void gfs2_trim_blocks(struct inode *inode)
1555{ 1659{
1556 u64 size = inode->i_size;
1557 int ret; 1660 int ret;
1558 1661
1559 ret = do_shrink(inode, size, size); 1662 ret = do_shrink(inode, inode->i_size);
1560 WARN_ON(ret != 0); 1663 WARN_ON(ret != 0);
1561} 1664}
1562 1665
@@ -1589,8 +1692,7 @@ static int do_grow(struct inode *inode, u64 size)
1589 int error; 1692 int error;
1590 int unstuff = 0; 1693 int unstuff = 0;
1591 1694
1592 if (gfs2_is_stuffed(ip) && 1695 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
1593 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1594 error = gfs2_quota_lock_check(ip, &ap); 1696 error = gfs2_quota_lock_check(ip, &ap);
1595 if (error) 1697 if (error)
1596 return error; 1698 return error;
@@ -1650,7 +1752,6 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1650{ 1752{
1651 struct gfs2_inode *ip = GFS2_I(inode); 1753 struct gfs2_inode *ip = GFS2_I(inode);
1652 int ret; 1754 int ret;
1653 u64 oldsize;
1654 1755
1655 BUG_ON(!S_ISREG(inode->i_mode)); 1756 BUG_ON(!S_ISREG(inode->i_mode));
1656 1757
@@ -1664,13 +1765,12 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1664 if (ret) 1765 if (ret)
1665 goto out; 1766 goto out;
1666 1767
1667 oldsize = inode->i_size; 1768 if (newsize >= inode->i_size) {
1668 if (newsize >= oldsize) {
1669 ret = do_grow(inode, newsize); 1769 ret = do_grow(inode, newsize);
1670 goto out; 1770 goto out;
1671 } 1771 }
1672 1772
1673 ret = do_shrink(inode, oldsize, newsize); 1773 ret = do_shrink(inode, newsize);
1674out: 1774out:
1675 gfs2_rsqa_delete(ip, NULL); 1775 gfs2_rsqa_delete(ip, NULL);
1676 return ret; 1776 return ret;
@@ -1679,7 +1779,7 @@ out:
1679int gfs2_truncatei_resume(struct gfs2_inode *ip) 1779int gfs2_truncatei_resume(struct gfs2_inode *ip)
1680{ 1780{
1681 int error; 1781 int error;
1682 error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); 1782 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
1683 if (!error) 1783 if (!error)
1684 error = trunc_end(ip); 1784 error = trunc_end(ip);
1685 return error; 1785 return error;
@@ -1687,7 +1787,7 @@ int gfs2_truncatei_resume(struct gfs2_inode *ip)
1687 1787
1688int gfs2_file_dealloc(struct gfs2_inode *ip) 1788int gfs2_file_dealloc(struct gfs2_inode *ip)
1689{ 1789{
1690 return trunc_dealloc(ip, 0); 1790 return punch_hole(ip, 0, 0);
1691} 1791}
1692 1792
1693/** 1793/**
@@ -1827,8 +1927,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1827 return 0; 1927 return 0;
1828 1928
1829 if (gfs2_is_stuffed(ip)) { 1929 if (gfs2_is_stuffed(ip)) {
1830 if (offset + len > 1930 if (offset + len > gfs2_max_stuffed_size(ip))
1831 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1832 return 1; 1931 return 1;
1833 return 0; 1932 return 0;
1834 } 1933 }
@@ -1855,3 +1954,123 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1855 return 0; 1954 return 0;
1856} 1955}
1857 1956
1957static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
1958{
1959 struct gfs2_inode *ip = GFS2_I(inode);
1960 struct buffer_head *dibh;
1961 int error;
1962
1963 if (offset >= inode->i_size)
1964 return 0;
1965 if (offset + length > inode->i_size)
1966 length = inode->i_size - offset;
1967
1968 error = gfs2_meta_inode_buffer(ip, &dibh);
1969 if (error)
1970 return error;
1971 gfs2_trans_add_meta(ip->i_gl, dibh);
1972 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
1973 length);
1974 brelse(dibh);
1975 return 0;
1976}
1977
1978static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
1979 loff_t length)
1980{
1981 struct gfs2_sbd *sdp = GFS2_SB(inode);
1982 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1983 int error;
1984
1985 while (length) {
1986 struct gfs2_trans *tr;
1987 loff_t chunk;
1988 unsigned int offs;
1989
1990 chunk = length;
1991 if (chunk > max_chunk)
1992 chunk = max_chunk;
1993
1994 offs = offset & ~PAGE_MASK;
1995 if (offs && chunk > PAGE_SIZE)
1996 chunk = offs + ((chunk - offs) & PAGE_MASK);
1997
1998 truncate_pagecache_range(inode, offset, chunk);
1999 offset += chunk;
2000 length -= chunk;
2001
2002 tr = current->journal_info;
2003 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2004 continue;
2005
2006 gfs2_trans_end(sdp);
2007 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2008 if (error)
2009 return error;
2010 }
2011 return 0;
2012}
2013
2014int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2015{
2016 struct inode *inode = file_inode(file);
2017 struct gfs2_inode *ip = GFS2_I(inode);
2018 struct gfs2_sbd *sdp = GFS2_SB(inode);
2019 int error;
2020
2021 if (gfs2_is_jdata(ip))
2022 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2023 GFS2_JTRUNC_REVOKES);
2024 else
2025 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2026 if (error)
2027 return error;
2028
2029 if (gfs2_is_stuffed(ip)) {
2030 error = stuffed_zero_range(inode, offset, length);
2031 if (error)
2032 goto out;
2033 } else {
2034 unsigned int start_off, end_off, blocksize;
2035
2036 blocksize = i_blocksize(inode);
2037 start_off = offset & (blocksize - 1);
2038 end_off = (offset + length) & (blocksize - 1);
2039 if (start_off) {
2040 unsigned int len = length;
2041 if (length > blocksize - start_off)
2042 len = blocksize - start_off;
2043 error = gfs2_block_zero_range(inode, offset, len);
2044 if (error)
2045 goto out;
2046 if (start_off + length < blocksize)
2047 end_off = 0;
2048 }
2049 if (end_off) {
2050 error = gfs2_block_zero_range(inode,
2051 offset + length - end_off, end_off);
2052 if (error)
2053 goto out;
2054 }
2055 }
2056
2057 if (gfs2_is_jdata(ip)) {
2058 BUG_ON(!current->journal_info);
2059 gfs2_journaled_truncate_range(inode, offset, length);
2060 } else
2061 truncate_pagecache_range(inode, offset, offset + length - 1);
2062
2063 file_update_time(file);
2064 mark_inode_dirty(inode);
2065
2066 if (current->journal_info)
2067 gfs2_trans_end(sdp);
2068
2069 if (!gfs2_is_stuffed(ip))
2070 error = punch_hole(ip, offset, length);
2071
2072out:
2073 if (current->journal_info)
2074 gfs2_trans_end(sdp);
2075 return error;
2076}
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 443cc182cf18..c3402fe00653 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -61,5 +61,6 @@ extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
61 unsigned int len); 61 unsigned int len);
62extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); 62extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd);
63extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); 63extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd);
64extern int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length);
64 65
65#endif /* __BMAP_DOT_H__ */ 66#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 06a0d1947c77..7c21aea0266b 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -170,8 +170,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
170 if (!size) 170 if (!size)
171 return 0; 171 return 0;
172 172
173 if (gfs2_is_stuffed(ip) && 173 if (gfs2_is_stuffed(ip) && offset + size <= gfs2_max_stuffed_size(ip))
174 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
175 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset, 174 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
176 size); 175 size);
177 176
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 58705ef8643a..4f88e201b3f0 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -246,7 +246,9 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
246 } 246 }
247 if ((flags ^ new_flags) & GFS2_DIF_JDATA) { 247 if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
248 if (new_flags & GFS2_DIF_JDATA) 248 if (new_flags & GFS2_DIF_JDATA)
249 gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); 249 gfs2_log_flush(sdp, ip->i_gl,
250 GFS2_LOG_HEAD_FLUSH_NORMAL |
251 GFS2_LFC_SET_FLAGS);
250 error = filemap_fdatawrite(inode->i_mapping); 252 error = filemap_fdatawrite(inode->i_mapping);
251 if (error) 253 if (error)
252 goto out; 254 goto out;
@@ -924,7 +926,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
924 struct gfs2_holder gh; 926 struct gfs2_holder gh;
925 int ret; 927 int ret;
926 928
927 if (mode & ~FALLOC_FL_KEEP_SIZE) 929 if (mode & ~(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))
928 return -EOPNOTSUPP; 930 return -EOPNOTSUPP;
929 /* fallocate is needed by gfs2_grow to reserve space in the rindex */ 931 /* fallocate is needed by gfs2_grow to reserve space in the rindex */
930 if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex) 932 if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex)
@@ -948,13 +950,18 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
948 if (ret) 950 if (ret)
949 goto out_unlock; 951 goto out_unlock;
950 952
951 ret = gfs2_rsqa_alloc(ip); 953 if (mode & FALLOC_FL_PUNCH_HOLE) {
952 if (ret) 954 ret = __gfs2_punch_hole(file, offset, len);
953 goto out_putw; 955 } else {
956 ret = gfs2_rsqa_alloc(ip);
957 if (ret)
958 goto out_putw;
954 959
955 ret = __gfs2_fallocate(file, mode, offset, len); 960 ret = __gfs2_fallocate(file, mode, offset, len);
956 if (ret) 961
957 gfs2_rs_deltree(&ip->i_res); 962 if (ret)
963 gfs2_rs_deltree(&ip->i_res);
964 }
958 965
959out_putw: 966out_putw:
960 put_write_access(inode); 967 put_write_access(inode);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 11066d8647d2..82fb5583445c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1549,16 +1549,13 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1549 rhashtable_walk_enter(&gl_hash_table, &iter); 1549 rhashtable_walk_enter(&gl_hash_table, &iter);
1550 1550
1551 do { 1551 do {
1552 gl = ERR_PTR(rhashtable_walk_start(&iter)); 1552 rhashtable_walk_start(&iter);
1553 if (IS_ERR(gl))
1554 goto walk_stop;
1555 1553
1556 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) 1554 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
1557 if (gl->gl_name.ln_sbd == sdp && 1555 if (gl->gl_name.ln_sbd == sdp &&
1558 lockref_get_not_dead(&gl->gl_lockref)) 1556 lockref_get_not_dead(&gl->gl_lockref))
1559 examiner(gl); 1557 examiner(gl);
1560 1558
1561walk_stop:
1562 rhashtable_walk_stop(&iter); 1559 rhashtable_walk_stop(&iter);
1563 } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); 1560 } while (cond_resched(), gl == ERR_PTR(-EAGAIN));
1564 1561
@@ -1924,19 +1921,29 @@ void gfs2_glock_exit(void)
1924 destroy_workqueue(gfs2_delete_workqueue); 1921 destroy_workqueue(gfs2_delete_workqueue);
1925} 1922}
1926 1923
1927static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) 1924static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
1928{ 1925{
1929 while ((gi->gl = rhashtable_walk_next(&gi->hti))) { 1926 if (n == 0)
1930 if (IS_ERR(gi->gl)) { 1927 gi->gl = rhashtable_walk_peek(&gi->hti);
1931 if (PTR_ERR(gi->gl) == -EAGAIN) 1928 else {
1932 continue; 1929 gi->gl = rhashtable_walk_next(&gi->hti);
1933 gi->gl = NULL; 1930 n--;
1934 return; 1931 }
1932 for (;;) {
1933 if (IS_ERR_OR_NULL(gi->gl)) {
1934 if (!gi->gl)
1935 return;
1936 if (PTR_ERR(gi->gl) != -EAGAIN) {
1937 gi->gl = NULL;
1938 return;
1939 }
1940 n = 0;
1941 } else if (gi->sdp == gi->gl->gl_name.ln_sbd &&
1942 !__lockref_is_dead(&gi->gl->gl_lockref)) {
1943 if (!n--)
1944 break;
1935 } 1945 }
1936 /* Skip entries for other sb and dead entries */ 1946 gi->gl = rhashtable_walk_next(&gi->hti);
1937 if (gi->sdp == gi->gl->gl_name.ln_sbd &&
1938 !__lockref_is_dead(&gi->gl->gl_lockref))
1939 return;
1940 } 1947 }
1941} 1948}
1942 1949
@@ -1944,18 +1951,24 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1944 __acquires(RCU) 1951 __acquires(RCU)
1945{ 1952{
1946 struct gfs2_glock_iter *gi = seq->private; 1953 struct gfs2_glock_iter *gi = seq->private;
1947 loff_t n = *pos; 1954 loff_t n;
1948 1955
1949 rhashtable_walk_enter(&gl_hash_table, &gi->hti); 1956 /*
1950 if (rhashtable_walk_start(&gi->hti) != 0) 1957 * We can either stay where we are, skip to the next hash table
1951 return NULL; 1958 * entry, or start from the beginning.
1959 */
1960 if (*pos < gi->last_pos) {
1961 rhashtable_walk_exit(&gi->hti);
1962 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
1963 n = *pos + 1;
1964 } else {
1965 n = *pos - gi->last_pos;
1966 }
1952 1967
1953 do { 1968 rhashtable_walk_start(&gi->hti);
1954 gfs2_glock_iter_next(gi);
1955 } while (gi->gl && n--);
1956 1969
1970 gfs2_glock_iter_next(gi, n);
1957 gi->last_pos = *pos; 1971 gi->last_pos = *pos;
1958
1959 return gi->gl; 1972 return gi->gl;
1960} 1973}
1961 1974
@@ -1966,8 +1979,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1966 1979
1967 (*pos)++; 1980 (*pos)++;
1968 gi->last_pos = *pos; 1981 gi->last_pos = *pos;
1969 gfs2_glock_iter_next(gi); 1982 gfs2_glock_iter_next(gi, 1);
1970
1971 return gi->gl; 1983 return gi->gl;
1972} 1984}
1973 1985
@@ -1978,7 +1990,6 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1978 1990
1979 gi->gl = NULL; 1991 gi->gl = NULL;
1980 rhashtable_walk_stop(&gi->hti); 1992 rhashtable_walk_stop(&gi->hti);
1981 rhashtable_walk_exit(&gi->hti);
1982} 1993}
1983 1994
1984static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 1995static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2044,7 +2055,13 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
2044 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); 2055 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
2045 if (seq->buf) 2056 if (seq->buf)
2046 seq->size = GFS2_SEQ_GOODSIZE; 2057 seq->size = GFS2_SEQ_GOODSIZE;
2058 /*
2059 * Initially, we are "before" the first hash table entry; the
2060 * first call to rhashtable_walk_next gets us the first entry.
2061 */
2062 gi->last_pos = -1;
2047 gi->gl = NULL; 2063 gi->gl = NULL;
2064 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2048 } 2065 }
2049 return ret; 2066 return ret;
2050} 2067}
@@ -2060,6 +2077,7 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
2060 struct gfs2_glock_iter *gi = seq->private; 2077 struct gfs2_glock_iter *gi = seq->private;
2061 2078
2062 gi->gl = NULL; 2079 gi->gl = NULL;
2080 rhashtable_walk_exit(&gi->hti);
2063 return seq_release_private(inode, file); 2081 return seq_release_private(inode, file);
2064} 2082}
2065 2083
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index cdd1c5f06f45..d8782a7a1e7d 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -107,7 +107,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
107 __gfs2_ail_flush(gl, 0, tr.tr_revokes); 107 __gfs2_ail_flush(gl, 0, tr.tr_revokes);
108 108
109 gfs2_trans_end(sdp); 109 gfs2_trans_end(sdp);
110 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 110 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
111 GFS2_LFC_AIL_EMPTY_GL);
111} 112}
112 113
113void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) 114void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
@@ -128,7 +129,8 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
128 return; 129 return;
129 __gfs2_ail_flush(gl, fsync, max_revokes); 130 __gfs2_ail_flush(gl, fsync, max_revokes);
130 gfs2_trans_end(sdp); 131 gfs2_trans_end(sdp);
131 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 132 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
133 GFS2_LFC_AIL_FLUSH);
132} 134}
133 135
134/** 136/**
@@ -157,7 +159,8 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
157 return; 159 return;
158 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 160 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
159 161
160 gfs2_log_flush(sdp, gl, NORMAL_FLUSH); 162 gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
163 GFS2_LFC_RGRP_GO_SYNC);
161 filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); 164 filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
162 error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); 165 error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
163 mapping_set_error(mapping, error); 166 mapping_set_error(mapping, error);
@@ -252,7 +255,8 @@ static void inode_go_sync(struct gfs2_glock *gl)
252 255
253 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 256 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
254 257
255 gfs2_log_flush(gl->gl_name.ln_sbd, gl, NORMAL_FLUSH); 258 gfs2_log_flush(gl->gl_name.ln_sbd, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
259 GFS2_LFC_INODE_GO_SYNC);
256 filemap_fdatawrite(metamapping); 260 filemap_fdatawrite(metamapping);
257 if (isreg) { 261 if (isreg) {
258 struct address_space *mapping = ip->i_inode.i_mapping; 262 struct address_space *mapping = ip->i_inode.i_mapping;
@@ -303,7 +307,9 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
303 } 307 }
304 308
305 if (ip == GFS2_I(gl->gl_name.ln_sbd->sd_rindex)) { 309 if (ip == GFS2_I(gl->gl_name.ln_sbd->sd_rindex)) {
306 gfs2_log_flush(gl->gl_name.ln_sbd, NULL, NORMAL_FLUSH); 310 gfs2_log_flush(gl->gl_name.ln_sbd, NULL,
311 GFS2_LOG_HEAD_FLUSH_NORMAL |
312 GFS2_LFC_INODE_GO_INVAL);
307 gl->gl_name.ln_sbd->sd_rindex_uptodate = 0; 313 gl->gl_name.ln_sbd->sd_rindex_uptodate = 0;
308 } 314 }
309 if (ip && S_ISREG(ip->i_inode.i_mode)) 315 if (ip && S_ISREG(ip->i_inode.i_mode))
@@ -495,7 +501,8 @@ static void freeze_go_sync(struct gfs2_glock *gl)
495 gfs2_assert_withdraw(sdp, 0); 501 gfs2_assert_withdraw(sdp, 0);
496 } 502 }
497 queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); 503 queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
498 gfs2_log_flush(sdp, NULL, FREEZE_FLUSH); 504 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
505 GFS2_LFC_FREEZE_GO_SYNC);
499 } 506 }
500} 507}
501 508
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6e18e9793ec4..e0557b8a590a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -44,7 +44,6 @@ struct gfs2_log_header_host {
44 u32 lh_flags; /* GFS2_LOG_HEAD_... */ 44 u32 lh_flags; /* GFS2_LOG_HEAD_... */
45 u32 lh_tail; /* Block number of log tail */ 45 u32 lh_tail; /* Block number of log tail */
46 u32 lh_blkno; 46 u32 lh_blkno;
47 u32 lh_hash;
48}; 47};
49 48
50/* 49/*
@@ -861,5 +860,10 @@ static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
861 860
862extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl); 861extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl);
863 862
863static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip)
864{
865 return GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
866}
867
864#endif /* __INCORE_DOT_H__ */ 868#endif /* __INCORE_DOT_H__ */
865 869
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4e971b1c7f92..59e0560180ec 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1152,12 +1152,11 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1152 1152
1153 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); 1153 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
1154 if (error) 1154 if (error)
1155 goto out_end_trans; 1155 goto out_gunlock;
1156 1156
1157 error = gfs2_unlink_inode(dip, dentry); 1157 error = gfs2_unlink_inode(dip, dentry);
1158
1159out_end_trans:
1160 gfs2_trans_end(sdp); 1158 gfs2_trans_end(sdp);
1159
1161out_gunlock: 1160out_gunlock:
1162 gfs2_glock_dq(ghs + 2); 1161 gfs2_glock_dq(ghs + 2);
1163out_rgrp: 1162out_rgrp:
@@ -1184,11 +1183,10 @@ out_inodes:
1184static int gfs2_symlink(struct inode *dir, struct dentry *dentry, 1183static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1185 const char *symname) 1184 const char *symname)
1186{ 1185{
1187 struct gfs2_sbd *sdp = GFS2_SB(dir);
1188 unsigned int size; 1186 unsigned int size;
1189 1187
1190 size = strlen(symname); 1188 size = strlen(symname);
1191 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) 1189 if (size >= gfs2_max_stuffed_size(GFS2_I(dir)))
1192 return -ENAMETOOLONG; 1190 return -ENAMETOOLONG;
1193 1191
1194 return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL); 1192 return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL);
@@ -1205,8 +1203,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1205 1203
1206static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 1204static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1207{ 1205{
1208 struct gfs2_sbd *sdp = GFS2_SB(dir); 1206 unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
1209 unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
1210 return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL); 1207 return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL);
1211} 1208}
1212 1209
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 65f33a0ac190..006c6164f759 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1091,7 +1091,7 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
1091 1091
1092 spin_lock(&ls->ls_recover_spin); 1092 spin_lock(&ls->ls_recover_spin);
1093 if (ls->ls_recover_size < jid + 1) { 1093 if (ls->ls_recover_size < jid + 1) {
1094 fs_err(sdp, "recover_slot jid %d gen %u short size %d", 1094 fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
1095 jid, ls->ls_recover_block, ls->ls_recover_size); 1095 jid, ls->ls_recover_block, ls->ls_recover_size);
1096 spin_unlock(&ls->ls_recover_spin); 1096 spin_unlock(&ls->ls_recover_spin);
1097 return; 1097 return;
@@ -1153,7 +1153,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
1153 return; 1153 return;
1154 } 1154 }
1155 if (ls->ls_recover_size < jid + 1) { 1155 if (ls->ls_recover_size < jid + 1) {
1156 fs_err(sdp, "recovery_result jid %d short size %d", 1156 fs_err(sdp, "recovery_result jid %d short size %d\n",
1157 jid, ls->ls_recover_size); 1157 jid, ls->ls_recover_size);
1158 spin_unlock(&ls->ls_recover_spin); 1158 spin_unlock(&ls->ls_recover_spin);
1159 return; 1159 return;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f72c44231406..cf6b46247df4 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -14,6 +14,7 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/crc32c.h>
17#include <linux/delay.h> 18#include <linux/delay.h>
18#include <linux/kthread.h> 19#include <linux/kthread.h>
19#include <linux/freezer.h> 20#include <linux/freezer.h>
@@ -538,9 +539,12 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
538 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp); 539 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
539 while (!list_empty(&sdp->sd_log_le_ordered)) { 540 while (!list_empty(&sdp->sd_log_le_ordered)) {
540 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered); 541 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
541 list_move(&ip->i_ordered, &written); 542 if (ip->i_inode.i_mapping->nrpages == 0) {
542 if (ip->i_inode.i_mapping->nrpages == 0) 543 test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
544 list_del(&ip->i_ordered);
543 continue; 545 continue;
546 }
547 list_move(&ip->i_ordered, &written);
544 spin_unlock(&sdp->sd_ordered_lock); 548 spin_unlock(&sdp->sd_ordered_lock);
545 filemap_fdatawrite(ip->i_inode.i_mapping); 549 filemap_fdatawrite(ip->i_inode.i_mapping);
546 spin_lock(&sdp->sd_ordered_lock); 550 spin_lock(&sdp->sd_ordered_lock);
@@ -648,49 +652,102 @@ out_of_blocks:
648} 652}
649 653
650/** 654/**
651 * log_write_header - Get and initialize a journal header buffer 655 * write_log_header - Write a journal log header buffer at sd_log_flush_head
652 * @sdp: The GFS2 superblock 656 * @sdp: The GFS2 superblock
657 * @jd: journal descriptor of the journal to which we are writing
658 * @seq: sequence number
659 * @tail: tail of the log
660 * @flags: log header flags GFS2_LOG_HEAD_*
661 * @op_flags: flags to pass to the bio
653 * 662 *
654 * Returns: the initialized log buffer descriptor 663 * Returns: the initialized log buffer descriptor
655 */ 664 */
656 665
657static void log_write_header(struct gfs2_sbd *sdp, u32 flags) 666void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
667 u64 seq, u32 tail, u32 flags, int op_flags)
658{ 668{
659 struct gfs2_log_header *lh; 669 struct gfs2_log_header *lh;
660 unsigned int tail; 670 u32 hash, crc;
661 u32 hash;
662 int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
663 struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); 671 struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
664 enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); 672 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
673 struct timespec64 tv;
674 struct super_block *sb = sdp->sd_vfs;
675 u64 addr;
676
665 lh = page_address(page); 677 lh = page_address(page);
666 clear_page(lh); 678 clear_page(lh);
667 679
668 gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
669
670 tail = current_tail(sdp);
671
672 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 680 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
673 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); 681 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
674 lh->lh_header.__pad0 = cpu_to_be64(0); 682 lh->lh_header.__pad0 = cpu_to_be64(0);
675 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); 683 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
676 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); 684 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
677 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); 685 lh->lh_sequence = cpu_to_be64(seq);
678 lh->lh_flags = cpu_to_be32(flags); 686 lh->lh_flags = cpu_to_be32(flags);
679 lh->lh_tail = cpu_to_be32(tail); 687 lh->lh_tail = cpu_to_be32(tail);
680 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head); 688 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
681 hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header)); 689 hash = ~crc32(~0, lh, LH_V1_SIZE);
682 lh->lh_hash = cpu_to_be32(hash); 690 lh->lh_hash = cpu_to_be32(hash);
683 691
692 tv = current_kernel_time64();
693 lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
694 lh->lh_sec = cpu_to_be64(tv.tv_sec);
695 addr = gfs2_log_bmap(sdp);
696 lh->lh_addr = cpu_to_be64(addr);
697 lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);
698
699 /* We may only write local statfs, quota, etc., when writing to our
700 own journal. The values are left 0 when recovering a journal
701 different from our own. */
702 if (!(flags & GFS2_LOG_HEAD_RECOVERY)) {
703 lh->lh_statfs_addr =
704 cpu_to_be64(GFS2_I(sdp->sd_sc_inode)->i_no_addr);
705 lh->lh_quota_addr =
706 cpu_to_be64(GFS2_I(sdp->sd_qc_inode)->i_no_addr);
707
708 spin_lock(&sdp->sd_statfs_spin);
709 lh->lh_local_total = cpu_to_be64(l_sc->sc_total);
710 lh->lh_local_free = cpu_to_be64(l_sc->sc_free);
711 lh->lh_local_dinodes = cpu_to_be64(l_sc->sc_dinodes);
712 spin_unlock(&sdp->sd_statfs_spin);
713 }
714
715 BUILD_BUG_ON(offsetof(struct gfs2_log_header, lh_crc) != LH_V1_SIZE);
716
717 crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
718 sb->s_blocksize - LH_V1_SIZE - 4);
719 lh->lh_crc = cpu_to_be32(crc);
720
721 gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
722 gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
723 log_flush_wait(sdp);
724}
725
726/**
727 * log_write_header - Get and initialize a journal header buffer
728 * @sdp: The GFS2 superblock
729 * @flags: The log header flags, including log header origin
730 *
731 * Returns: the initialized log buffer descriptor
732 */
733
734static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
735{
736 unsigned int tail;
737 int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
738 enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
739
740 gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
741 tail = current_tail(sdp);
742
684 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { 743 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
685 gfs2_ordered_wait(sdp); 744 gfs2_ordered_wait(sdp);
686 log_flush_wait(sdp); 745 log_flush_wait(sdp);
687 op_flags = REQ_SYNC | REQ_META | REQ_PRIO; 746 op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
688 } 747 }
689
690 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); 748 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
691 gfs2_log_write_page(sdp, page); 749 gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
692 gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags); 750 flags, op_flags);
693 log_flush_wait(sdp);
694 751
695 if (sdp->sd_log_tail != tail) 752 if (sdp->sd_log_tail != tail)
696 log_pull_tail(sdp, tail); 753 log_pull_tail(sdp, tail);
@@ -700,11 +757,11 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
700 * gfs2_log_flush - flush incore transaction(s) 757 * gfs2_log_flush - flush incore transaction(s)
701 * @sdp: the filesystem 758 * @sdp: the filesystem
702 * @gl: The glock structure to flush. If NULL, flush the whole incore log 759 * @gl: The glock structure to flush. If NULL, flush the whole incore log
760 * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags
703 * 761 *
704 */ 762 */
705 763
706void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, 764void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
707 enum gfs2_flush_type type)
708{ 765{
709 struct gfs2_trans *tr; 766 struct gfs2_trans *tr;
710 enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); 767 enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
@@ -716,9 +773,9 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
716 up_write(&sdp->sd_log_flush_lock); 773 up_write(&sdp->sd_log_flush_lock);
717 return; 774 return;
718 } 775 }
719 trace_gfs2_log_flush(sdp, 1); 776 trace_gfs2_log_flush(sdp, 1, flags);
720 777
721 if (type == SHUTDOWN_FLUSH) 778 if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
722 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 779 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
723 780
724 sdp->sd_log_flush_head = sdp->sd_log_head; 781 sdp->sd_log_flush_head = sdp->sd_log_head;
@@ -743,11 +800,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
743 800
744 if (sdp->sd_log_head != sdp->sd_log_flush_head) { 801 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
745 log_flush_wait(sdp); 802 log_flush_wait(sdp);
746 log_write_header(sdp, 0); 803 log_write_header(sdp, flags);
747 } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 804 } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
748 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ 805 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
749 trace_gfs2_log_blocks(sdp, -1); 806 trace_gfs2_log_blocks(sdp, -1);
750 log_write_header(sdp, 0); 807 log_write_header(sdp, flags);
751 } 808 }
752 lops_after_commit(sdp, tr); 809 lops_after_commit(sdp, tr);
753 810
@@ -764,7 +821,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
764 spin_unlock(&sdp->sd_ail_lock); 821 spin_unlock(&sdp->sd_ail_lock);
765 gfs2_log_unlock(sdp); 822 gfs2_log_unlock(sdp);
766 823
767 if (type != NORMAL_FLUSH) { 824 if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
768 if (!sdp->sd_log_idle) { 825 if (!sdp->sd_log_idle) {
769 for (;;) { 826 for (;;) {
770 gfs2_ail1_start(sdp); 827 gfs2_ail1_start(sdp);
@@ -774,16 +831,17 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
774 } 831 }
775 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ 832 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
776 trace_gfs2_log_blocks(sdp, -1); 833 trace_gfs2_log_blocks(sdp, -1);
777 log_write_header(sdp, 0); 834 log_write_header(sdp, flags);
778 sdp->sd_log_head = sdp->sd_log_flush_head; 835 sdp->sd_log_head = sdp->sd_log_flush_head;
779 } 836 }
780 if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH) 837 if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
838 GFS2_LOG_HEAD_FLUSH_FREEZE))
781 gfs2_log_shutdown(sdp); 839 gfs2_log_shutdown(sdp);
782 if (type == FREEZE_FLUSH) 840 if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
783 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); 841 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
784 } 842 }
785 843
786 trace_gfs2_log_flush(sdp, 0); 844 trace_gfs2_log_flush(sdp, 0, flags);
787 up_write(&sdp->sd_log_flush_lock); 845 up_write(&sdp->sd_log_flush_lock);
788 846
789 kfree(tr); 847 kfree(tr);
@@ -879,7 +937,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
879 937
880 sdp->sd_log_flush_head = sdp->sd_log_head; 938 sdp->sd_log_flush_head = sdp->sd_log_head;
881 939
882 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT); 940 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);
883 941
884 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); 942 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
885 gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); 943 gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
@@ -935,7 +993,8 @@ int gfs2_logd(void *data)
935 did_flush = false; 993 did_flush = false;
936 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 994 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
937 gfs2_ail1_empty(sdp); 995 gfs2_ail1_empty(sdp);
938 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 996 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
997 GFS2_LFC_LOGD_JFLUSH_REQD);
939 did_flush = true; 998 did_flush = true;
940 } 999 }
941 1000
@@ -943,7 +1002,8 @@ int gfs2_logd(void *data)
943 gfs2_ail1_start(sdp); 1002 gfs2_ail1_start(sdp);
944 gfs2_ail1_wait(sdp); 1003 gfs2_ail1_wait(sdp);
945 gfs2_ail1_empty(sdp); 1004 gfs2_ail1_empty(sdp);
946 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 1005 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
1006 GFS2_LFC_LOGD_AIL_FLUSH_REQD);
947 did_flush = true; 1007 did_flush = true;
948 } 1008 }
949 1009
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 9499a6049212..93b52ac1ca1f 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -65,14 +65,10 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
65 65
66extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); 66extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
67extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 67extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
68enum gfs2_flush_type { 68extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
69 NORMAL_FLUSH = 0, 69 u64 seq, u32 tail, u32 flags, int op_flags);
70 SYNC_FLUSH,
71 SHUTDOWN_FLUSH,
72 FREEZE_FLUSH
73};
74extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, 70extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
75 enum gfs2_flush_type type); 71 u32 type);
76extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 72extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
77extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 73extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
78extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); 74extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c8ff7b7954f0..4d6567990baf 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -18,6 +18,7 @@
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/list_sort.h> 19#include <linux/list_sort.h>
20 20
21#include "dir.h"
21#include "gfs2.h" 22#include "gfs2.h"
22#include "incore.h" 23#include "incore.h"
23#include "inode.h" 24#include "inode.h"
@@ -138,7 +139,7 @@ static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
138 sdp->sd_log_flush_head = 0; 139 sdp->sd_log_flush_head = 0;
139} 140}
140 141
141static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) 142u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
142{ 143{
143 unsigned int lbn = sdp->sd_log_flush_head; 144 unsigned int lbn = sdp->sd_log_flush_head;
144 struct gfs2_journal_extent *je; 145 struct gfs2_journal_extent *je;
@@ -161,7 +162,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
161 * @bvec: The bio_vec 162 * @bvec: The bio_vec
162 * @error: The i/o status 163 * @error: The i/o status
163 * 164 *
164 * This finds the relavent buffers and unlocks then and sets the 165 * This finds the relevant buffers and unlocks them and sets the
165 * error flag according to the status of the i/o request. This is 166 * error flag according to the status of the i/o request. This is
166 * used when the log is writing data which has an in-place version 167 * used when the log is writing data which has an in-place version
167 * that is pinned in the pagecache. 168 * that is pinned in the pagecache.
@@ -306,23 +307,22 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
306 return gfs2_log_alloc_bio(sdp, blkno); 307 return gfs2_log_alloc_bio(sdp, blkno);
307} 308}
308 309
309
310/** 310/**
311 * gfs2_log_write - write to log 311 * gfs2_log_write - write to log
312 * @sdp: the filesystem 312 * @sdp: the filesystem
313 * @page: the page to write 313 * @page: the page to write
314 * @size: the size of the data to write 314 * @size: the size of the data to write
315 * @offset: the offset within the page 315 * @offset: the offset within the page
316 * @blkno: block number of the log entry
316 * 317 *
317 * Try and add the page segment to the current bio. If that fails, 318 * Try and add the page segment to the current bio. If that fails,
318 * submit the current bio to the device and create a new one, and 319 * submit the current bio to the device and create a new one, and
319 * then add the page segment to that. 320 * then add the page segment to that.
320 */ 321 */
321 322
322static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page, 323void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
323 unsigned size, unsigned offset) 324 unsigned size, unsigned offset, u64 blkno)
324{ 325{
325 u64 blkno = gfs2_log_bmap(sdp);
326 struct bio *bio; 326 struct bio *bio;
327 int ret; 327 int ret;
328 328
@@ -348,7 +348,8 @@ static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
348 348
349static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh) 349static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
350{ 350{
351 gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh)); 351 gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh),
352 gfs2_log_bmap(sdp));
352} 353}
353 354
354/** 355/**
@@ -365,7 +366,8 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
365void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page) 366void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
366{ 367{
367 struct super_block *sb = sdp->sd_vfs; 368 struct super_block *sb = sdp->sd_vfs;
368 gfs2_log_write(sdp, page, sb->s_blocksize, 0); 369 gfs2_log_write(sdp, page, sb->s_blocksize, 0,
370 gfs2_log_bmap(sdp));
369} 371}
370 372
371static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, 373static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index e529f536c117..e4949394f054 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -26,6 +26,9 @@ extern const struct gfs2_log_operations gfs2_revoke_lops;
26extern const struct gfs2_log_operations gfs2_databuf_lops; 26extern const struct gfs2_log_operations gfs2_databuf_lops;
27 27
28extern const struct gfs2_log_operations *gfs2_log_ops[]; 28extern const struct gfs2_log_operations *gfs2_log_ops[];
29extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
30extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
31 unsigned size, unsigned offset, u64 blkno);
29extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 32extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
30extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags); 33extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags);
31extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); 34extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 0a89e6f7a314..2d55e2c3333c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -93,7 +93,7 @@ static int __init init_gfs2_fs(void)
93 93
94 error = gfs2_glock_init(); 94 error = gfs2_glock_init();
95 if (error) 95 if (error)
96 goto fail; 96 goto fail_glock;
97 97
98 error = -ENOMEM; 98 error = -ENOMEM;
99 gfs2_glock_cachep = kmem_cache_create("gfs2_glock", 99 gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
@@ -101,7 +101,7 @@ static int __init init_gfs2_fs(void)
101 0, 0, 101 0, 0,
102 gfs2_init_glock_once); 102 gfs2_init_glock_once);
103 if (!gfs2_glock_cachep) 103 if (!gfs2_glock_cachep)
104 goto fail; 104 goto fail_cachep1;
105 105
106 gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock(aspace)", 106 gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock(aspace)",
107 sizeof(struct gfs2_glock) + 107 sizeof(struct gfs2_glock) +
@@ -109,7 +109,7 @@ static int __init init_gfs2_fs(void)
109 0, 0, gfs2_init_gl_aspace_once); 109 0, 0, gfs2_init_gl_aspace_once);
110 110
111 if (!gfs2_glock_aspace_cachep) 111 if (!gfs2_glock_aspace_cachep)
112 goto fail; 112 goto fail_cachep2;
113 113
114 gfs2_inode_cachep = kmem_cache_create("gfs2_inode", 114 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
115 sizeof(struct gfs2_inode), 115 sizeof(struct gfs2_inode),
@@ -118,107 +118,105 @@ static int __init init_gfs2_fs(void)
118 SLAB_ACCOUNT, 118 SLAB_ACCOUNT,
119 gfs2_init_inode_once); 119 gfs2_init_inode_once);
120 if (!gfs2_inode_cachep) 120 if (!gfs2_inode_cachep)
121 goto fail; 121 goto fail_cachep3;
122 122
123 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata", 123 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
124 sizeof(struct gfs2_bufdata), 124 sizeof(struct gfs2_bufdata),
125 0, 0, NULL); 125 0, 0, NULL);
126 if (!gfs2_bufdata_cachep) 126 if (!gfs2_bufdata_cachep)
127 goto fail; 127 goto fail_cachep4;
128 128
129 gfs2_rgrpd_cachep = kmem_cache_create("gfs2_rgrpd", 129 gfs2_rgrpd_cachep = kmem_cache_create("gfs2_rgrpd",
130 sizeof(struct gfs2_rgrpd), 130 sizeof(struct gfs2_rgrpd),
131 0, 0, NULL); 131 0, 0, NULL);
132 if (!gfs2_rgrpd_cachep) 132 if (!gfs2_rgrpd_cachep)
133 goto fail; 133 goto fail_cachep5;
134 134
135 gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad", 135 gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad",
136 sizeof(struct gfs2_quota_data), 136 sizeof(struct gfs2_quota_data),
137 0, 0, NULL); 137 0, 0, NULL);
138 if (!gfs2_quotad_cachep) 138 if (!gfs2_quotad_cachep)
139 goto fail; 139 goto fail_cachep6;
140 140
141 gfs2_qadata_cachep = kmem_cache_create("gfs2_qadata", 141 gfs2_qadata_cachep = kmem_cache_create("gfs2_qadata",
142 sizeof(struct gfs2_qadata), 142 sizeof(struct gfs2_qadata),
143 0, 0, NULL); 143 0, 0, NULL);
144 if (!gfs2_qadata_cachep) 144 if (!gfs2_qadata_cachep)
145 goto fail; 145 goto fail_cachep7;
146 146
147 error = register_shrinker(&gfs2_qd_shrinker); 147 error = register_shrinker(&gfs2_qd_shrinker);
148 if (error) 148 if (error)
149 goto fail; 149 goto fail_shrinker;
150 150
151 error = register_filesystem(&gfs2_fs_type); 151 error = register_filesystem(&gfs2_fs_type);
152 if (error) 152 if (error)
153 goto fail; 153 goto fail_fs1;
154 154
155 error = register_filesystem(&gfs2meta_fs_type); 155 error = register_filesystem(&gfs2meta_fs_type);
156 if (error) 156 if (error)
157 goto fail_unregister; 157 goto fail_fs2;
158 158
159 error = -ENOMEM; 159 error = -ENOMEM;
160 gfs_recovery_wq = alloc_workqueue("gfs_recovery", 160 gfs_recovery_wq = alloc_workqueue("gfs_recovery",
161 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); 161 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
162 if (!gfs_recovery_wq) 162 if (!gfs_recovery_wq)
163 goto fail_wq; 163 goto fail_wq1;
164 164
165 gfs2_control_wq = alloc_workqueue("gfs2_control", 165 gfs2_control_wq = alloc_workqueue("gfs2_control",
166 WQ_UNBOUND | WQ_FREEZABLE, 0); 166 WQ_UNBOUND | WQ_FREEZABLE, 0);
167 if (!gfs2_control_wq) 167 if (!gfs2_control_wq)
168 goto fail_recovery; 168 goto fail_wq2;
169 169
170 gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0); 170 gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
171 171
172 if (!gfs2_freeze_wq) 172 if (!gfs2_freeze_wq)
173 goto fail_control; 173 goto fail_wq3;
174 174
175 gfs2_page_pool = mempool_create_page_pool(64, 0); 175 gfs2_page_pool = mempool_create_page_pool(64, 0);
176 if (!gfs2_page_pool) 176 if (!gfs2_page_pool)
177 goto fail_freeze; 177 goto fail_mempool;
178 178
179 gfs2_register_debugfs(); 179 error = gfs2_register_debugfs();
180 if (error)
181 goto fail_debugfs;
180 182
181 pr_info("GFS2 installed\n"); 183 pr_info("GFS2 installed\n");
182 184
183 return 0; 185 return 0;
184 186
185fail_freeze: 187fail_debugfs:
188 mempool_destroy(gfs2_page_pool);
189fail_mempool:
186 destroy_workqueue(gfs2_freeze_wq); 190 destroy_workqueue(gfs2_freeze_wq);
187fail_control: 191fail_wq3:
188 destroy_workqueue(gfs2_control_wq); 192 destroy_workqueue(gfs2_control_wq);
189fail_recovery: 193fail_wq2:
190 destroy_workqueue(gfs_recovery_wq); 194 destroy_workqueue(gfs_recovery_wq);
191fail_wq: 195fail_wq1:
192 unregister_filesystem(&gfs2meta_fs_type); 196 unregister_filesystem(&gfs2meta_fs_type);
193fail_unregister: 197fail_fs2:
194 unregister_filesystem(&gfs2_fs_type); 198 unregister_filesystem(&gfs2_fs_type);
195fail: 199fail_fs1:
196 list_lru_destroy(&gfs2_qd_lru);
197fail_lru:
198 unregister_shrinker(&gfs2_qd_shrinker); 200 unregister_shrinker(&gfs2_qd_shrinker);
201fail_shrinker:
202 kmem_cache_destroy(gfs2_qadata_cachep);
203fail_cachep7:
204 kmem_cache_destroy(gfs2_quotad_cachep);
205fail_cachep6:
206 kmem_cache_destroy(gfs2_rgrpd_cachep);
207fail_cachep5:
208 kmem_cache_destroy(gfs2_bufdata_cachep);
209fail_cachep4:
210 kmem_cache_destroy(gfs2_inode_cachep);
211fail_cachep3:
212 kmem_cache_destroy(gfs2_glock_aspace_cachep);
213fail_cachep2:
214 kmem_cache_destroy(gfs2_glock_cachep);
215fail_cachep1:
199 gfs2_glock_exit(); 216 gfs2_glock_exit();
200 217fail_glock:
201 if (gfs2_qadata_cachep) 218 list_lru_destroy(&gfs2_qd_lru);
202 kmem_cache_destroy(gfs2_qadata_cachep); 219fail_lru:
203
204 if (gfs2_quotad_cachep)
205 kmem_cache_destroy(gfs2_quotad_cachep);
206
207 if (gfs2_rgrpd_cachep)
208 kmem_cache_destroy(gfs2_rgrpd_cachep);
209
210 if (gfs2_bufdata_cachep)
211 kmem_cache_destroy(gfs2_bufdata_cachep);
212
213 if (gfs2_inode_cachep)
214 kmem_cache_destroy(gfs2_inode_cachep);
215
216 if (gfs2_glock_aspace_cachep)
217 kmem_cache_destroy(gfs2_glock_aspace_cachep);
218
219 if (gfs2_glock_cachep)
220 kmem_cache_destroy(gfs2_glock_cachep);
221
222 gfs2_sys_uninit(); 220 gfs2_sys_uninit();
223 return error; 221 return error;
224} 222}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ad55eb86a250..e6a0a8a89ea7 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1382,7 +1382,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1382 return; 1382 return;
1383 } 1383 }
1384 1384
1385 gfs2_log_flush(sdp, NULL, SYNC_FLUSH); 1385 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SYNC | GFS2_LFC_KILL_SB);
1386 dput(sdp->sd_root_dir); 1386 dput(sdp->sd_root_dir);
1387 dput(sdp->sd_master_dir); 1387 dput(sdp->sd_master_dir);
1388 sdp->sd_root_dir = NULL; 1388 sdp->sd_root_dir = NULL;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e700fb162664..7a98abd340ee 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -955,7 +955,8 @@ out:
955 gfs2_glock_dq_uninit(&ghs[qx]); 955 gfs2_glock_dq_uninit(&ghs[qx]);
956 inode_unlock(&ip->i_inode); 956 inode_unlock(&ip->i_inode);
957 kfree(ghs); 957 kfree(ghs);
958 gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH); 958 gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
959 GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
959 return error; 960 return error;
960} 961}
961 962
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 9395a3db1a60..b6b258998bcd 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -14,12 +14,14 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/crc32c.h>
17 18
18#include "gfs2.h" 19#include "gfs2.h"
19#include "incore.h" 20#include "incore.h"
20#include "bmap.h" 21#include "bmap.h"
21#include "glock.h" 22#include "glock.h"
22#include "glops.h" 23#include "glops.h"
24#include "log.h"
23#include "lops.h" 25#include "lops.h"
24#include "meta_io.h" 26#include "meta_io.h"
25#include "recovery.h" 27#include "recovery.h"
@@ -117,22 +119,6 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
117 } 119 }
118} 120}
119 121
120static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
121{
122 const struct gfs2_log_header *str = buf;
123
124 if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
125 str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
126 return 1;
127
128 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
129 lh->lh_flags = be32_to_cpu(str->lh_flags);
130 lh->lh_tail = be32_to_cpu(str->lh_tail);
131 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
132 lh->lh_hash = be32_to_cpu(str->lh_hash);
133 return 0;
134}
135
136/** 122/**
137 * get_log_header - read the log header for a given segment 123 * get_log_header - read the log header for a given segment
138 * @jd: the journal 124 * @jd: the journal
@@ -150,29 +136,37 @@ static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
150static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, 136static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
151 struct gfs2_log_header_host *head) 137 struct gfs2_log_header_host *head)
152{ 138{
139 struct gfs2_log_header *lh;
153 struct buffer_head *bh; 140 struct buffer_head *bh;
154 struct gfs2_log_header_host uninitialized_var(lh); 141 u32 hash, crc;
155 const u32 nothing = 0;
156 u32 hash;
157 int error; 142 int error;
158 143
159 error = gfs2_replay_read_block(jd, blk, &bh); 144 error = gfs2_replay_read_block(jd, blk, &bh);
160 if (error) 145 if (error)
161 return error; 146 return error;
147 lh = (void *)bh->b_data;
162 148
163 hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) - 149 hash = crc32(~0, lh, LH_V1_SIZE - 4);
164 sizeof(u32)); 150 hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
165 hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
166 hash ^= (u32)~0;
167 error = gfs2_log_header_in(&lh, bh->b_data);
168 brelse(bh);
169 151
170 if (error || lh.lh_blkno != blk || lh.lh_hash != hash) 152 crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
171 return 1; 153 bh->b_size - LH_V1_SIZE - 4);
172 154
173 *head = lh; 155 error = lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
156 lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
157 be32_to_cpu(lh->lh_blkno) != blk ||
158 be32_to_cpu(lh->lh_hash) != hash ||
159 (lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc);
174 160
175 return 0; 161 brelse(bh);
162
163 if (!error) {
164 head->lh_sequence = be64_to_cpu(lh->lh_sequence);
165 head->lh_flags = be32_to_cpu(lh->lh_flags);
166 head->lh_tail = be32_to_cpu(lh->lh_tail);
167 head->lh_blkno = be32_to_cpu(lh->lh_blkno);
168 }
169 return error;
176} 170}
177 171
178/** 172/**
@@ -370,62 +364,22 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
370 364
371/** 365/**
372 * clean_journal - mark a dirty journal as being clean 366 * clean_journal - mark a dirty journal as being clean
373 * @sdp: the filesystem
374 * @jd: the journal 367 * @jd: the journal
375 * @gl: the journal's glock
376 * @head: the head journal to start from 368 * @head: the head journal to start from
377 * 369 *
378 * Returns: errno 370 * Returns: errno
379 */ 371 */
380 372
381static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) 373static void clean_journal(struct gfs2_jdesc *jd,
374 struct gfs2_log_header_host *head)
382{ 375{
383 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
384 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 376 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
385 unsigned int lblock;
386 struct gfs2_log_header *lh;
387 u32 hash;
388 struct buffer_head *bh;
389 int error;
390 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
391
392 lblock = head->lh_blkno;
393 gfs2_replay_incr_blk(jd, &lblock);
394 bh_map.b_size = 1 << ip->i_inode.i_blkbits;
395 error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
396 if (error)
397 return error;
398 if (!bh_map.b_blocknr) {
399 gfs2_consist_inode(ip);
400 return -EIO;
401 }
402
403 bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
404 lock_buffer(bh);
405 memset(bh->b_data, 0, bh->b_size);
406 set_buffer_uptodate(bh);
407 clear_buffer_dirty(bh);
408 unlock_buffer(bh);
409
410 lh = (struct gfs2_log_header *)bh->b_data;
411 memset(lh, 0, sizeof(struct gfs2_log_header));
412 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
413 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
414 lh->lh_header.__pad0 = cpu_to_be64(0);
415 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
416 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
417 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
418 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
419 lh->lh_blkno = cpu_to_be32(lblock);
420 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
421 lh->lh_hash = cpu_to_be32(hash);
422
423 set_buffer_dirty(bh);
424 if (sync_dirty_buffer(bh))
425 gfs2_io_error_bh(sdp, bh);
426 brelse(bh);
427 377
428 return error; 378 sdp->sd_log_flush_head = head->lh_blkno;
379 gfs2_replay_incr_blk(jd, &sdp->sd_log_flush_head);
380 gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0,
381 GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
382 REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
429} 383}
430 384
431 385
@@ -552,9 +506,7 @@ void gfs2_recover_func(struct work_struct *work)
552 goto fail_gunlock_thaw; 506 goto fail_gunlock_thaw;
553 } 507 }
554 508
555 error = clean_journal(jd, &head); 509 clean_journal(jd, &head);
556 if (error)
557 goto fail_gunlock_thaw;
558 510
559 gfs2_glock_dq_uninit(&thaw_gh); 511 gfs2_glock_dq_uninit(&thaw_gh);
560 t = DIV_ROUND_UP(jiffies - t, HZ); 512 t = DIV_ROUND_UP(jiffies - t, HZ);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 95b2a57ded33..8b683917a27e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -34,6 +34,7 @@
34#include "log.h" 34#include "log.h"
35#include "inode.h" 35#include "inode.h"
36#include "trace_gfs2.h" 36#include "trace_gfs2.h"
37#include "dir.h"
37 38
38#define BFITNOENT ((u32)~0) 39#define BFITNOENT ((u32)~0)
39#define NO_BLOCK ((u64)~0) 40#define NO_BLOCK ((u64)~0)
@@ -489,6 +490,13 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
489 * @blk: The data block number 490 * @blk: The data block number
490 * @exact: True if this needs to be an exact match 491 * @exact: True if this needs to be an exact match
491 * 492 *
493 * The @exact argument should be set to true by most callers. The exception
494 * is when we need to match blocks which are not represented by the rgrp
495 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
496 * there for alignment purposes. Another way of looking at it is that @exact
497 * matches only valid data/metadata blocks, but with @exact false, it will
498 * match any block within the extent of the rgrp.
499 *
492 * Returns: The resource group, or NULL if not found 500 * Returns: The resource group, or NULL if not found
493 */ 501 */
494 502
@@ -1040,17 +1048,30 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1040 rgd->rd_free = be32_to_cpu(str->rg_free); 1048 rgd->rd_free = be32_to_cpu(str->rg_free);
1041 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 1049 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1042 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 1050 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1051 /* rd_data0, rd_data and rd_bitbytes already set from rindex */
1043} 1052}
1044 1053
1045static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 1054static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1046{ 1055{
1056 struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
1047 struct gfs2_rgrp *str = buf; 1057 struct gfs2_rgrp *str = buf;
1058 u32 crc;
1048 1059
1049 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 1060 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1050 str->rg_free = cpu_to_be32(rgd->rd_free); 1061 str->rg_free = cpu_to_be32(rgd->rd_free);
1051 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 1062 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1052 str->__pad = cpu_to_be32(0); 1063 if (next == NULL)
1064 str->rg_skip = 0;
1065 else if (next->rd_addr > rgd->rd_addr)
1066 str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
1053 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 1067 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1068 str->rg_data0 = cpu_to_be64(rgd->rd_data0);
1069 str->rg_data = cpu_to_be32(rgd->rd_data);
1070 str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
1071 str->rg_crc = 0;
1072 crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
1073 str->rg_crc = cpu_to_be32(crc);
1074
1054 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 1075 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1055} 1076}
1056 1077
@@ -1318,7 +1339,7 @@ start_new_extent:
1318 1339
1319fail: 1340fail:
1320 if (sdp->sd_args.ar_discard) 1341 if (sdp->sd_args.ar_discard)
1321 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 1342 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
1322 sdp->sd_args.ar_discard = 0; 1343 sdp->sd_args.ar_discard = 0;
1323 return -EIO; 1344 return -EIO;
1324} 1345}
@@ -2072,7 +2093,8 @@ next_rgrp:
2072 } 2093 }
2073 /* Flushing the log may release space */ 2094 /* Flushing the log may release space */
2074 if (loops == 2) 2095 if (loops == 2)
2075 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 2096 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
2097 GFS2_LFC_INPLACE_RESERVE);
2076 } 2098 }
2077 2099
2078 return -ENOSPC; 2100 return -ENOSPC;
@@ -2453,12 +2475,12 @@ void gfs2_unlink_di(struct inode *inode)
2453 update_rgrp_lvb_unlinked(rgd, 1); 2475 update_rgrp_lvb_unlinked(rgd, 1);
2454} 2476}
2455 2477
2456static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 2478void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2457{ 2479{
2458 struct gfs2_sbd *sdp = rgd->rd_sbd; 2480 struct gfs2_sbd *sdp = rgd->rd_sbd;
2459 struct gfs2_rgrpd *tmp_rgd; 2481 struct gfs2_rgrpd *tmp_rgd;
2460 2482
2461 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); 2483 tmp_rgd = rgblk_free(sdp, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2462 if (!tmp_rgd) 2484 if (!tmp_rgd)
2463 return; 2485 return;
2464 gfs2_assert_withdraw(sdp, rgd == tmp_rgd); 2486 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
@@ -2474,12 +2496,6 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2474 update_rgrp_lvb_unlinked(rgd, -1); 2496 update_rgrp_lvb_unlinked(rgd, -1);
2475 2497
2476 gfs2_statfs_change(sdp, 0, +1, -1); 2498 gfs2_statfs_change(sdp, 0, +1, -1);
2477}
2478
2479
2480void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2481{
2482 gfs2_free_uninit_di(rgd, ip->i_no_addr);
2483 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); 2499 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2484 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 2500 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2485 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 2501 gfs2_meta_wipe(ip, ip->i_no_addr, 1);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d81d46e19726..620be0521866 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -757,7 +757,9 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
757 bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip)); 757 bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));
758 758
759 if (flush_all) 759 if (flush_all)
760 gfs2_log_flush(GFS2_SB(inode), ip->i_gl, NORMAL_FLUSH); 760 gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
761 GFS2_LOG_HEAD_FLUSH_NORMAL |
762 GFS2_LFC_WRITE_INODE);
761 if (bdi->wb.dirty_exceeded) 763 if (bdi->wb.dirty_exceeded)
762 gfs2_ail1_flush(sdp, wbc); 764 gfs2_ail1_flush(sdp, wbc);
763 else 765 else
@@ -766,6 +768,12 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
766 ret = filemap_fdatawait(metamapping); 768 ret = filemap_fdatawait(metamapping);
767 if (ret) 769 if (ret)
768 mark_inode_dirty_sync(inode); 770 mark_inode_dirty_sync(inode);
771 else {
772 spin_lock(&inode->i_lock);
773 if (!(inode->i_flags & I_DIRTY))
774 gfs2_ordered_del_inode(ip);
775 spin_unlock(&inode->i_lock);
776 }
769 return ret; 777 return ret;
770} 778}
771 779
@@ -853,7 +861,8 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
853 gfs2_quota_sync(sdp->sd_vfs, 0); 861 gfs2_quota_sync(sdp->sd_vfs, 0);
854 gfs2_statfs_sync(sdp->sd_vfs, 0); 862 gfs2_statfs_sync(sdp->sd_vfs, 0);
855 863
856 gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH); 864 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
865 GFS2_LFC_MAKE_FS_RO);
857 wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); 866 wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
858 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); 867 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
859 868
@@ -946,7 +955,8 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
946 955
947 gfs2_quota_sync(sb, -1); 956 gfs2_quota_sync(sb, -1);
948 if (wait) 957 if (wait)
949 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 958 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
959 GFS2_LFC_SYNC_FS);
950 return sdp->sd_log_error; 960 return sdp->sd_log_error;
951} 961}
952 962
@@ -1650,7 +1660,8 @@ alloc_failed:
1650 goto out_unlock; 1660 goto out_unlock;
1651 1661
1652out_truncate: 1662out_truncate:
1653 gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); 1663 gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
1664 GFS2_LFC_EVICT_INODE);
1654 metamapping = gfs2_glock2aspace(ip->i_gl); 1665 metamapping = gfs2_glock2aspace(ip->i_gl);
1655 if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { 1666 if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
1656 filemap_fdatawrite(metamapping); 1667 filemap_fdatawrite(metamapping);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9eb9d0a1abd9..c191fa58a1df 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -112,7 +112,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
112 } 112 }
113 113
114 if (error) { 114 if (error) {
115 fs_warn(sdp, "freeze %d error %d", n, error); 115 fs_warn(sdp, "freeze %d error %d\n", n, error);
116 return error; 116 return error;
117 } 117 }
118 118
@@ -679,7 +679,7 @@ fail_tune:
679 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 679 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
680fail_reg: 680fail_reg:
681 free_percpu(sdp->sd_lkstats); 681 free_percpu(sdp->sd_lkstats);
682 fs_err(sdp, "error %d adding sysfs files", error); 682 fs_err(sdp, "error %d adding sysfs files\n", error);
683 if (sysfs_frees_sdp) 683 if (sysfs_frees_sdp)
684 kobject_put(&sdp->sd_kobj); 684 kobject_put(&sdp->sd_kobj);
685 else 685 else
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index f67a709589d3..b9318b49ff8f 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -353,26 +353,29 @@ TRACE_EVENT(gfs2_pin,
353/* Flushing the log */ 353/* Flushing the log */
354TRACE_EVENT(gfs2_log_flush, 354TRACE_EVENT(gfs2_log_flush,
355 355
356 TP_PROTO(const struct gfs2_sbd *sdp, int start), 356 TP_PROTO(const struct gfs2_sbd *sdp, int start, u32 flags),
357 357
358 TP_ARGS(sdp, start), 358 TP_ARGS(sdp, start, flags),
359 359
360 TP_STRUCT__entry( 360 TP_STRUCT__entry(
361 __field( dev_t, dev ) 361 __field( dev_t, dev )
362 __field( int, start ) 362 __field( int, start )
363 __field( u64, log_seq ) 363 __field( u64, log_seq )
364 __field( u32, flags )
364 ), 365 ),
365 366
366 TP_fast_assign( 367 TP_fast_assign(
367 __entry->dev = sdp->sd_vfs->s_dev; 368 __entry->dev = sdp->sd_vfs->s_dev;
368 __entry->start = start; 369 __entry->start = start;
369 __entry->log_seq = sdp->sd_log_sequence; 370 __entry->log_seq = sdp->sd_log_sequence;
371 __entry->flags = flags;
370 ), 372 ),
371 373
372 TP_printk("%u,%u log flush %s %llu", 374 TP_printk("%u,%u log flush %s %llu %llx",
373 MAJOR(__entry->dev), MINOR(__entry->dev), 375 MAJOR(__entry->dev), MINOR(__entry->dev),
374 __entry->start ? "start" : "end", 376 __entry->start ? "start" : "end",
375 (unsigned long long)__entry->log_seq) 377 (unsigned long long)__entry->log_seq,
378 (unsigned long long)__entry->flags)
376); 379);
377 380
378/* Reserving/releasing blocks in the log */ 381/* Reserving/releasing blocks in the log */
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index ca8b72d0a831..c75cacaa349b 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -92,7 +92,6 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
92 s64 nbuf; 92 s64 nbuf;
93 int alloced = test_bit(TR_ALLOCED, &tr->tr_flags); 93 int alloced = test_bit(TR_ALLOCED, &tr->tr_flags);
94 94
95 BUG_ON(!tr);
96 current->journal_info = NULL; 95 current->journal_info = NULL;
97 96
98 if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { 97 if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
@@ -118,7 +117,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
118 up_read(&sdp->sd_log_flush_lock); 117 up_read(&sdp->sd_log_flush_lock);
119 118
120 if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS) 119 if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
121 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 120 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
121 GFS2_LFC_TRANS_END);
122 if (alloced) 122 if (alloced)
123 sb_end_intwrite(sdp->sd_vfs); 123 sb_end_intwrite(sdp->sd_vfs);
124} 124}
diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig
index 24bc20fd42f7..7cc8b4acf66a 100644
--- a/fs/hfsplus/Kconfig
+++ b/fs/hfsplus/Kconfig
@@ -20,9 +20,6 @@ config HFSPLUS_FS_POSIX_ACL
20 POSIX Access Control Lists (ACLs) support permissions for users and 20 POSIX Access Control Lists (ACLs) support permissions for users and
21 groups beyond the owner/group/world scheme. 21 groups beyond the owner/group/world scheme.
22 22
23 To learn more about Access Control Lists, visit the POSIX ACLs for
24 Linux website <http://acl.bestbits.at/>.
25
26 It needs to understand that POSIX ACLs are treated only under 23 It needs to understand that POSIX ACLs are treated only under
27 Linux. POSIX ACLs doesn't mean something under Mac OS X. 24 Linux. POSIX ACLs doesn't mean something under Mac OS X.
28 Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs, 25 Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8a85f3f53446..8fe1b0aa2896 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -55,16 +55,6 @@ struct hugetlbfs_config {
55 umode_t mode; 55 umode_t mode;
56}; 56};
57 57
58struct hugetlbfs_inode_info {
59 struct shared_policy policy;
60 struct inode vfs_inode;
61};
62
63static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
64{
65 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
66}
67
68int sysctl_hugetlb_shm_group; 58int sysctl_hugetlb_shm_group;
69 59
70enum { 60enum {
@@ -520,8 +510,16 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
520 510
521 if (hole_end > hole_start) { 511 if (hole_end > hole_start) {
522 struct address_space *mapping = inode->i_mapping; 512 struct address_space *mapping = inode->i_mapping;
513 struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
523 514
524 inode_lock(inode); 515 inode_lock(inode);
516
517 /* protected by i_mutex */
518 if (info->seals & F_SEAL_WRITE) {
519 inode_unlock(inode);
520 return -EPERM;
521 }
522
525 i_mmap_lock_write(mapping); 523 i_mmap_lock_write(mapping);
526 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) 524 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
527 hugetlb_vmdelete_list(&mapping->i_mmap, 525 hugetlb_vmdelete_list(&mapping->i_mmap,
@@ -539,6 +537,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
539 loff_t len) 537 loff_t len)
540{ 538{
541 struct inode *inode = file_inode(file); 539 struct inode *inode = file_inode(file);
540 struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
542 struct address_space *mapping = inode->i_mapping; 541 struct address_space *mapping = inode->i_mapping;
543 struct hstate *h = hstate_inode(inode); 542 struct hstate *h = hstate_inode(inode);
544 struct vm_area_struct pseudo_vma; 543 struct vm_area_struct pseudo_vma;
@@ -570,6 +569,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
570 if (error) 569 if (error)
571 goto out; 570 goto out;
572 571
572 if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
573 error = -EPERM;
574 goto out;
575 }
576
573 /* 577 /*
574 * Initialize a pseudo vma as this is required by the huge page 578 * Initialize a pseudo vma as this is required by the huge page
575 * allocation routines. If NUMA is configured, use page index 579 * allocation routines. If NUMA is configured, use page index
@@ -660,6 +664,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
660 struct hstate *h = hstate_inode(inode); 664 struct hstate *h = hstate_inode(inode);
661 int error; 665 int error;
662 unsigned int ia_valid = attr->ia_valid; 666 unsigned int ia_valid = attr->ia_valid;
667 struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
663 668
664 BUG_ON(!inode); 669 BUG_ON(!inode);
665 670
@@ -668,9 +673,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
668 return error; 673 return error;
669 674
670 if (ia_valid & ATTR_SIZE) { 675 if (ia_valid & ATTR_SIZE) {
671 if (attr->ia_size & ~huge_page_mask(h)) 676 loff_t oldsize = inode->i_size;
677 loff_t newsize = attr->ia_size;
678
679 if (newsize & ~huge_page_mask(h))
672 return -EINVAL; 680 return -EINVAL;
673 error = hugetlb_vmtruncate(inode, attr->ia_size); 681 /* protected by i_mutex */
682 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
683 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
684 return -EPERM;
685 error = hugetlb_vmtruncate(inode, newsize);
674 if (error) 686 if (error)
675 return error; 687 return error;
676 } 688 }
@@ -722,6 +734,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
722 734
723 inode = new_inode(sb); 735 inode = new_inode(sb);
724 if (inode) { 736 if (inode) {
737 struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
738
725 inode->i_ino = get_next_ino(); 739 inode->i_ino = get_next_ino();
726 inode_init_owner(inode, dir, mode); 740 inode_init_owner(inode, dir, mode);
727 lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, 741 lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
@@ -729,6 +743,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
729 inode->i_mapping->a_ops = &hugetlbfs_aops; 743 inode->i_mapping->a_ops = &hugetlbfs_aops;
730 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 744 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
731 inode->i_mapping->private_data = resv_map; 745 inode->i_mapping->private_data = resv_map;
746 info->seals = F_SEAL_SEAL;
732 switch (mode & S_IFMT) { 747 switch (mode & S_IFMT) {
733 default: 748 default:
734 init_special_inode(inode, mode, dev); 749 init_special_inode(inode, mode, dev);
diff --git a/fs/iomap.c b/fs/iomap.c
index 47d29ccffaef..afd163586aa0 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -65,6 +65,8 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
65 return ret; 65 return ret;
66 if (WARN_ON(iomap.offset > pos)) 66 if (WARN_ON(iomap.offset > pos))
67 return -EIO; 67 return -EIO;
68 if (WARN_ON(iomap.length == 0))
69 return -EIO;
68 70
69 /* 71 /*
70 * Cut down the length to the one actually provided by the filesystem, 72 * Cut down the length to the one actually provided by the filesystem,
@@ -753,7 +755,8 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
753 err = invalidate_inode_pages2_range(inode->i_mapping, 755 err = invalidate_inode_pages2_range(inode->i_mapping,
754 offset >> PAGE_SHIFT, 756 offset >> PAGE_SHIFT,
755 (offset + dio->size - 1) >> PAGE_SHIFT); 757 (offset + dio->size - 1) >> PAGE_SHIFT);
756 WARN_ON_ONCE(err); 758 if (err)
759 dio_warn_stale_pagecache(iocb->ki_filp);
757 } 760 }
758 761
759 inode_dio_end(file_inode(iocb->ki_filp)); 762 inode_dio_end(file_inode(iocb->ki_filp));
@@ -1018,9 +1021,16 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1018 if (ret) 1021 if (ret)
1019 goto out_free_dio; 1022 goto out_free_dio;
1020 1023
1024 /*
1025 * Try to invalidate cache pages for the range we're direct
1026 * writing. If this invalidation fails, tough, the write will
1027 * still work, but racing two incompatible write paths is a
1028 * pretty crazy thing to do, so we don't support it 100%.
1029 */
1021 ret = invalidate_inode_pages2_range(mapping, 1030 ret = invalidate_inode_pages2_range(mapping,
1022 start >> PAGE_SHIFT, end >> PAGE_SHIFT); 1031 start >> PAGE_SHIFT, end >> PAGE_SHIFT);
1023 WARN_ON_ONCE(ret); 1032 if (ret)
1033 dio_warn_stale_pagecache(iocb->ki_filp);
1024 ret = 0; 1034 ret = 0;
1025 1035
1026 if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && 1036 if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 4055f51617ef..c125d662777c 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * linux/fs/jbd2/checkpoint.c 3 * linux/fs/jbd2/checkpoint.c
3 * 4 *
@@ -5,10 +6,6 @@
5 * 6 *
6 * Copyright 1999 Red Hat Software --- All Rights Reserved 7 * Copyright 1999 Red Hat Software --- All Rights Reserved
7 * 8 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Checkpoint routines for the generic filesystem journaling code. 9 * Checkpoint routines for the generic filesystem journaling code.
13 * Part of the ext2fs journaling system. 10 * Part of the ext2fs journaling system.
14 * 11 *
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3c1c31321d9b..8de0e7723316 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * linux/fs/jbd2/commit.c 3 * linux/fs/jbd2/commit.c
3 * 4 *
@@ -5,10 +6,6 @@
5 * 6 *
6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * Copyright 1998 Red Hat corp --- All Rights Reserved
7 * 8 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Journal commit routines for the generic filesystem journaling code; 9 * Journal commit routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system. 10 * part of the ext2fs journaling system.
14 */ 11 */
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 67546c7ad473..3fbf48ec2188 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * linux/fs/jbd2/journal.c 3 * linux/fs/jbd2/journal.c
3 * 4 *
@@ -5,10 +6,6 @@
5 * 6 *
6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * Copyright 1998 Red Hat corp --- All Rights Reserved
7 * 8 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Generic filesystem journal-writing code; part of the ext2fs 9 * Generic filesystem journal-writing code; part of the ext2fs
13 * journaling system. 10 * journaling system.
14 * 11 *
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 02dd3360cb20..f99910b69c78 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * linux/fs/jbd2/recovery.c 3 * linux/fs/jbd2/recovery.c
3 * 4 *
@@ -5,10 +6,6 @@
5 * 6 *
6 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
7 * 8 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Journal recovery routines for the generic filesystem journaling code; 9 * Journal recovery routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system. 10 * part of the ext2fs journaling system.
14 */ 11 */
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index f9aefcda5854..696ef15ec942 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * linux/fs/jbd2/revoke.c 3 * linux/fs/jbd2/revoke.c
3 * 4 *
@@ -5,10 +6,6 @@
5 * 6 *
6 * Copyright 2000 Red Hat corp --- All Rights Reserved 7 * Copyright 2000 Red Hat corp --- All Rights Reserved
7 * 8 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Journal revoke routines for the generic filesystem journaling code; 9 * Journal revoke routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system. 10 * part of the ext2fs journaling system.
14 * 11 *
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8b08044b3120..ac311037d7a5 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0+
1/* 2/*
2 * linux/fs/jbd2/transaction.c 3 * linux/fs/jbd2/transaction.c
3 * 4 *
@@ -5,10 +6,6 @@
5 * 6 *
6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * Copyright 1998 Red Hat corp --- All Rights Reserved
7 * 8 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Generic filesystem transaction handling code; part of the ext2fs 9 * Generic filesystem transaction handling code; part of the ext2fs
13 * journaling system. 10 * journaling system.
14 * 11 *
@@ -495,8 +492,10 @@ void jbd2_journal_free_reserved(handle_t *handle)
495EXPORT_SYMBOL(jbd2_journal_free_reserved); 492EXPORT_SYMBOL(jbd2_journal_free_reserved);
496 493
497/** 494/**
498 * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle 495 * int jbd2_journal_start_reserved() - start reserved handle
499 * @handle: handle to start 496 * @handle: handle to start
497 * @type: for handle statistics
498 * @line_no: for handle statistics
500 * 499 *
501 * Start handle that has been previously reserved with jbd2_journal_reserve(). 500 * Start handle that has been previously reserved with jbd2_journal_reserve().
502 * This attaches @handle to the running transaction (or creates one if there's 501 * This attaches @handle to the running transaction (or creates one if there's
@@ -626,6 +625,7 @@ error_out:
626 * int jbd2_journal_restart() - restart a handle . 625 * int jbd2_journal_restart() - restart a handle .
627 * @handle: handle to restart 626 * @handle: handle to restart
628 * @nblocks: nr credits requested 627 * @nblocks: nr credits requested
628 * @gfp_mask: memory allocation flags (for start_this_handle)
629 * 629 *
630 * Restart a handle for a multi-transaction filesystem 630 * Restart a handle for a multi-transaction filesystem
631 * operation. 631 * operation.
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
index d8bb6c411e96..ad850c5bf2ca 100644
--- a/fs/jffs2/Kconfig
+++ b/fs/jffs2/Kconfig
@@ -68,8 +68,7 @@ config JFFS2_FS_XATTR
68 default n 68 default n
69 help 69 help
70 Extended attributes are name:value pairs associated with inodes by 70 Extended attributes are name:value pairs associated with inodes by
71 the kernel or by users (see the attr(5) manual page, or visit 71 the kernel or by users (see the attr(5) manual page for details).
72 <http://acl.bestbits.at/> for details).
73 72
74 If unsure, say N. 73 If unsure, say N.
75 74
@@ -82,9 +81,6 @@ config JFFS2_FS_POSIX_ACL
82 Posix Access Control Lists (ACLs) support permissions for users and 81 Posix Access Control Lists (ACLs) support permissions for users and
83 groups beyond the owner/group/world scheme. 82 groups beyond the owner/group/world scheme.
84 83
85 To learn more about Access Control Lists, visit the Posix ACLs for
86 Linux website <http://acl.bestbits.at/>.
87
88 If you don't know what Access Control Lists are, say N 84 If you don't know what Access Control Lists are, say N
89 85
90config JFFS2_FS_SECURITY 86config JFFS2_FS_SECURITY
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d8c274d39ddb..eab04eca95a3 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -362,7 +362,6 @@ error_io:
362 ret = -EIO; 362 ret = -EIO;
363error: 363error:
364 mutex_unlock(&f->sem); 364 mutex_unlock(&f->sem);
365 jffs2_do_clear_inode(c, f);
366 iget_failed(inode); 365 iget_failed(inode);
367 return ERR_PTR(ret); 366 return ERR_PTR(ret);
368} 367}
diff --git a/fs/jfs/Kconfig b/fs/jfs/Kconfig
index 57cef19951db..851de78fdabb 100644
--- a/fs/jfs/Kconfig
+++ b/fs/jfs/Kconfig
@@ -16,9 +16,6 @@ config JFS_POSIX_ACL
16 Posix Access Control Lists (ACLs) support permissions for users and 16 Posix Access Control Lists (ACLs) support permissions for users and
17 groups beyond the owner/group/world scheme. 17 groups beyond the owner/group/world scheme.
18 18
19 To learn more about Access Control Lists, visit the Posix ACLs for
20 Linux website <http://acl.bestbits.at/>.
21
22 If you don't know what Access Control Lists are, say N 19 If you don't know what Access Control Lists are, say N
23 20
24config JFS_SECURITY 21config JFS_SECURITY
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 90373aebfdca..1b9264fd54b6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -965,9 +965,11 @@ static int __init init_jfs_fs(void)
965 int rc; 965 int rc;
966 966
967 jfs_inode_cachep = 967 jfs_inode_cachep =
968 kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, 968 kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info),
969 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, 969 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
970 init_once); 970 offsetof(struct jfs_inode_info, i_inline),
971 sizeof_field(struct jfs_inode_info, i_inline),
972 init_once);
971 if (jfs_inode_cachep == NULL) 973 if (jfs_inode_cachep == NULL)
972 return -ENOMEM; 974 return -ENOMEM;
973 975
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b8b8b9ced9f8..bf41e2e72c18 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -94,6 +94,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
94 entry->e_key = key; 94 entry->e_key = key;
95 entry->e_value = value; 95 entry->e_value = value;
96 entry->e_reusable = reusable; 96 entry->e_reusable = reusable;
97 entry->e_referenced = 0;
97 head = mb_cache_entry_head(cache, key); 98 head = mb_cache_entry_head(cache, key);
98 hlist_bl_lock(head); 99 hlist_bl_lock(head);
99 hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { 100 hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
@@ -238,7 +239,9 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
238 spin_lock(&cache->c_list_lock); 239 spin_lock(&cache->c_list_lock);
239 if (!list_empty(&entry->e_list)) { 240 if (!list_empty(&entry->e_list)) {
240 list_del_init(&entry->e_list); 241 list_del_init(&entry->e_list);
241 cache->c_entry_count--; 242 if (!WARN_ONCE(cache->c_entry_count == 0,
243 "mbcache: attempt to decrement c_entry_count past zero"))
244 cache->c_entry_count--;
242 atomic_dec(&entry->e_refcnt); 245 atomic_dec(&entry->e_refcnt);
243 } 246 }
244 spin_unlock(&cache->c_list_lock); 247 spin_unlock(&cache->c_list_lock);
@@ -269,9 +272,6 @@ static unsigned long mb_cache_count(struct shrinker *shrink,
269 struct mb_cache *cache = container_of(shrink, struct mb_cache, 272 struct mb_cache *cache = container_of(shrink, struct mb_cache,
270 c_shrink); 273 c_shrink);
271 274
272 /* Unlikely, but not impossible */
273 if (unlikely(cache->c_entry_count < 0))
274 return 0;
275 return cache->c_entry_count; 275 return cache->c_entry_count;
276} 276}
277 277
diff --git a/fs/namei.c b/fs/namei.c
index 7c221fb0836b..921ae32dbc80 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -391,50 +391,6 @@ static inline int do_inode_permission(struct inode *inode, int mask)
391} 391}
392 392
393/** 393/**
394 * __inode_permission - Check for access rights to a given inode
395 * @inode: Inode to check permission on
396 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
397 *
398 * Check for read/write/execute permissions on an inode.
399 *
400 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
401 *
402 * This does not check for a read-only file system. You probably want
403 * inode_permission().
404 */
405int __inode_permission(struct inode *inode, int mask)
406{
407 int retval;
408
409 if (unlikely(mask & MAY_WRITE)) {
410 /*
411 * Nobody gets write access to an immutable file.
412 */
413 if (IS_IMMUTABLE(inode))
414 return -EPERM;
415
416 /*
417 * Updating mtime will likely cause i_uid and i_gid to be
418 * written back improperly if their true value is unknown
419 * to the vfs.
420 */
421 if (HAS_UNMAPPED_ID(inode))
422 return -EACCES;
423 }
424
425 retval = do_inode_permission(inode, mask);
426 if (retval)
427 return retval;
428
429 retval = devcgroup_inode_permission(inode, mask);
430 if (retval)
431 return retval;
432
433 return security_inode_permission(inode, mask);
434}
435EXPORT_SYMBOL(__inode_permission);
436
437/**
438 * sb_permission - Check superblock-level permissions 394 * sb_permission - Check superblock-level permissions
439 * @sb: Superblock of inode to check permission on 395 * @sb: Superblock of inode to check permission on
440 * @inode: Inode to check permission on 396 * @inode: Inode to check permission on
@@ -472,7 +428,32 @@ int inode_permission(struct inode *inode, int mask)
472 retval = sb_permission(inode->i_sb, inode, mask); 428 retval = sb_permission(inode->i_sb, inode, mask);
473 if (retval) 429 if (retval)
474 return retval; 430 return retval;
475 return __inode_permission(inode, mask); 431
432 if (unlikely(mask & MAY_WRITE)) {
433 /*
434 * Nobody gets write access to an immutable file.
435 */
436 if (IS_IMMUTABLE(inode))
437 return -EPERM;
438
439 /*
440 * Updating mtime will likely cause i_uid and i_gid to be
441 * written back improperly if their true value is unknown
442 * to the vfs.
443 */
444 if (HAS_UNMAPPED_ID(inode))
445 return -EACCES;
446 }
447
448 retval = do_inode_permission(inode, mask);
449 if (retval)
450 return retval;
451
452 retval = devcgroup_inode_permission(inode, mask);
453 if (retval)
454 return retval;
455
456 return security_inode_permission(inode, mask);
476} 457}
477EXPORT_SYMBOL(inode_permission); 458EXPORT_SYMBOL(inode_permission);
478 459
diff --git a/fs/ncpfs/Kconfig b/fs/ncpfs/Kconfig
deleted file mode 100644
index c931cf22a1f6..000000000000
--- a/fs/ncpfs/Kconfig
+++ /dev/null
@@ -1,108 +0,0 @@
1#
2# NCP Filesystem configuration
3#
4config NCP_FS
5 tristate "NCP file system support (to mount NetWare volumes)"
6 depends on IPX!=n || INET
7 help
8 NCP (NetWare Core Protocol) is a protocol that runs over IPX and is
9 used by Novell NetWare clients to talk to file servers. It is to
10 IPX what NFS is to TCP/IP, if that helps. Saying Y here allows you
11 to mount NetWare file server volumes and to access them just like
12 any other Unix directory. For details, please read the file
13 <file:Documentation/filesystems/ncpfs.txt> in the kernel source and
14 the IPX-HOWTO from <http://www.tldp.org/docs.html#howto>.
15
16 You do not have to say Y here if you want your Linux box to act as a
17 file *server* for Novell NetWare clients.
18
19 General information about how to connect Linux, Windows machines and
20 Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
21
22 To compile this as a module, choose M here: the module will be called
23 ncpfs. Say N unless you are connected to a Novell network.
24
25config NCPFS_PACKET_SIGNING
26 bool "Packet signatures"
27 depends on NCP_FS
28 help
29 NCP allows packets to be signed for stronger security. If you want
30 security, say Y. Normal users can leave it off. To be able to use
31 packet signing you must use ncpfs > 2.0.12.
32
33config NCPFS_IOCTL_LOCKING
34 bool "Proprietary file locking"
35 depends on NCP_FS
36 help
37 Allows locking of records on remote volumes. Say N unless you have
38 special applications which are able to utilize this locking scheme.
39
40config NCPFS_STRONG
41 bool "Clear remove/delete inhibit when needed"
42 depends on NCP_FS
43 help
44 Allows manipulation of files flagged as Delete or Rename Inhibit.
45 To use this feature you must mount volumes with the ncpmount
46 parameter "-s" (ncpfs-2.0.12 and newer). Say Y unless you are not
47 mounting volumes with -f 444.
48
49config NCPFS_NFS_NS
50 bool "Use NFS namespace if available"
51 depends on NCP_FS
52 help
53 Allows you to utilize NFS namespace on NetWare servers. It brings
54 you case sensitive filenames. Say Y. You can disable it at
55 mount-time with the `-N nfs' parameter of ncpmount.
56
57config NCPFS_OS2_NS
58 bool "Use LONG (OS/2) namespace if available"
59 depends on NCP_FS
60 help
61 Allows you to utilize OS2/LONG namespace on NetWare servers.
62 Filenames in this namespace are limited to 255 characters, they are
63 case insensitive, and case in names is preserved. Say Y. You can
64 disable it at mount time with the -N os2 parameter of ncpmount.
65
66config NCPFS_SMALLDOS
67 bool "Lowercase DOS filenames"
68 depends on NCP_FS
69 ---help---
70 If you say Y here, every filename on a NetWare server volume using
71 the OS2/LONG namespace and created under DOS or on a volume using
72 DOS namespace will be converted to lowercase characters.
73 Saying N here will give you these filenames in uppercase.
74
75 This is only a cosmetic option since the OS2/LONG namespace is case
76 insensitive. The only major reason for this option is backward
77 compatibility when moving from DOS to OS2/LONG namespace support.
78 Long filenames (created by Win95) will not be affected.
79
80 This option does not solve the problem that filenames appear
81 differently under Linux and under Windows, since Windows does an
82 additional conversions on the client side. You can achieve similar
83 effects by saying Y to "Allow using of Native Language Support"
84 below.
85
86config NCPFS_NLS
87 bool "Use Native Language Support"
88 depends on NCP_FS
89 select NLS
90 help
91 Allows you to use codepages and I/O charsets for file name
92 translation between the server file system and input/output. This
93 may be useful, if you want to access the server with other operating
94 systems, e.g. Windows 95. See also NLS for more Information.
95
96 To select codepages and I/O charsets use ncpfs-2.2.0.13 or newer.
97
98config NCPFS_EXTRAS
99 bool "Enable symbolic links and execute flags"
100 depends on NCP_FS
101 help
102 This enables the use of symbolic links and an execute permission
103 bit on NCPFS. The file server need not have long name space or NFS
104 name space loaded for these to work.
105
106 To use the new attributes, it is recommended to use the flags
107 '-f 600 -d 755' on the ncpmount command line.
108
diff --git a/fs/ncpfs/Makefile b/fs/ncpfs/Makefile
deleted file mode 100644
index 66fe5f878817..000000000000
--- a/fs/ncpfs/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Makefile for the linux ncp filesystem routines.
4#
5
6obj-$(CONFIG_NCP_FS) += ncpfs.o
7
8ncpfs-y := dir.o file.o inode.o ioctl.o mmap.o ncplib_kernel.o sock.o \
9 ncpsign_kernel.o getopt.o
10
11ncpfs-$(CONFIG_NCPFS_EXTRAS) += symlink.o
12ncpfs-$(CONFIG_NCPFS_NFS_NS) += symlink.o
13
14# If you want debugging output, please uncomment the following line
15# ccflags-y := -DDEBUG_NCP=1
16
17CFLAGS_ncplib_kernel.o := -finline-functions
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
deleted file mode 100644
index 0c57c5c5d40a..000000000000
--- a/fs/ncpfs/dir.c
+++ /dev/null
@@ -1,1232 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * dir.c
4 *
5 * Copyright (C) 1995, 1996 by Volker Lendecke
6 * Modified for big endian by J.F. Chadima and David S. Miller
7 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
8 * Modified 1998, 1999 Wolfram Pienkoss for NLS
9 * Modified 1999 Wolfram Pienkoss for directory caching
10 * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info
11 *
12 */
13
14
15#include <linux/time.h>
16#include <linux/errno.h>
17#include <linux/stat.h>
18#include <linux/kernel.h>
19#include <linux/vmalloc.h>
20#include <linux/mm.h>
21#include <linux/namei.h>
22#include <linux/uaccess.h>
23#include <asm/byteorder.h>
24
25#include "ncp_fs.h"
26
27static void ncp_read_volume_list(struct file *, struct dir_context *,
28 struct ncp_cache_control *);
29static void ncp_do_readdir(struct file *, struct dir_context *,
30 struct ncp_cache_control *);
31
32static int ncp_readdir(struct file *, struct dir_context *);
33
34static int ncp_create(struct inode *, struct dentry *, umode_t, bool);
35static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int);
36static int ncp_unlink(struct inode *, struct dentry *);
37static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
38static int ncp_rmdir(struct inode *, struct dentry *);
39static int ncp_rename(struct inode *, struct dentry *,
40 struct inode *, struct dentry *, unsigned int);
41static int ncp_mknod(struct inode * dir, struct dentry *dentry,
42 umode_t mode, dev_t rdev);
43#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
44extern int ncp_symlink(struct inode *, struct dentry *, const char *);
45#else
46#define ncp_symlink NULL
47#endif
48
/* File operations for NCP directories: seek/read stubs, readdir,
 * and the ncpfs private ioctls. */
const struct file_operations ncp_dir_operations =
{
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate	= ncp_readdir,
	.unlocked_ioctl	= ncp_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ncp_compat_ioctl,
#endif
};
59
/* Inode operations for NCP directories.  ncp_symlink is NULL unless
 * CONFIG_NCPFS_EXTRAS or CONFIG_NCPFS_NFS_NS provides it (see above). */
const struct inode_operations ncp_dir_inode_operations =
{
	.create		= ncp_create,
	.lookup		= ncp_lookup,
	.unlink		= ncp_unlink,
	.symlink	= ncp_symlink,
	.mkdir		= ncp_mkdir,
	.rmdir		= ncp_rmdir,
	.mknod		= ncp_mknod,
	.rename		= ncp_rename,
	.setattr	= ncp_notify_change,
};
72
73/*
74 * Dentry operations routines
75 */
76static int ncp_lookup_validate(struct dentry *, unsigned int);
77static int ncp_hash_dentry(const struct dentry *, struct qstr *);
78static int ncp_compare_dentry(const struct dentry *,
79 unsigned int, const char *, const struct qstr *);
80static int ncp_delete_dentry(const struct dentry *);
81static void ncp_d_prune(struct dentry *dentry);
82
/* Dentry operations: case-folding hash/compare for case-insensitive
 * namespaces, server-side revalidation, and readdir-cache pruning. */
const struct dentry_operations ncp_dentry_operations =
{
	.d_revalidate	= ncp_lookup_validate,
	.d_hash		= ncp_hash_dentry,
	.d_compare	= ncp_compare_dentry,
	.d_delete	= ncp_delete_dentry,
	.d_prune	= ncp_d_prune,
};
91
92#define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber])
93
/*
 * Should the case of this directory entry's name, as reported by the
 * server, be preserved?
 *
 * With CONFIG_NCPFS_SMALLDOS, entries living in the DOS namespace —
 * and, with OS/2 namespace support, OS2-namespace entries that were
 * originally created from DOS (@nscreator == NW_NS_DOS) — are not
 * case-preserved (return 0).  Everything else returns 1.
 */
static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator)
{
#ifdef CONFIG_NCPFS_SMALLDOS
	int ns = ncp_namespace(i);

	if ((ns == NW_NS_DOS)
#ifdef CONFIG_NCPFS_OS2_NS
	    || ((ns == NW_NS_OS2) && (nscreator == NW_NS_DOS))
#endif /* CONFIG_NCPFS_OS2_NS */
	    )
		return 0;
#endif /* CONFIG_NCPFS_SMALLDOS */
	return 1;
}
108
109#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS)
110
/*
 * Only the NFS namespace is case-sensitive; without
 * CONFIG_NCPFS_NFS_NS every directory is treated case-insensitively.
 */
static inline int ncp_case_sensitive(const struct inode *i)
{
#ifdef CONFIG_NCPFS_NFS_NS
	return ncp_namespace(i) == NW_NS_NFS;
#else
	return 0;
#endif /* CONFIG_NCPFS_NFS_NS */
}
119
/*
 * Note: leave the hash unchanged if the directory
 * is case-sensitive.
 *
 * For case-insensitive directories the name is folded to lower case
 * through the mount's NLS table before hashing, so lookups differing
 * only in case land in the same hash chain and reach d_compare.
 * Always returns 0 (success).
 */
static int
ncp_hash_dentry(const struct dentry *dentry, struct qstr *this)
{
	struct inode *inode = d_inode_rcu(dentry);

	/* negative parent: nothing to base the decision on, keep hash */
	if (!inode)
		return 0;

	if (!ncp_case_sensitive(inode)) {
		struct nls_table *t;
		unsigned long hash;
		int i;

		t = NCP_IO_TABLE(dentry->d_sb);
		hash = init_name_hash(dentry);
		for (i = 0; i < this->len; i++)
			hash = partial_name_hash(ncp_tolower(t, this->name[i]),
									hash);
		this->hash = end_name_hash(hash);
	}
	return 0;
}
146
147static int
148ncp_compare_dentry(const struct dentry *dentry,
149 unsigned int len, const char *str, const struct qstr *name)
150{
151 struct inode *pinode;
152
153 if (len != name->len)
154 return 1;
155
156 pinode = d_inode_rcu(dentry->d_parent);
157 if (!pinode)
158 return 1;
159
160 if (ncp_case_sensitive(pinode))
161 return strncmp(str, name->name, len);
162
163 return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len);
164}
165
/*
 * This is the callback from dput() when d_count is going to 0.
 * We use this to unhash dentries with bad inodes: returning 1 tells
 * the dcache to drop the dentry immediately.
 * Closing files can be safely postponed until iput() - it's done there
 * anyway.  N.B. Unhash negative dentries?  (currently kept: return 0)
 */
static int
ncp_delete_dentry(const struct dentry * dentry)
{
	struct inode *inode = d_inode(dentry);

	return inode && is_bad_inode(inode);
}
185
186static inline int
187ncp_single_volume(struct ncp_server *server)
188{
189 return (server->m.mounted_vol[0] != '\0');
190}
191
/*
 * The synthetic "server root" (the volume list) only exists when no
 * single volume was mounted; in that case it is the filesystem root
 * inode.
 */
static inline int ncp_is_server_root(struct inode *inode)
{
	if (ncp_single_volume(NCP_SERVER(inode)))
		return 0;
	return is_root_inode(inode);
}
197
198
199/*
200 * This is the callback when the dcache has a lookup hit.
201 */
202
203
204#ifdef CONFIG_NCPFS_STRONG
/* try to delete a readonly file (NW R bit set) */

/*
 * Clear the NetWare read-only / delete-inhibit / rename-inhibit
 * attributes on the target, retry the delete, and restore the old
 * attributes if the delete still fails.  Returns the NCP result code
 * (initialized to 0x9c so an early attribute failure reports "invalid
 * path") — 0 on success, negative errno on transport errors.
 */
static int
ncp_force_unlink(struct inode *dir, struct dentry* dentry)
{
	int res=0x9c,res2;
	struct nw_modify_dos_info info;
	__le32 old_nwattr;
	struct inode *inode;

	memset(&info, 0, sizeof(info));

	/* remove the Read-Only flag on the NW server */
	inode = d_inode(dentry);

	old_nwattr = NCP_FINFO(inode)->nwattr;
	info.attributes = old_nwattr & ~(aRONLY|aDELETEINHIBIT|aRENAMEINHIBIT);
	res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(inode), inode, NULL, DM_ATTRIBUTES, &info);
	if (res2)
		goto leave_me;

	/* now try again the delete operation */
	res = ncp_del_file_or_subdir2(NCP_SERVER(dir), dentry);

	if (res) /* delete failed, set R bit again */
	{
		info.attributes = old_nwattr;
		res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(inode), inode, NULL, DM_ATTRIBUTES, &info);
		if (res2)
			goto leave_me;
	}
leave_me:
	return(res);
}
239#endif /* CONFIG_NCPFS_STRONG */
240
241#ifdef CONFIG_NCPFS_STRONG
/*
 * Rename fallback for NCP_MOUNT_STRONG mounts: strip the read-only /
 * rename-inhibit / delete-inhibit attributes from the source (and from
 * a positive target, if one exists), retry the rename, then restore the
 * saved attributes.  On success the source's old attributes are applied
 * to the *new* name, since the old name no longer exists.  Errors from
 * the restoring attribute writes are deliberately ignored.
 */
static int
ncp_force_rename(struct inode *old_dir, struct dentry* old_dentry, char *_old_name,
		 struct inode *new_dir, struct dentry* new_dentry, char *_new_name)
{
	struct nw_modify_dos_info info;
	int res=0x90,res2;
	struct inode *old_inode = d_inode(old_dentry);
	__le32 old_nwattr = NCP_FINFO(old_inode)->nwattr;
	__le32 new_nwattr = 0; /* shut compiler warning */
	int old_nwattr_changed = 0;
	int new_nwattr_changed = 0;

	memset(&info, 0, sizeof(info));

	/* remove the Read-Only flag on the NW server */

	info.attributes = old_nwattr & ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT);
	res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(old_inode), old_inode, NULL, DM_ATTRIBUTES, &info);
	if (!res2)
		old_nwattr_changed = 1;
	if (new_dentry && d_really_is_positive(new_dentry)) {
		new_nwattr = NCP_FINFO(d_inode(new_dentry))->nwattr;
		info.attributes = new_nwattr & ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT);
		res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(new_dir), new_dir, _new_name, DM_ATTRIBUTES, &info);
		if (!res2)
			new_nwattr_changed = 1;
	}
	/* now try again the rename operation */
	/* but only if something really happened */
	if (new_nwattr_changed || old_nwattr_changed) {
		res = ncp_ren_or_mov_file_or_subdir(NCP_SERVER(old_dir),
						    old_dir, _old_name,
						    new_dir, _new_name);
	}
	if (res)
		goto leave_me;
	/* file was successfully renamed, so:
	   do not set attributes on old file - it no longer exists
	   copy attributes from old file to new */
	new_nwattr_changed = old_nwattr_changed;
	new_nwattr = old_nwattr;
	old_nwattr_changed = 0;

leave_me:;
	if (old_nwattr_changed) {
		info.attributes = old_nwattr;
		res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(old_inode), old_inode, NULL, DM_ATTRIBUTES, &info);
		/* ignore errors */
	}
	if (new_nwattr_changed) {
		info.attributes = new_nwattr;
		res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(new_dir), new_dir, _new_name, DM_ATTRIBUTES, &info);
		/* ignore errors */
	}
	return(res);
}
298#endif /* CONFIG_NCPFS_STRONG */
299
300
/*
 * d_revalidate: decide whether a cached dentry is still usable.
 *
 * A dentry younger than the mount's max age (NCP_TEST_AGE) is trusted
 * outright; an expired one is re-looked-up on the server and accepted
 * only if its dirEntNum is unchanged.  Returns 1 (valid), 0 (invalid),
 * or -ECHILD to force ref-walk mode, since the server round-trip may
 * sleep.  Negative dentries are always reported invalid (val stays 0).
 */
static int
ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
{
	struct ncp_server *server;
	struct dentry *parent;
	struct inode *dir;
	struct ncp_entry_info finfo;
	int res, val = 0, len;
	__u8 __name[NCP_MAXPATHLEN + 1];

	if (dentry == dentry->d_sb->s_root)
		return 1;

	if (flags & LOOKUP_RCU)
		return -ECHILD;

	parent = dget_parent(dentry);
	dir = d_inode(parent);

	if (d_really_is_negative(dentry))
		goto finished;

	server = NCP_SERVER(dir);

	/*
	 * Inspired by smbfs:
	 * The default validation is based on dentry age:
	 * We set the max age at mount time.  (But each
	 * successful server lookup renews the timestamp.)
	 */
	val = NCP_TEST_AGE(server, dentry);
	if (val)
		goto finished;

	ncp_dbg(2, "%pd2 not valid, age=%ld, server lookup\n",
		dentry, NCP_GET_AGE(dentry));

	/* convert the name into the volume's character set; at the
	 * server root a "name" is a volume name */
	len = sizeof(__name);
	if (ncp_is_server_root(dir)) {
		res = ncp_io2vol(server, __name, &len, dentry->d_name.name,
				 dentry->d_name.len, 1);
		if (!res) {
			res = ncp_lookup_volume(server, __name, &(finfo.i));
			if (!res)
				ncp_update_known_namespace(server, finfo.i.volNumber, NULL);
		}
	} else {
		res = ncp_io2vol(server, __name, &len, dentry->d_name.name,
				 dentry->d_name.len, !ncp_preserve_case(dir));
		if (!res)
			res = ncp_obtain_info(server, dir, __name, &(finfo.i));
	}
	finfo.volume = finfo.i.volNumber;
	ncp_dbg(2, "looked for %pd/%s, res=%d\n",
		dentry->d_parent, __name, res);
	/*
	 * If we didn't find it, or if it has a different dirEntNum to
	 * what we remember, it's not valid any more.
	 */
	if (!res) {
		struct inode *inode = d_inode(dentry);

		inode_lock(inode);
		if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
			ncp_new_dentry(dentry);	/* renew the age stamp */
			val=1;
		} else
			ncp_dbg(2, "found, but dirEntNum changed\n");

		ncp_update_inode2(inode, &finfo);
		inode_unlock(inode);
	}

finished:
	ncp_dbg(2, "result=%d\n", val);
	dput(parent);
	return val;
}
379
/*
 * Fetch the server-side modification time of a dentry, converted to a
 * Unix timestamp.  Returns 0 — treated by callers as "unknown" — when
 * the connection is down, for the server root, or on lookup failure.
 */
static time_t ncp_obtain_mtime(struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
	struct ncp_server *server = NCP_SERVER(inode);
	struct nw_info_struct i;

	if (!ncp_conn_valid(server) || ncp_is_server_root(inode))
		return 0;

	if (ncp_obtain_info(server, inode, NULL, &i))
		return 0;

	return ncp_date_dos2unix(i.modifyTime, i.modifyDate);
}
394
/*
 * Expire all children of @parent: clear their readdir-cache marker
 * (d_fsdata) and age them so d_revalidate will re-check each with the
 * server.  The parent's d_lock protects the walk of d_subdirs.
 */
static inline void
ncp_invalidate_dircache_entries(struct dentry *parent)
{
	struct ncp_server *server = NCP_SERVER(d_inode(parent));
	struct dentry *dentry;

	spin_lock(&parent->d_lock);
	list_for_each_entry(dentry, &parent->d_subdirs, d_child) {
		dentry->d_fsdata = NULL;
		ncp_age_dentry(server, dentry);
	}
	spin_unlock(&parent->d_lock);
}
408
/*
 * ->iterate(): emit directory entries, backed by a per-directory cache
 * of dentry pointers kept in the directory inode's page cache.
 *
 * Page 0 of the mapping holds the cache header (union ncp_dir_cache);
 * later pages hold arrays of dentry pointers filled in by
 * ncp_fill_cache().  If the cache is complete (head.eof), young enough
 * (NCP_MAX_AGE) and the server mtime is unchanged, entries are replayed
 * from the cached dentries; otherwise the cache is rebuilt from the
 * server via ncp_read_volume_list()/ncp_do_readdir() while emitting.
 */
static int ncp_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = d_inode(dentry);
	struct page *page = NULL;
	struct ncp_server *server = NCP_SERVER(inode);
	union ncp_dir_cache *cache = NULL;
	struct ncp_cache_control ctl;
	int result, mtime_valid = 0;
	time_t mtime = 0;

	ctl.page = NULL;
	ctl.cache = NULL;

	ncp_dbg(2, "reading %pD2, pos=%d\n", file, (int)ctx->pos);

	result = -EIO;
	/* Do not generate '.' and '..' when server is dead. */
	if (!ncp_conn_valid(server))
		goto out;

	result = 0;
	if (!dir_emit_dots(file, ctx))
		goto out;

	page = grab_cache_page(&inode->i_data, 0);
	if (!page)
		goto read_really;

	ctl.cache = cache = kmap(page);
	ctl.head = cache->head;

	if (!PageUptodate(page) || !ctl.head.eof)
		goto init_cache;

	/* pos == 2: first real entry — re-validate the whole cache */
	if (ctx->pos == 2) {
		if (jiffies - ctl.head.time >= NCP_MAX_AGE(server))
			goto init_cache;

		mtime = ncp_obtain_mtime(dentry);
		mtime_valid = 1;
		/* mtime of 0 means "unknown" and also invalidates */
		if ((!mtime) || (mtime != ctl.head.mtime))
			goto init_cache;
	}

	if (ctx->pos > ctl.head.end)
		goto finished;

	/* translate file position into cache page/slot coordinates */
	ctl.fpos = ctx->pos + (NCP_DIRCACHE_START - 2);
	ctl.ofs = ctl.fpos / NCP_DIRCACHE_SIZE;
	ctl.idx = ctl.fpos % NCP_DIRCACHE_SIZE;

	for (;;) {
		if (ctl.ofs != 0) {
			ctl.page = find_lock_page(&inode->i_data, ctl.ofs);
			if (!ctl.page)
				goto invalid_cache;
			ctl.cache = kmap(ctl.page);
			if (!PageUptodate(ctl.page))
				goto invalid_cache;
		}
		while (ctl.idx < NCP_DIRCACHE_SIZE) {
			struct dentry *dent;
			bool over;

			/* the cached pointers are only trustworthy while
			 * NCPI_DIR_CACHE is set (cleared by ncp_d_prune) */
			spin_lock(&dentry->d_lock);
			if (!(NCP_FINFO(inode)->flags & NCPI_DIR_CACHE)) {
				spin_unlock(&dentry->d_lock);
				goto invalid_cache;
			}
			dent = ctl.cache->dentry[ctl.idx];
			if (unlikely(!lockref_get_not_dead(&dent->d_lockref))) {
				spin_unlock(&dentry->d_lock);
				goto invalid_cache;
			}
			spin_unlock(&dentry->d_lock);
			if (d_really_is_negative(dent)) {
				dput(dent);
				goto invalid_cache;
			}
			over = !dir_emit(ctx, dent->d_name.name,
					dent->d_name.len,
					d_inode(dent)->i_ino, DT_UNKNOWN);
			dput(dent);
			if (over)
				goto finished;
			ctx->pos += 1;
			ctl.idx += 1;
			if (ctx->pos > ctl.head.end)
				goto finished;
		}
		/* advance to the next cache page */
		if (ctl.page) {
			kunmap(ctl.page);
			SetPageUptodate(ctl.page);
			unlock_page(ctl.page);
			put_page(ctl.page);
			ctl.page = NULL;
		}
		ctl.idx = 0;
		ctl.ofs += 1;
	}
invalid_cache:
	/* cache unusable: release the current page and rebuild */
	if (ctl.page) {
		kunmap(ctl.page);
		unlock_page(ctl.page);
		put_page(ctl.page);
		ctl.page = NULL;
	}
	ctl.cache = cache;
init_cache:
	/* reset the header and refill the cache from the server */
	ncp_invalidate_dircache_entries(dentry);
	if (!mtime_valid) {
		mtime = ncp_obtain_mtime(dentry);
		mtime_valid = 1;
	}
	ctl.head.mtime = mtime;
	ctl.head.time = jiffies;
	ctl.head.eof = 0;
	ctl.fpos = 2;
	ctl.ofs = 0;
	ctl.idx = NCP_DIRCACHE_START;
	ctl.filled = 0;
	ctl.valid = 1;
read_really:
	spin_lock(&dentry->d_lock);
	NCP_FINFO(inode)->flags |= NCPI_DIR_CACHE;
	spin_unlock(&dentry->d_lock);
	if (ncp_is_server_root(inode)) {
		ncp_read_volume_list(file, ctx, &ctl);
	} else {
		ncp_do_readdir(file, ctx, &ctl);
	}
	ctl.head.end = ctl.fpos - 1;
	ctl.head.eof = ctl.valid;
finished:
	/* write back the (possibly updated) header and release pages */
	if (ctl.page) {
		kunmap(ctl.page);
		SetPageUptodate(ctl.page);
		unlock_page(ctl.page);
		put_page(ctl.page);
	}
	if (page) {
		cache->head = ctl.head;
		kunmap(page);
		SetPageUptodate(page);
		unlock_page(page);
		put_page(page);
	}
out:
	return result;
}
560
/*
 * d_prune: the readdir cache stores raw dentry pointers (marked by a
 * non-NULL d_fsdata).  When such a dentry is about to leave the dcache,
 * invalidate the parent's cache so those pointers are never
 * dereferenced again.
 */
static void ncp_d_prune(struct dentry *dentry)
{
	if (!dentry->d_fsdata)		/* not referenced from page cache */
		return;
	NCP_FINFO(d_inode(dentry->d_parent))->flags &= ~NCPI_DIR_CACHE;
}
567
/*
 * Add one server-reported entry to the readdir dentry cache and, when
 * it matches the current ctx->pos, emit it to userspace.
 *
 * The entry name is converted to the I/O charset, a dentry is looked up
 * (or allocated) for it, its inode instantiated/updated from @entry,
 * and the dentry pointer is stored in the cache page tracked by *ctrl.
 * @inval_childs requests shrinking an existing dentry's children (the
 * volume's case-sensitivity rules changed).  Returns non-zero to keep
 * the enclosing readdir loop going, 0 to stop it.
 */
static int
ncp_fill_cache(struct file *file, struct dir_context *ctx,
		struct ncp_cache_control *ctrl, struct ncp_entry_info *entry,
		int inval_childs)
{
	struct dentry *newdent, *dentry = file->f_path.dentry;
	struct inode *dir = d_inode(dentry);
	struct ncp_cache_control ctl = *ctrl;
	struct qstr qname;
	int valid = 0;
	int hashed = 0;
	ino_t ino = 0;
	__u8 __name[NCP_MAXPATHLEN + 1];

	qname.len = sizeof(__name);
	if (ncp_vol2io(NCP_SERVER(dir), __name, &qname.len,
			entry->i.entryName, entry->i.nameLen,
			!ncp_preserve_entry_case(dir, entry->i.NSCreator)))
		return 1; /* I'm not sure */

	qname.name = __name;

	newdent = d_hash_and_lookup(dentry, &qname);
	if (IS_ERR(newdent))
		goto end_advance;
	if (!newdent) {
		newdent = d_alloc(dentry, &qname);
		if (!newdent)
			goto end_advance;
	} else {
		hashed = 1;

		/* If case sensitivity changed for this volume, all entries below this one
		   should be thrown away. This entry itself is not affected, as its case
		   sensitivity is controlled by its own parent. */
		if (inval_childs)
			shrink_dcache_parent(newdent);

		/*
		 * NetWare's OS2 namespace is case preserving yet case
		 * insensitive. So we update dentry's name as received from
		 * server. Parent dir's i_mutex is locked because we're in
		 * readdir.
		 */
		dentry_update_name_case(newdent, &qname);
	}

	if (d_really_is_negative(newdent)) {
		struct inode *inode;

		entry->opened = 0;
		entry->ino = iunique(dir->i_sb, 2);
		inode = ncp_iget(dir->i_sb, entry);
		if (inode) {
			d_instantiate(newdent, inode);
			if (!hashed)
				d_rehash(newdent);
		} else {
			/* inode allocation failed: the cache can no longer
			 * be trusted for this directory */
			spin_lock(&dentry->d_lock);
			NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE;
			spin_unlock(&dentry->d_lock);
		}
	} else {
		struct inode *inode = d_inode(newdent);

		inode_lock_nested(inode, I_MUTEX_CHILD);
		ncp_update_inode2(inode, entry);
		inode_unlock(inode);
	}

	/* current cache page full: release it and grab the next one */
	if (ctl.idx >= NCP_DIRCACHE_SIZE) {
		if (ctl.page) {
			kunmap(ctl.page);
			SetPageUptodate(ctl.page);
			unlock_page(ctl.page);
			put_page(ctl.page);
		}
		ctl.cache = NULL;
		ctl.idx -= NCP_DIRCACHE_SIZE;
		ctl.ofs += 1;
		ctl.page = grab_cache_page(&dir->i_data, ctl.ofs);
		if (ctl.page)
			ctl.cache = kmap(ctl.page);
	}
	if (ctl.cache) {
		if (d_really_is_positive(newdent)) {
			newdent->d_fsdata = newdent;	/* "cached" marker */
			ctl.cache->dentry[ctl.idx] = newdent;
			ino = d_inode(newdent)->i_ino;
			ncp_new_dentry(newdent);
		}
		valid = 1;
	}
	dput(newdent);
end_advance:
	if (!valid)
		ctl.valid = 0;
	/* emit the entry once the cache cursor catches up with ctx->pos */
	if (!ctl.filled && (ctl.fpos == ctx->pos)) {
		if (!ino)
			ino = iunique(dir->i_sb, 2);
		ctl.filled = !dir_emit(ctx, qname.name, qname.len,
				       ino, DT_UNKNOWN);
		if (!ctl.filled)
			ctx->pos += 1;
	}
	ctl.fpos += 1;
	ctl.idx += 1;
	*ctrl = ctl;
	return (ctl.valid || !ctl.filled);
}
678
/*
 * Emit the server's volume list as the contents of the server root
 * directory: one entry per non-empty volume name, each resolved with
 * ncp_lookup_volume() and fed through ncp_fill_cache().  Stops early
 * when volume info cannot be read or the cache loop asks to stop.
 */
static void
ncp_read_volume_list(struct file *file, struct dir_context *ctx,
			struct ncp_cache_control *ctl)
{
	struct inode *inode = file_inode(file);
	struct ncp_server *server = NCP_SERVER(inode);
	struct ncp_volume_info info;
	struct ncp_entry_info entry;
	int i;

	ncp_dbg(1, "pos=%ld\n", (unsigned long)ctx->pos);

	for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) {
		int inval_dentry;

		if (ncp_get_volume_info_with_number(server, i, &info) != 0)
			return;
		if (!strlen(info.volume_name))
			continue;

		ncp_dbg(1, "found vol: %s\n", info.volume_name);

		if (ncp_lookup_volume(server, info.volume_name,
					&entry.i)) {
			ncp_dbg(1, "could not lookup vol %s\n",
				info.volume_name);
			continue;
		}
		/* non-zero when the volume's namespace changed, which
		 * forces the cached children to be invalidated */
		inval_dentry = ncp_update_known_namespace(server, entry.i.volNumber, NULL);
		entry.volume = entry.i.volNumber;
		if (!ncp_fill_cache(file, ctx, ctl, &entry, inval_dentry))
			return;
	}
}
713
/*
 * Enumerate a normal directory with NCP "search for file set" requests,
 * feeding every returned entry to ncp_fill_cache().  Each reply packet
 * holds several variable-length nw_info_struct records, which are
 * unpacked with a running bounds check against the reply size.
 */
static void
ncp_do_readdir(struct file *file, struct dir_context *ctx,
						struct ncp_cache_control *ctl)
{
	struct inode *dir = file_inode(file);
	struct ncp_server *server = NCP_SERVER(dir);
	struct nw_search_sequence seq;
	struct ncp_entry_info entry;
	int err;
	void* buf;
	int more;
	size_t bufsize;

	ncp_dbg(1, "%pD2, fpos=%ld\n", file, (unsigned long)ctx->pos);
	ncp_vdbg("init %pD, volnum=%d, dirent=%u\n",
		 file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);

	err = ncp_initialize_search(server, dir, &seq);
	if (err) {
		ncp_dbg(1, "init failed, err=%d\n", err);
		return;
	}
	/* We MUST NOT use server->buffer_size handshaked with server if we are
	   using UDP, as for UDP server uses max. buffer size determined by
	   MTU, and for TCP server uses hardwired value 65KB (== 66560 bytes).
	   So we use 128KB, just to be sure, as there is no way how to know
	   this value in advance. */
	bufsize = 131072;
	buf = vmalloc(bufsize);
	if (!buf)
		return;
	do {
		int cnt;
		char* rpl;
		size_t rpls;

		err = ncp_search_for_fileset(server, &seq, &more, &cnt, buf, bufsize, &rpl, &rpls);
		if (err)		/* Error */
			break;
		if (!cnt)		/* prevent endless loop */
			break;
		while (cnt--) {
			size_t onerpl;

			if (rpls < offsetof(struct nw_info_struct, entryName))
				break;	/* short packet */
			ncp_extract_file_info(rpl, &entry.i);
			onerpl = offsetof(struct nw_info_struct, entryName) + entry.i.nameLen;
			if (rpls < onerpl)
				break;	/* short packet */
			(void)ncp_obtain_nfs_info(server, &entry.i);
			rpl += onerpl;
			rpls -= onerpl;
			entry.volume = entry.i.volNumber;
			if (!ncp_fill_cache(file, ctx, ctl, &entry, 0))
				break;
		}
	} while (more);
	vfree(buf);
	return;
}
775
/*
 * Called after the connection is authenticated.  For a single-volume
 * mount, resolve the mounted volume's root directory on the server and
 * patch its volume/directory numbers into the existing root inode.
 * Multi-volume mounts need no work here.  Returns 0 on success or a
 * negative errno (-ENOENT when the volume cannot be resolved).
 */
int ncp_conn_logged_in(struct super_block *sb)
{
	struct ncp_server* server = NCP_SBP(sb);
	int result;

	if (ncp_single_volume(server)) {
		int len;
		struct dentry* dent;
		__u32 volNumber;
		__le32 dirEntNum;
		__le32 DosDirNum;
		__u8 __name[NCP_MAXPATHLEN + 1];

		len = sizeof(__name);
		result = ncp_io2vol(server, __name, &len, server->m.mounted_vol,
				    strlen(server->m.mounted_vol), 1);
		if (result)
			goto out;
		result = -ENOENT;
		if (ncp_get_volume_root(server, __name, &volNumber, &dirEntNum, &DosDirNum)) {
			ncp_vdbg("%s not found\n", server->m.mounted_vol);
			goto out;
		}
		dent = sb->s_root;
		if (dent) {
			struct inode* ino = d_inode(dent);
			if (ino) {
				ncp_update_known_namespace(server, volNumber, NULL);
				NCP_FINFO(ino)->volNumber = volNumber;
				NCP_FINFO(ino)->dirEntNum = dirEntNum;
				NCP_FINFO(ino)->DosDirNum = DosDirNum;
				result = 0;
			} else {
				ncp_dbg(1, "d_inode(sb->s_root) == NULL!\n");
			}
		} else {
			ncp_dbg(1, "sb->s_root == NULL!\n");
		}
	} else
		result = 0;

out:
	return result;
}
820
/*
 * ->lookup(): resolve a name on the server.  At the server root the
 * name is a volume name; elsewhere it is a normal directory entry.
 * A server miss produces a cached negative dentry (d_add with a NULL
 * inode); a hit allocates a fresh inode.  Failure to build the inode
 * yields -EACCES; a dead connection yields -EIO.
 */
static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	struct ncp_server *server = NCP_SERVER(dir);
	struct inode *inode = NULL;
	struct ncp_entry_info finfo;
	int error, res, len;
	__u8 __name[NCP_MAXPATHLEN + 1];

	error = -EIO;
	if (!ncp_conn_valid(server))
		goto finished;

	ncp_vdbg("server lookup for %pd2\n", dentry);

	len = sizeof(__name);
	if (ncp_is_server_root(dir)) {
		res = ncp_io2vol(server, __name, &len, dentry->d_name.name,
				 dentry->d_name.len, 1);
		if (!res)
			res = ncp_lookup_volume(server, __name, &(finfo.i));
		if (!res)
			ncp_update_known_namespace(server, finfo.i.volNumber, NULL);
	} else {
		res = ncp_io2vol(server, __name, &len, dentry->d_name.name,
				 dentry->d_name.len, !ncp_preserve_case(dir));
		if (!res)
			res = ncp_obtain_info(server, dir, __name, &(finfo.i));
	}
	ncp_vdbg("looked for %pd2, res=%d\n", dentry, res);
	/*
	 * If we didn't find an entry, make a negative dentry.
	 */
	if (res)
		goto add_entry;

	/*
	 * Create an inode for the entry.
	 */
	finfo.opened = 0;
	finfo.ino = iunique(dir->i_sb, 2);
	finfo.volume = finfo.i.volNumber;
	error = -EACCES;
	inode = ncp_iget(dir->i_sb, &finfo);

	if (inode) {
		ncp_new_dentry(dentry);
add_entry:
		d_add(dentry, inode);	/* inode == NULL makes it negative */
		error = 0;
	}

finished:
	ncp_vdbg("result=%d\n", error);
	return ERR_PTR(error);
}
876
/*
 * This code is common to create, mkdir, and mknod: allocate an inode
 * number, build the inode from @finfo and attach it to the dentry.
 * If the inode cannot be built, the server file handle that the caller
 * just opened is closed again and -EINVAL is returned.
 */
static int ncp_instantiate(struct inode *dir, struct dentry *dentry,
			struct ncp_entry_info *finfo)
{
	struct inode *inode;
	int error = -EINVAL;

	finfo->ino = iunique(dir->i_sb, 2);
	inode = ncp_iget(dir->i_sb, finfo);
	if (!inode)
		goto out_close;
	d_instantiate(dentry,inode);
	error = 0;
out:
	return error;

out_close:
	ncp_vdbg("%pd2 failed, closing file\n", dentry);
	ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
	goto out;
}
900
/*
 * Create an object on the server and instantiate a dentry for it.
 * The open is first tried read/write and falls back to write-only.
 * With NCP_MOUNT_EXTRAS, executable regular files additionally get the
 * aSYSTEM|aSHARED attribute bits.  On NFS-namespace volumes the Unix
 * @mode and @rdev are also pushed to the server so special files keep
 * their identity.  Returns 0 or a negative errno (0x87 from the server
 * maps to -ENAMETOOLONG, other positive codes to -EACCES).
 */
int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
		   dev_t rdev, __le32 attributes)
{
	struct ncp_server *server = NCP_SERVER(dir);
	struct ncp_entry_info finfo;
	int error, result, len;
	int opmode;
	__u8 __name[NCP_MAXPATHLEN + 1];

	ncp_vdbg("creating %pd2, mode=%hx\n", dentry, mode);

	ncp_age_dentry(server, dentry);
	len = sizeof(__name);
	error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
			   dentry->d_name.len, !ncp_preserve_case(dir));
	if (error)
		goto out;

	error = -EACCES;

	if (S_ISREG(mode) &&
	    (server->m.flags & NCP_MOUNT_EXTRAS) &&
	    (mode & S_IXUGO))
		attributes |= aSYSTEM | aSHARED;

	result = ncp_open_create_file_or_subdir(server, dir, __name,
				OC_MODE_CREATE | OC_MODE_OPEN | OC_MODE_REPLACE,
				attributes, AR_READ | AR_WRITE, &finfo);
	opmode = O_RDWR;
	if (result) {
		/* retry write-only before giving up */
		result = ncp_open_create_file_or_subdir(server, dir, __name,
				OC_MODE_CREATE | OC_MODE_OPEN | OC_MODE_REPLACE,
				attributes, AR_WRITE, &finfo);
		if (result) {
			if (result == 0x87)
				error = -ENAMETOOLONG;
			else if (result < 0)
				error = result;
			ncp_dbg(1, "%pd2 failed\n", dentry);
			goto out;
		}
		opmode = O_WRONLY;
	}
	finfo.access = opmode;
	if (ncp_is_nfs_extras(server, finfo.volume)) {
		finfo.i.nfs.mode = mode;
		finfo.i.nfs.rdev = new_encode_dev(rdev);
		if (ncp_modify_nfs_info(server, finfo.volume,
					finfo.i.dirEntNum,
					mode, new_encode_dev(rdev)) != 0)
			goto out;
	}

	error = ncp_instantiate(dir, dentry, &finfo);
out:
	return error;
}
958
/*
 * ->create(): plain file creation is ncp_create_new() with no device
 * number and default attributes; @excl is not used here.
 */
static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		      bool excl)
{
	return ncp_create_new(dir, dentry, mode, 0, 0);
}
964
/*
 * ->mkdir(): create a subdirectory on the server (with an inherited
 * rights mask of 0xffff) and instantiate it; on NFS-namespace volumes
 * the Unix mode (with S_IFDIR) is also pushed to the server.  Positive
 * NCP result codes are mapped to -EACCES.
 */
static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct ncp_entry_info finfo;
	struct ncp_server *server = NCP_SERVER(dir);
	int error, len;
	__u8 __name[NCP_MAXPATHLEN + 1];

	ncp_dbg(1, "making %pd2\n", dentry);

	ncp_age_dentry(server, dentry);
	len = sizeof(__name);
	error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
			   dentry->d_name.len, !ncp_preserve_case(dir));
	if (error)
		goto out;

	error = ncp_open_create_file_or_subdir(server, dir, __name,
					       OC_MODE_CREATE, aDIR,
					       cpu_to_le16(0xffff),
					       &finfo);
	if (error == 0) {
		if (ncp_is_nfs_extras(server, finfo.volume)) {
			mode |= S_IFDIR;
			finfo.i.nfs.mode = mode;
			if (ncp_modify_nfs_info(server,
						finfo.volume,
						finfo.i.dirEntNum,
						mode, 0) != 0)
				goto out;
		}
		error = ncp_instantiate(dir, dentry, &finfo);
	} else if (error > 0) {
		error = -EACCES;
	}
out:
	return error;
}
1002
/*
 * ->rmdir(): delete a subdirectory on the server and map the NCP
 * result code to an errno.
 */
static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct ncp_server *server = NCP_SERVER(dir);
	int error, result, len;
	__u8 __name[NCP_MAXPATHLEN + 1];

	ncp_dbg(1, "removing %pd2\n", dentry);

	len = sizeof(__name);
	error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
			   dentry->d_name.len, !ncp_preserve_case(dir));
	if (error)
		goto out;

	result = ncp_del_file_or_subdir(server, dir, __name);
	switch (result) {
	case 0x00:
		error = 0;
		break;
	case 0x85:	/* unauthorized to delete file */
	case 0x8A:	/* unauthorized to delete file */
		error = -EACCES;
		break;
	case 0x8F:
	case 0x90:	/* read only */
		error = -EPERM;
		break;
	case 0x9F:	/* in use by another client */
		error = -EBUSY;
		break;
	case 0xA0:	/* directory not empty */
		error = -ENOTEMPTY;
		break;
	case 0xFF:	/* someone deleted file */
		error = -ENOENT;
		break;
	default:
		/* negative: transport errno; other positives: refuse */
		error = result < 0 ? result : -EACCES;
		break;
	}
out:
	return error;
}
1046
/*
 * ->unlink(): close any open server handle for the file, delete it on
 * the server, and map the NCP result code to an errno.  On
 * NCP_MOUNT_STRONG mounts a read-only failure (0x9C/0x90) triggers
 * ncp_force_unlink(), which temporarily clears the read-only bits.
 */
static int ncp_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
	struct ncp_server *server;
	int error;

	server = NCP_SERVER(dir);
	ncp_dbg(1, "unlinking %pd2\n", dentry);

	/*
	 * Check whether to close the file ...
	 */
	if (inode) {
		ncp_vdbg("closing file\n");
		ncp_make_closed(inode);
	}

	error = ncp_del_file_or_subdir2(server, dentry);
#ifdef CONFIG_NCPFS_STRONG
	/* 9C is Invalid path.. It should be 8F, 90 - read only, but
	   it is not :-( */
	if ((error == 0x9C || error == 0x90) && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */
		error = ncp_force_unlink(dir, dentry);
	}
#endif
	switch (error) {
	case 0x00:
		ncp_dbg(1, "removed %pd2\n", dentry);
		break;
	case 0x85:
	case 0x8A:
		error = -EACCES;
		break;
	case 0x8D:	/* some files in use */
	case 0x8E:	/* all files in use */
		error = -EBUSY;
		break;
	case 0x8F:	/* some read only */
	case 0x90:	/* all read only */
	case 0x9C:	/* !!! returned when in-use or read-only by NW4 */
		error = -EPERM;
		break;
	case 0xFF:
		error = -ENOENT;
		break;
	default:
		error = error < 0 ? error : -EACCES;
		break;
	}
	return error;
}
1098
/*
 * ->rename(): no RENAME_* flags are supported.  Both names are
 * converted to the volume charset and the server-side rename is
 * issued; on NCP_MOUNT_STRONG mounts read-only failures are retried
 * through ncp_force_rename().  On success both dentries' parent
 * readdir caches are invalidated (ncp_d_prune) so stale cached
 * entries are not replayed.
 */
static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
		      struct inode *new_dir, struct dentry *new_dentry,
		      unsigned int flags)
{
	struct ncp_server *server = NCP_SERVER(old_dir);
	int error;
	int old_len, new_len;
	__u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];

	if (flags)
		return -EINVAL;

	ncp_dbg(1, "%pd2 to %pd2\n", old_dentry, new_dentry);

	ncp_age_dentry(server, old_dentry);
	ncp_age_dentry(server, new_dentry);

	old_len = sizeof(__old_name);
	error = ncp_io2vol(server, __old_name, &old_len,
			   old_dentry->d_name.name, old_dentry->d_name.len,
			   !ncp_preserve_case(old_dir));
	if (error)
		goto out;

	new_len = sizeof(__new_name);
	error = ncp_io2vol(server, __new_name, &new_len,
			   new_dentry->d_name.name, new_dentry->d_name.len,
			   !ncp_preserve_case(new_dir));
	if (error)
		goto out;

	error = ncp_ren_or_mov_file_or_subdir(server, old_dir, __old_name,
					      new_dir, __new_name);
#ifdef CONFIG_NCPFS_STRONG
	if ((error == 0x90 || error == 0x8B || error == -EACCES) &&
	    server->m.flags & NCP_MOUNT_STRONG) {	/* RO */
		error = ncp_force_rename(old_dir, old_dentry, __old_name,
					 new_dir, new_dentry, __new_name);
	}
#endif
	switch (error) {
	case 0x00:
		ncp_dbg(1, "renamed %pd -> %pd\n",
			old_dentry, new_dentry);
		ncp_d_prune(old_dentry);
		ncp_d_prune(new_dentry);
		break;
	case 0x9E:
		error = -ENAMETOOLONG;
		break;
	case 0xFF:
		error = -ENOENT;
		break;
	default:
		error = error < 0 ? error : -EACCES;
		break;
	}
out:
	return error;
}
1159
1160static int ncp_mknod(struct inode * dir, struct dentry *dentry,
1161 umode_t mode, dev_t rdev)
1162{
1163 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
1164 ncp_dbg(1, "mode = 0%ho\n", mode);
1165 return ncp_create_new(dir, dentry, mode, rdev, 0);
1166 }
1167 return -EPERM; /* Strange, but true */
1168}
1169
/* The following routines are taken directly from msdos-fs */

/* Linear day numbers of the respective 1sts in non-leap years.
 * Sixteen entries: index 0 = January, and four trailing zeroes so the
 * 4-bit masked month value (0-15) can never index out of bounds. */

static int day_n[] =
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0};
/* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */
1177
/* Convert seconds-since-epoch from UTC to local time, using the kernel
 * timezone (sys_tz); DOS timestamps are kept in local time. */
static int utc2local(int time)
{
	return time - sys_tz.tz_minuteswest * 60;
}
1182
/* Inverse of utc2local(): convert local time back to UTC. */
static int local2utc(int time)
{
	return time + sys_tz.tz_minuteswest * 60;
}
1187
/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70).
 *
 * DOS packing: time = hhhhh mmmmmm sssss (seconds stored /2),
 *              date = yyyyyyy mmmm ddddd (year relative to 1980).
 * The result is built as days-since-1970 (+3653 covers 1970-1980
 * including 1980's leap day) and converted from local time to UTC.
 */
int
ncp_date_dos2unix(__le16 t, __le16 d)
{
	unsigned short time = le16_to_cpu(t), date = le16_to_cpu(d);
	int month, year, secs;

	/* first subtract and mask after that... Otherwise, if
	   date == 0, bad things happen */
	month = ((date >> 5) - 1) & 15;
	year = date >> 9;
	secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 +
		86400 * ((date & 31) - 1 + day_n[month] + (year / 4) +
		year * 365 - ((year & 3) == 0 && month < 2 ? 1 : 0) + 3653);
	/* days since 1.1.70 plus 80's leap day */
	return local2utc(secs);
}
1205
1206
/* Convert linear UNIX date to a MS-DOS time/date pair.
 *
 * The UTC timestamp is first shifted to local time (DOS convention),
 * the time word is packed as hhhhh mmmmmm sssss (seconds /2), and the
 * date word as yyyyyyy mmmm ddddd with the year relative to 1980
 * (day 0 of the epoch here is 1.1.1980, hence the -3652 offset). */
void
ncp_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
{
	int day, year, nl_day, month;

	unix_date = utc2local(unix_date);
	*time = cpu_to_le16(
		(unix_date % 60) / 2 + (((unix_date / 60) % 60) << 5) +
		(((unix_date / 3600) % 24) << 11));
	day = unix_date / 86400 - 3652;
	year = day / 365;
	if ((year + 3) / 4 + 365 * year > day)
		year--;
	day -= (year + 3) / 4 + 365 * year;
	if (day == 59 && !(year & 3)) {
		/* Feb 29 of a leap year */
		nl_day = day;
		month = 2;
	} else {
		/* map the day-of-year onto the non-leap table */
		nl_day = (year & 3) || day <= 59 ? day : day - 1;
		for (month = 1; month < 12; month++)
			if (day_n[month] > nl_day)
				break;
	}
	*date = cpu_to_le16(nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9));
}
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
deleted file mode 100644
index 8f8cc0334ddd..000000000000
--- a/fs/ncpfs/file.c
+++ /dev/null
@@ -1,263 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * file.c
4 *
5 * Copyright (C) 1995, 1996 by Volker Lendecke
6 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
7 *
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/uaccess.h>
13
14#include <linux/time.h>
15#include <linux/kernel.h>
16#include <linux/errno.h>
17#include <linux/fcntl.h>
18#include <linux/stat.h>
19#include <linux/mm.h>
20#include <linux/vmalloc.h>
21#include <linux/sched.h>
22
23#include "ncp_fs.h"
24
/*
 * fsync for ncpfs: all we can do locally is flush and wait on the
 * dirty pages in the requested range; there is no separate metadata
 * sync step, so @datasync is ignored.
 */
static int ncp_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	return file_write_and_wait_range(file, start, end);
}
29
30/*
31 * Open a file with the specified read/write mode.
32 */
/*
 * Open a file with the specified read/write mode.
 *
 * @inode: inode of the file to open on the server
 * @right: requested access mode (O_RDONLY, O_WRONLY or O_RDWR)
 *
 * If the file is not yet open on the server, open it — first with full
 * O_RDWR rights, then falling back to exactly the requested mode.  On
 * success the per-inode open count is incremented; each successful call
 * must be balanced by ncp_inode_close().  Returns 0 on success,
 * -EACCES if the granted access does not cover @right, -EINVAL on a
 * NULL inode.  Serialized by the per-inode open_mutex.
 */
int ncp_make_open(struct inode *inode, int right)
{
	int error;
	int access;

	error = -EINVAL;
	if (!inode) {
		pr_err("%s: got NULL inode\n", __func__);
		goto out;
	}

	ncp_dbg(1, "opened=%d, volume # %u, dir entry # %u\n",
		atomic_read(&NCP_FINFO(inode)->opened),
		NCP_FINFO(inode)->volNumber,
		NCP_FINFO(inode)->dirEntNum);
	error = -EACCES;
	mutex_lock(&NCP_FINFO(inode)->open_mutex);
	if (!atomic_read(&NCP_FINFO(inode)->opened)) {
		struct ncp_entry_info finfo;
		int result;

		/* tries max. rights */
		finfo.access = O_RDWR;
		result = ncp_open_create_file_or_subdir(NCP_SERVER(inode),
					inode, NULL, OC_MODE_OPEN,
					0, AR_READ | AR_WRITE, &finfo);
		if (!result)
			goto update;
		/* RDWR did not succeeded, try readonly or writeonly as requested */
		switch (right) {
		case O_RDONLY:
			finfo.access = O_RDONLY;
			result = ncp_open_create_file_or_subdir(NCP_SERVER(inode),
					inode, NULL, OC_MODE_OPEN,
					0, AR_READ, &finfo);
			break;
		case O_WRONLY:
			finfo.access = O_WRONLY;
			result = ncp_open_create_file_or_subdir(NCP_SERVER(inode),
					inode, NULL, OC_MODE_OPEN,
					0, AR_WRITE, &finfo);
			break;
		}
		if (result) {
			ncp_vdbg("failed, result=%d\n", result);
			goto out_unlock;
		}
		/*
		 * Update the inode information.
		 */
	update:
		ncp_update_inode(inode, &finfo);
		atomic_set(&NCP_FINFO(inode)->opened, 1);
	}

	/* Succeed only if the server granted at least the requested mode. */
	access = NCP_FINFO(inode)->access;
	ncp_vdbg("file open, access=%x\n", access);
	if (access == right || access == O_RDWR) {
		atomic_inc(&NCP_FINFO(inode)->opened);
		error = 0;
	}

out_unlock:
	mutex_unlock(&NCP_FINFO(inode)->open_mutex);
out:
	return error;
}
100
/*
 * read_iter for ncpfs.  Data is pulled from the server one buffer_size
 * chunk at a time through a vmalloc'd bounce buffer; there is no page
 * cache involvement for regular file data.  Returns the number of
 * bytes read, or a negative errno if nothing was read.
 */
static ssize_t
ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	size_t already_read = 0;
	off_t pos = iocb->ki_pos;
	size_t bufsize;
	int error;
	void *freepage;
	size_t freelen;

	ncp_dbg(1, "enter %pD2\n", file);

	if (!iov_iter_count(to))
		return 0;
	/* Reads at or past s_maxbytes return EOF (0), not an error. */
	if (pos > inode->i_sb->s_maxbytes)
		return 0;
	iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos);

	/* Make sure the file is open on the server; balanced by
	 * ncp_inode_close() at outrel. */
	error = ncp_make_open(inode, O_RDONLY);
	if (error) {
		ncp_dbg(1, "open failed, error=%d\n", error);
		return error;
	}

	bufsize = NCP_SERVER(inode)->buffer_size;

	error = -EIO;
	freelen = ncp_read_bounce_size(bufsize);
	freepage = vmalloc(freelen);
	if (!freepage)
		goto outrel;
	error = 0;
	/* First read in as much as possible for each bufsize. */
	while (iov_iter_count(to)) {
		int read_this_time;
		/* Stay within the current bufsize-aligned window. */
		size_t to_read = min_t(size_t,
				       bufsize - (pos % bufsize),
				       iov_iter_count(to));

		error = ncp_read_bounce(NCP_SERVER(inode),
				NCP_FINFO(inode)->file_handle,
				pos, to_read, to, &read_this_time,
				freepage, freelen);
		if (error) {
			error = -EIO;	/* NW errno -> Linux errno */
			break;
		}
		pos += read_this_time;
		already_read += read_this_time;

		/* Short read from the server means EOF. */
		if (read_this_time != to_read)
			break;
	}
	vfree(freepage);

	iocb->ki_pos = pos;

	file_accessed(file);

	ncp_dbg(1, "exit %pD2\n", file);
outrel:
	/* Drop the open reference taken by ncp_make_open(). */
	ncp_inode_close(inode);
	/* Partial success wins over a late error, POSIX-style. */
	return already_read ? already_read : error;
}
167
/*
 * write_iter for ncpfs.  User data is copied into a vmalloc'd bounce
 * buffer and pushed to the server in buffer_size chunks.  Returns the
 * number of bytes written, or a negative errno if nothing was written.
 */
static ssize_t
ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	size_t already_written = 0;
	size_t bufsize;
	int errno;
	void *bouncebuffer;
	off_t pos;

	ncp_dbg(1, "enter %pD2\n", file);
	/* Handles O_APPEND, size limits etc.; <= 0 means nothing to do. */
	errno = generic_write_checks(iocb, from);
	if (errno <= 0)
		return errno;

	/* Balanced by ncp_inode_close() at outrel. */
	errno = ncp_make_open(inode, O_WRONLY);
	if (errno) {
		ncp_dbg(1, "open failed, error=%d\n", errno);
		return errno;
	}
	bufsize = NCP_SERVER(inode)->buffer_size;

	errno = file_update_time(file);
	if (errno)
		goto outrel;

	bouncebuffer = vmalloc(bufsize);
	if (!bouncebuffer) {
		errno = -EIO;	/* -ENOMEM */
		goto outrel;
	}
	pos = iocb->ki_pos;
	while (iov_iter_count(from)) {
		int written_this_time;
		/* Stay within the current bufsize-aligned window. */
		size_t to_write = min_t(size_t,
					bufsize - (pos % bufsize),
					iov_iter_count(from));

		if (!copy_from_iter_full(bouncebuffer, to_write, from)) {
			errno = -EFAULT;
			break;
		}
		if (ncp_write_kernel(NCP_SERVER(inode),
		    NCP_FINFO(inode)->file_handle,
		    pos, to_write, bouncebuffer, &written_this_time) != 0) {
			errno = -EIO;
			break;
		}
		pos += written_this_time;
		already_written += written_this_time;

		/* Short write from the server: stop. */
		if (written_this_time != to_write)
			break;
	}
	vfree(bouncebuffer);

	iocb->ki_pos = pos;

	/* Extend i_size if we wrote past EOF; recheck under the lock. */
	if (pos > i_size_read(inode)) {
		inode_lock(inode);
		if (pos > i_size_read(inode))
			i_size_write(inode, pos);
		inode_unlock(inode);
	}
	ncp_dbg(1, "exit %pD2\n", file);
outrel:
	/* Drop the open reference taken by ncp_make_open(). */
	ncp_inode_close(inode);
	/* Partial success wins over a late error, POSIX-style. */
	return already_written ? already_written : errno;
}
238
/*
 * Last reference to the struct file is gone: ask the server to close
 * the NCP file handle.  Failure is only logged; release cannot fail.
 */
static int ncp_release(struct inode *inode, struct file *file)
{
	int failed = ncp_make_closed(inode);

	if (failed)
		ncp_dbg(1, "failed to close\n");
	return 0;
}
245
/* File operations for regular ncpfs files; wired up in ncp_iget(). */
const struct file_operations ncp_file_operations =
{
	.llseek		= generic_file_llseek,
	.read_iter	= ncp_file_read_iter,
	.write_iter	= ncp_file_write_iter,
	.unlocked_ioctl	= ncp_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ncp_compat_ioctl,
#endif
	.mmap		= ncp_mmap,
	.release	= ncp_release,
	.fsync		= ncp_fsync,
};
259
/* Inode operations for regular ncpfs files; only setattr is special. */
const struct inode_operations ncp_file_inode_operations =
{
	.setattr	= ncp_notify_change,
};
diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c
deleted file mode 100644
index 5c941bef14c4..000000000000
--- a/fs/ncpfs/getopt.c
+++ /dev/null
@@ -1,76 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * getopt.c
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/kernel.h>
9#include <linux/string.h>
10
11#include <asm/errno.h>
12
13#include "getopt.h"
14
15/**
16 * ncp_getopt - option parser
17 * @caller: name of the caller, for error messages
18 * @options: the options string
19 * @opts: an array of &struct option entries controlling parser operations
20 * @optopt: output; will contain the current option
21 * @optarg: output; will contain the value (if one exists)
22 * @value: output; may be NULL; will be overwritten with the integer value
23 * of the current argument.
24 *
25 * Helper to parse options on the format used by mount ("a=b,c=d,e,f").
26 * Returns opts->val if a matching entry in the 'opts' array is found,
27 * 0 when no more tokens are found, -1 if an error is encountered.
28 */
29int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts,
30 char **optopt, char **optarg, unsigned long *value)
31{
32 char *token;
33 char *val;
34
35 do {
36 if ((token = strsep(options, ",")) == NULL)
37 return 0;
38 } while (*token == '\0');
39 if (optopt)
40 *optopt = token;
41
42 if ((val = strchr (token, '=')) != NULL) {
43 *val++ = 0;
44 }
45 *optarg = val;
46 for (; opts->name; opts++) {
47 if (!strcmp(opts->name, token)) {
48 if (!val) {
49 if (opts->has_arg & OPT_NOPARAM) {
50 return opts->val;
51 }
52 pr_info("%s: the %s option requires an argument\n",
53 caller, token);
54 return -EINVAL;
55 }
56 if (opts->has_arg & OPT_INT) {
57 int rc = kstrtoul(val, 0, value);
58
59 if (rc) {
60 pr_info("%s: invalid numeric value in %s=%s\n",
61 caller, token, val);
62 return rc;
63 }
64 return opts->val;
65 }
66 if (opts->has_arg & OPT_STRING) {
67 return opts->val;
68 }
69 pr_info("%s: unexpected argument %s to the %s option\n",
70 caller, val, token);
71 return -EINVAL;
72 }
73 }
74 pr_info("%s: Unrecognized mount option %s\n", caller, token);
75 return -EOPNOTSUPP;
76}
diff --git a/fs/ncpfs/getopt.h b/fs/ncpfs/getopt.h
deleted file mode 100644
index 30f0da317670..000000000000
--- a/fs/ncpfs/getopt.h
+++ /dev/null
@@ -1,17 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_GETOPT_H
#define _LINUX_GETOPT_H

/* has_arg flags for struct ncp_option */
#define OPT_NOPARAM 1	/* option takes no argument */
#define OPT_INT 2	/* argument is parsed as an unsigned long */
#define OPT_STRING 4	/* argument is passed through as a string */

/* One recognized mount option. */
struct ncp_option {
	const char *name;	/* option name, e.g. "uid" */
	unsigned int has_arg;	/* OPT_* flags above */
	int val;		/* value returned by ncp_getopt() on match */
};

/* Parse one "name[=value]" token from a comma-separated option string;
 * see the kernel-doc in getopt.c for the full contract. */
extern int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts,
		      char **optopt, char **optarg, unsigned long *value);

#endif /* _LINUX_GETOPT_H */
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
deleted file mode 100644
index 41de88cdc053..000000000000
--- a/fs/ncpfs/inode.c
+++ /dev/null
@@ -1,1066 +0,0 @@
1/*
2 * inode.c
3 *
4 * Copyright (C) 1995, 1996 by Volker Lendecke
5 * Modified for big endian by J.F. Chadima and David S. Miller
6 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
7 * Modified 1998 Wolfram Pienkoss for NLS
8 * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info
9 *
10 */
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/module.h>
15
16#include <linux/uaccess.h>
17#include <asm/byteorder.h>
18
19#include <linux/time.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
22#include <linux/string.h>
23#include <linux/stat.h>
24#include <linux/errno.h>
25#include <linux/file.h>
26#include <linux/fcntl.h>
27#include <linux/slab.h>
28#include <linux/vmalloc.h>
29#include <linux/init.h>
30#include <linux/vfs.h>
31#include <linux/mount.h>
32#include <linux/seq_file.h>
33#include <linux/sched/signal.h>
34#include <linux/namei.h>
35
36#include <net/sock.h>
37
38#include "ncp_fs.h"
39#include "getopt.h"
40
41#define NCP_DEFAULT_FILE_MODE 0600
42#define NCP_DEFAULT_DIR_MODE 0700
43#define NCP_DEFAULT_TIME_OUT 10
44#define NCP_DEFAULT_RETRY_COUNT 20
45
46static void ncp_evict_inode(struct inode *);
47static void ncp_put_super(struct super_block *);
48static int ncp_statfs(struct dentry *, struct kstatfs *);
49static int ncp_show_options(struct seq_file *, struct dentry *);
50
51static struct kmem_cache * ncp_inode_cachep;
52
53static struct inode *ncp_alloc_inode(struct super_block *sb)
54{
55 struct ncp_inode_info *ei;
56 ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, GFP_KERNEL);
57 if (!ei)
58 return NULL;
59 return &ei->vfs_inode;
60}
61
/* RCU callback: actually free the inode once all readers are done. */
static void ncp_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
}
67
/* Defer inode freeing past an RCU grace period (lockless d_lookup). */
static void ncp_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, ncp_i_callback);
}
72
/* Slab constructor: runs once per slab object, not per allocation, so
 * only state that survives free/realloc cycles may be set up here. */
static void init_once(void *foo)
{
	struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;

	mutex_init(&ei->open_mutex);
	inode_init_once(&ei->vfs_inode);
}
80
81static int init_inodecache(void)
82{
83 ncp_inode_cachep = kmem_cache_create("ncp_inode_cache",
84 sizeof(struct ncp_inode_info),
85 0, (SLAB_RECLAIM_ACCOUNT|
86 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
87 init_once);
88 if (ncp_inode_cachep == NULL)
89 return -ENOMEM;
90 return 0;
91}
92
/* Tear down the inode slab cache at module unload. */
static void destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ncp_inode_cachep);
}
102
/* Remount: ncpfs accepts no option changes; just sync and re-assert
 * the always-on SB_NODIRATIME flag. */
static int ncp_remount(struct super_block *sb, int *flags, char* data)
{
	sync_filesystem(sb);
	*flags |= SB_NODIRATIME;
	return 0;
}
109
/* Superblock operations for ncpfs. */
static const struct super_operations ncp_sops =
{
	.alloc_inode	= ncp_alloc_inode,
	.destroy_inode	= ncp_destroy_inode,
	.drop_inode	= generic_delete_inode,
	.evict_inode	= ncp_evict_inode,
	.put_super	= ncp_put_super,
	.statfs		= ncp_statfs,
	.remount_fs	= ncp_remount,
	.show_options	= ncp_show_options,
};
121
122/*
123 * Fill in the ncpfs-specific information in the inode.
124 */
/*
 * Fill in the ncpfs-specific information in the inode.
 */
/* Copy the server-side directory-entry identity into the inode. */
static void ncp_update_dirent(struct inode *inode, struct ncp_entry_info *nwinfo)
{
	NCP_FINFO(inode)->DosDirNum = nwinfo->i.DosDirNum;
	NCP_FINFO(inode)->dirEntNum = nwinfo->i.dirEntNum;
	NCP_FINFO(inode)->volNumber = nwinfo->volume;
}
131
/* Refresh the inode's ncpfs state (identity, attributes, access mode
 * and server file handle) from a fresh server entry. */
void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
{
	ncp_update_dirent(inode, nwinfo);
	NCP_FINFO(inode)->nwattr = nwinfo->i.attributes;
	NCP_FINFO(inode)->access = nwinfo->access;
	memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle,
		sizeof(nwinfo->file_handle));
	ncp_dbg(1, "updated %s, volnum=%d, dirent=%u\n",
		nwinfo->i.entryName, NCP_FINFO(inode)->volNumber,
		NCP_FINFO(inode)->dirEntNum);
}
143
/* Update mode, block count and the a/c/mtime stamps from the server's
 * DOS-format timestamps.  DOS dates have no sub-second resolution, so
 * all tv_nsec fields are zeroed. */
static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi)
{
	/* NFS namespace mode overrides others if it's set. */
	ncp_dbg(1, "(%s) nfs.mode=0%o\n", nwi->entryName, nwi->nfs.mode);
	if (nwi->nfs.mode) {
		/* XXX Security? */
		inode->i_mode = nwi->nfs.mode;
	}

	inode->i_blocks = (i_size_read(inode) + NCP_BLOCK_SIZE - 1) >> NCP_BLOCK_SHIFT;

	inode->i_mtime.tv_sec = ncp_date_dos2unix(nwi->modifyTime, nwi->modifyDate);
	inode->i_ctime.tv_sec = ncp_date_dos2unix(nwi->creationTime, nwi->creationDate);
	/* Access date has no time-of-day component on the wire. */
	inode->i_atime.tv_sec = ncp_date_dos2unix(0, nwi->lastAccessDate);
	inode->i_atime.tv_nsec = 0;
	inode->i_mtime.tv_nsec = 0;
	inode->i_ctime.tv_nsec = 0;
}
162
/* Derive i_mode and i_size from the server's NetWare attributes,
 * applying the mount's file_mode/dir_mode and (optionally) the
 * EXTRAS/SYMLINKS attribute-mapping kludge. */
static void ncp_update_attrs(struct inode *inode, struct ncp_entry_info *nwinfo)
{
	struct nw_info_struct *nwi = &nwinfo->i;
	struct ncp_server *server = NCP_SERVER(inode);

	if (nwi->attributes & aDIR) {
		inode->i_mode = server->m.dir_mode;
		/* for directories dataStreamSize seems to be some
		   Object ID ??? */
		i_size_write(inode, NCP_BLOCK_SIZE);
	} else {
		u32 size;

		inode->i_mode = server->m.file_mode;
		size = le32_to_cpu(nwi->dataStreamSize);
		i_size_write(inode, size);
#ifdef CONFIG_NCPFS_EXTRAS
		/* aSHARED plus combinations of aHIDDEN/aSYSTEM encode
		 * extra Unix semantics on servers without an NFS
		 * namespace — see the ncpfs mount documentation. */
		if ((server->m.flags & (NCP_MOUNT_EXTRAS|NCP_MOUNT_SYMLINKS))
		 && (nwi->attributes & aSHARED)) {
			switch (nwi->attributes & (aHIDDEN|aSYSTEM)) {
			case aHIDDEN:
				/* SHARED+HIDDEN: small files may be symlinks. */
				if (server->m.flags & NCP_MOUNT_SYMLINKS) {
					if (/* (size >= NCP_MIN_SYMLINK_SIZE)
					 && */ (size <= NCP_MAX_SYMLINK_SIZE)) {
						inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFLNK;
						NCP_FINFO(inode)->flags |= NCPI_KLUDGE_SYMLINK;
						break;
					}
				}
				/* FALLTHROUGH */
			case 0:
				/* SHARED alone: world-readable. */
				if (server->m.flags & NCP_MOUNT_EXTRAS)
					inode->i_mode |= S_IRUGO;
				break;
			case aSYSTEM:
				/* SHARED+SYSTEM: executable wherever readable. */
				if (server->m.flags & NCP_MOUNT_EXTRAS)
					inode->i_mode |= (inode->i_mode >> 2) & S_IXUGO;
				break;
				/* case aSYSTEM|aHIDDEN: */
			default:
				/* reserved combination */
				break;
			}
		}
#endif
	}
	if (nwi->attributes & aRONLY) inode->i_mode &= ~S_IWUGO;
}
211
/* Revalidate an existing inode from fresh server data.  Attributes are
 * only refreshed while the file is not open — an open file's mode and
 * size are owned by the open-file state. */
void ncp_update_inode2(struct inode* inode, struct ncp_entry_info *nwinfo)
{
	NCP_FINFO(inode)->flags = 0;
	if (!atomic_read(&NCP_FINFO(inode)->opened)) {
		NCP_FINFO(inode)->nwattr = nwinfo->i.attributes;
		ncp_update_attrs(inode, nwinfo);
	}

	ncp_update_dates(inode, &nwinfo->i);
	ncp_update_dirent(inode, nwinfo);
}
223
224/*
225 * Fill in the inode based on the ncp_entry_info structure. Used only for brand new inodes.
226 */
/*
 * Fill in the inode based on the ncp_entry_info structure.  Used only for brand new inodes.
 */
static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
{
	struct ncp_server *server = NCP_SERVER(inode);

	NCP_FINFO(inode)->flags = 0;

	ncp_update_attrs(inode, nwinfo);

	ncp_dbg(2, "inode->i_mode = %u\n", inode->i_mode);

	/* NetWare has no hard links; every object has exactly one name. */
	set_nlink(inode, 1);
	inode->i_uid = server->m.uid;
	inode->i_gid = server->m.gid;

	ncp_update_dates(inode, &nwinfo->i);
	ncp_update_inode(inode, nwinfo);
}
244
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
/* Symlink inode operations; link bodies are read via the page cache. */
static const struct inode_operations ncp_symlink_inode_operations = {
	.get_link	= page_get_link,
	.setattr	= ncp_notify_change,
};
#endif
251
252/*
253 * Get a new inode.
254 */
/*
 * Get a new inode.
 *
 * Allocates a fresh inode for @info (never reuses one — ncpfs inode
 * numbers are synthetic), fills it in, selects the operation vectors
 * by file type, and hashes it.  Returns NULL on allocation failure or
 * a NULL @info.
 */
struct inode *
ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
{
	struct inode *inode;

	if (info == NULL) {
		pr_err("%s: info is NULL\n", __func__);
		return NULL;
	}

	inode = new_inode(sb);
	if (inode) {
		atomic_set(&NCP_FINFO(inode)->opened, info->opened);

		inode->i_ino = info->ino;
		ncp_set_attr(inode, info);
		if (S_ISREG(inode->i_mode)) {
			inode->i_op = &ncp_file_inode_operations;
			inode->i_fop = &ncp_file_operations;
		} else if (S_ISDIR(inode->i_mode)) {
			inode->i_op = &ncp_dir_inode_operations;
			inode->i_fop = &ncp_dir_operations;
#ifdef CONFIG_NCPFS_NFS_NS
		} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
			/* Device/FIFO/socket nodes come from the NFS namespace. */
			init_special_inode(inode, inode->i_mode,
				new_decode_dev(info->i.nfs.rdev));
#endif
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
		} else if (S_ISLNK(inode->i_mode)) {
			inode->i_op = &ncp_symlink_inode_operations;
			inode_nohighmem(inode);
			inode->i_data.a_ops = &ncp_symlink_aops;
#endif
		} else {
			make_bad_inode(inode);
		}
		insert_inode_hash(inode);
	} else
		pr_err("%s: iget failed!\n", __func__);
	return inode;
}
296
/* Final teardown of an inode: drop cached pages and close the file on
 * the server.  A failed close can only be reported, not recovered. */
static void
ncp_evict_inode(struct inode *inode)
{
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);

	if (S_ISDIR(inode->i_mode)) {
		ncp_dbg(2, "put directory %ld\n", inode->i_ino);
	}

	if (ncp_make_closed(inode) != 0) {
		/* We can't do anything but complain. */
		pr_err("%s: could not close\n", __func__);
	}
}
312
/* Detach ncpfs from the transport socket: restore the original socket
 * callbacks, stop the request timer and drain all pending work.  After
 * this returns no ncpfs code runs on behalf of the socket. */
static void ncp_stop_tasks(struct ncp_server *server) {
	struct sock* sk = server->ncp_sock->sk;

	/* Restore the callbacks saved in ncp_fill_super() under the
	 * socket lock so they can't race with incoming data. */
	lock_sock(sk);
	sk->sk_error_report = server->error_report;
	sk->sk_data_ready   = server->data_ready;
	sk->sk_write_space  = server->write_space;
	release_sock(sk);
	del_timer_sync(&server->timeout_tm);

	flush_work(&server->rcv.tq);
	/* TCP uses a tx workqueue; UDP uses the timeout workqueue. */
	if (sk->sk_socket->type == SOCK_STREAM)
		flush_work(&server->tx.tq);
	else
		flush_work(&server->timeout_tq);
}
329
/* Emit non-default mount options for /proc/mounts.  Each option is
 * printed only when it differs from its default value. */
static int ncp_show_options(struct seq_file *seq, struct dentry *root)
{
	struct ncp_server *server = NCP_SBP(root->d_sb);
	unsigned int tmp;

	if (!uid_eq(server->m.uid, GLOBAL_ROOT_UID))
		seq_printf(seq, ",uid=%u",
			   from_kuid_munged(&init_user_ns, server->m.uid));
	if (!gid_eq(server->m.gid, GLOBAL_ROOT_GID))
		seq_printf(seq, ",gid=%u",
			   from_kgid_munged(&init_user_ns, server->m.gid));
	if (!uid_eq(server->m.mounted_uid, GLOBAL_ROOT_UID))
		seq_printf(seq, ",owner=%u",
			   from_kuid_munged(&init_user_ns, server->m.mounted_uid));
	tmp = server->m.file_mode & S_IALLUGO;
	if (tmp != NCP_DEFAULT_FILE_MODE)
		seq_printf(seq, ",mode=0%o", tmp);
	tmp = server->m.dir_mode & S_IALLUGO;
	if (tmp != NCP_DEFAULT_DIR_MODE)
		seq_printf(seq, ",dirmode=0%o", tmp);
	/* time_out is stored in jiffies; report it back in 1/100 s as
	 * the mount utility supplied it. */
	if (server->m.time_out != NCP_DEFAULT_TIME_OUT * HZ / 100) {
		tmp = server->m.time_out * 100 / HZ;
		seq_printf(seq, ",timeout=%u", tmp);
	}
	if (server->m.retry_count != NCP_DEFAULT_RETRY_COUNT)
		seq_printf(seq, ",retry=%u", server->m.retry_count);
	if (server->m.flags != 0)
		seq_printf(seq, ",flags=%lu", server->m.flags);
	if (server->m.wdog_pid != NULL)
		seq_printf(seq, ",wdogpid=%u", pid_vnr(server->m.wdog_pid));

	return 0;
}
363
/* Mount options recognized by the "vers..." text format; all values
 * are numeric.  Terminated by the NULL sentinel entry. */
static const struct ncp_option ncp_opts[] = {
	{ "uid",	OPT_INT,	'u' },
	{ "gid",	OPT_INT,	'g' },
	{ "owner",	OPT_INT,	'o' },
	{ "mode",	OPT_INT,	'm' },
	{ "dirmode",	OPT_INT,	'd' },
	{ "timeout",	OPT_INT,	't' },
	{ "retry",	OPT_INT,	'r' },
	{ "flags",	OPT_INT,	'f' },
	{ "wdogpid",	OPT_INT,	'w' },
	{ "ncpfd",	OPT_INT,	'n' },
	{ "infofd",	OPT_INT,	'i' },	/* v5 */
	{ "version",	OPT_INT,	'v' },
	{ NULL,		0,		0 } };
378
/*
 * Parse the text-format ("vers...") mount option string into @data.
 * Defaults are installed first, then each option overrides its field.
 * Returns 0 on success or a negative errno; on error the watchdog pid
 * reference (the only resource taken here) is dropped again.
 */
static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options) {
	int optval;
	char *optarg;
	unsigned long optint;
	int version = 0;
	int ret;

	data->flags = 0;
	data->int_flags = 0;
	data->mounted_uid = GLOBAL_ROOT_UID;
	data->wdog_pid = NULL;
	data->ncp_fd = ~0;
	data->time_out = NCP_DEFAULT_TIME_OUT;
	data->retry_count = NCP_DEFAULT_RETRY_COUNT;
	data->uid = GLOBAL_ROOT_UID;
	data->gid = GLOBAL_ROOT_GID;
	data->file_mode = NCP_DEFAULT_FILE_MODE;
	data->dir_mode = NCP_DEFAULT_DIR_MODE;
	data->info_fd = -1;
	data->mounted_vol[0] = 0;

	while ((optval = ncp_getopt("ncpfs", &options, ncp_opts, NULL, &optarg, &optint)) != 0) {
		ret = optval;
		if (ret < 0)
			goto err;
		switch (optval) {
			case 'u':
				data->uid = make_kuid(current_user_ns(), optint);
				if (!uid_valid(data->uid)) {
					ret = -EINVAL;
					goto err;
				}
				break;
			case 'g':
				data->gid = make_kgid(current_user_ns(), optint);
				if (!gid_valid(data->gid)) {
					ret = -EINVAL;
					goto err;
				}
				break;
			case 'o':
				data->mounted_uid = make_kuid(current_user_ns(), optint);
				if (!uid_valid(data->mounted_uid)) {
					ret = -EINVAL;
					goto err;
				}
				break;
			case 'm':
				data->file_mode = optint;
				break;
			case 'd':
				data->dir_mode = optint;
				break;
			case 't':
				data->time_out = optint;
				break;
			case 'r':
				data->retry_count = optint;
				break;
			case 'f':
				data->flags = optint;
				break;
			case 'w':
				/* Takes a pid reference; released on error
				 * here or in ncp_fill_super()/put_super(). */
				data->wdog_pid = find_get_pid(optint);
				break;
			case 'n':
				data->ncp_fd = optint;
				break;
			case 'i':
				data->info_fd = optint;
				break;
			case 'v':
				/* Only mount protocol versions 4..5 are
				 * supported by this parser. */
				ret = -ECHRNG;
				if (optint < NCP_MOUNT_VERSION_V4)
					goto err;
				if (optint > NCP_MOUNT_VERSION_V5)
					goto err;
				version = optint;
				break;

		}
	}
	return 0;
err:
	put_pid(data->wdog_pid);
	data->wdog_pid = NULL;
	return ret;
}
467
/*
 * Mount-time setup: decode the (binary v3/v4 or text v5) mount data,
 * take over the userspace-supplied socket(s), connect to the server,
 * negotiate buffer size / packet signing and build the root inode.
 *
 * On any failure the unwind labels release exactly what was acquired,
 * in reverse order; note that the put_pid() at "out" is safe because
 * data.wdog_pid starts as NULL (memset) and put_pid(NULL) is a no-op.
 */
static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
{
	struct ncp_mount_data_kernel data;
	struct ncp_server *server;
	struct inode *root_inode;
	struct socket *sock;
	int error;
	int default_bufsize;
#ifdef CONFIG_NCPFS_PACKET_SIGNING
	int options;
#endif
	struct ncp_entry_info finfo;

	memset(&data, 0, sizeof(data));
	server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
	if (!server)
		return -ENOMEM;
	sb->s_fs_info = server;

	/* Decode the mount data; three wire formats are supported. */
	error = -EFAULT;
	if (raw_data == NULL)
		goto out;
	switch (*(int*)raw_data) {
	case NCP_MOUNT_VERSION:
		{
			struct ncp_mount_data* md = (struct ncp_mount_data*)raw_data;

			data.flags = md->flags;
			data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE;
			data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
			data.wdog_pid = find_get_pid(md->wdog_pid);
			data.ncp_fd = md->ncp_fd;
			data.time_out = md->time_out;
			data.retry_count = md->retry_count;
			data.uid = make_kuid(current_user_ns(), md->uid);
			data.gid = make_kgid(current_user_ns(), md->gid);
			data.file_mode = md->file_mode;
			data.dir_mode = md->dir_mode;
			data.info_fd = -1;
			memcpy(data.mounted_vol, md->mounted_vol,
			       NCP_VOLNAME_LEN+1);
		}
		break;
	case NCP_MOUNT_VERSION_V4:
		{
			struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;

			data.flags = md->flags;
			data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
			data.wdog_pid = find_get_pid(md->wdog_pid);
			data.ncp_fd = md->ncp_fd;
			data.time_out = md->time_out;
			data.retry_count = md->retry_count;
			data.uid = make_kuid(current_user_ns(), md->uid);
			data.gid = make_kgid(current_user_ns(), md->gid);
			data.file_mode = md->file_mode;
			data.dir_mode = md->dir_mode;
			data.info_fd = -1;
		}
		break;
	default:
		/* v5 mount data is a text option string starting "vers". */
		error = -ECHRNG;
		if (memcmp(raw_data, "vers", 4) == 0) {
			error = ncp_parse_options(&data, raw_data);
		}
		if (error)
			goto out;
		break;
	}
	error = -EINVAL;
	if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
	    !gid_valid(data.gid))
		goto out;
	/* Take over the connected socket userspace handed us. */
	sock = sockfd_lookup(data.ncp_fd, &error);
	if (!sock)
		goto out;

	if (sock->type == SOCK_STREAM)
		default_bufsize = 0xF000;
	else
		default_bufsize = 1024;

	sb->s_flags |= SB_NODIRATIME;	/* probably even noatime */
	sb->s_maxbytes = 0xFFFFFFFFU;
	sb->s_blocksize = 1024;	/* Eh...  Is this correct? */
	sb->s_blocksize_bits = 10;
	sb->s_magic = NCP_SUPER_MAGIC;
	sb->s_op = &ncp_sops;
	sb->s_d_op = &ncp_dentry_operations;

	server = NCP_SBP(sb);
	memset(server, 0, sizeof(*server));

	error = super_setup_bdi(sb);
	if (error)
		goto out_fput;

	server->ncp_sock = sock;

	/* Optional out-of-band info channel (v5 mounts only). */
	if (data.info_fd != -1) {
		struct socket *info_sock = sockfd_lookup(data.info_fd, &error);
		if (!info_sock)
			goto out_fput;
		server->info_sock = info_sock;
		error = -EBADFD;
		if (info_sock->type != SOCK_STREAM)
			goto out_fput2;
	}

/*	server->lock = 0;	*/
	mutex_init(&server->mutex);
	server->packet = NULL;
/*	server->buffer_size = 0;	*/
/*	server->conn_status = 0;	*/
/*	server->root_dentry = NULL;	*/
/*	server->root_setuped = 0;	*/
	mutex_init(&server->root_setup_lock);
#ifdef CONFIG_NCPFS_PACKET_SIGNING
/*	server->sign_wanted = 0;	*/
/*	server->sign_active = 0;	*/
#endif
	init_rwsem(&server->auth_rwsem);
	server->auth.auth_type = NCP_AUTH_NONE;
/*	server->auth.object_name_len = 0;	*/
/*	server->auth.object_name = NULL;	*/
/*	server->auth.object_type = 0;	*/
/*	server->priv.len = 0;	*/
/*	server->priv.data = NULL;	*/

	server->m = data;
	/* Although anything producing this is buggy, it happens
	   now because of PATH_MAX changes.. */
	if (server->m.time_out < 1) {
		server->m.time_out = 10;
		pr_info("You need to recompile your ncpfs utils..\n");
	}
	/* Convert time_out from 1/100 s (mount ABI) to jiffies. */
	server->m.time_out = server->m.time_out * HZ / 100;
	server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG;
	server->m.dir_mode = (server->m.dir_mode & S_IRWXUGO) | S_IFDIR;

#ifdef CONFIG_NCPFS_NLS
	/* load the default NLS charsets */
	server->nls_vol = load_nls_default();
	server->nls_io = load_nls_default();
#endif /* CONFIG_NCPFS_NLS */

	atomic_set(&server->dentry_ttl, 0);	/* no caching */

	INIT_LIST_HEAD(&server->tx.requests);
	mutex_init(&server->rcv.creq_mutex);
	server->tx.creq = NULL;
	server->rcv.creq = NULL;

	timer_setup(&server->timeout_tm, ncpdgram_timeout_call, 0);
#undef NCP_PACKET_SIZE
#define NCP_PACKET_SIZE 131072
	error = -ENOMEM;
	server->packet_size = NCP_PACKET_SIZE;
	server->packet = vmalloc(NCP_PACKET_SIZE);
	if (server->packet == NULL)
		goto out_nls;
	server->txbuf = vmalloc(NCP_PACKET_SIZE);
	if (server->txbuf == NULL)
		goto out_packet;
	server->rxbuf = vmalloc(NCP_PACKET_SIZE);
	if (server->rxbuf == NULL)
		goto out_txbuf;

	/* Hook our callbacks into the socket, saving the originals so
	 * ncp_stop_tasks() can restore them; all under the sock lock. */
	lock_sock(sock->sk);
	server->data_ready = sock->sk->sk_data_ready;
	server->write_space = sock->sk->sk_write_space;
	server->error_report = sock->sk->sk_error_report;
	sock->sk->sk_user_data = server;
	sock->sk->sk_data_ready = ncp_tcp_data_ready;
	sock->sk->sk_error_report = ncp_tcp_error_report;
	if (sock->type == SOCK_STREAM) {
		server->rcv.ptr = (unsigned char*)&server->rcv.buf;
		server->rcv.len = 10;
		server->rcv.state = 0;
		INIT_WORK(&server->rcv.tq, ncp_tcp_rcv_proc);
		INIT_WORK(&server->tx.tq, ncp_tcp_tx_proc);
		sock->sk->sk_write_space = ncp_tcp_write_space;
	} else {
		INIT_WORK(&server->rcv.tq, ncpdgram_rcv_proc);
		INIT_WORK(&server->timeout_tq, ncpdgram_timeout_proc);
	}
	release_sock(sock->sk);

	ncp_lock_server(server);
	error = ncp_connect(server);
	ncp_unlock_server(server);
	if (error < 0)
		goto out_rxbuf;
	ncp_dbg(1, "NCP_SBP(sb) = %p\n", NCP_SBP(sb));

	error = -EMSGSIZE;	/* -EREMOTESIDEINCOMPATIBLE */
#ifdef CONFIG_NCPFS_PACKET_SIGNING
	/* Negotiate buffer size plus the signing option; renegotiate
	 * with only the signing bit if the server changed the options. */
	if (ncp_negotiate_size_and_options(server, default_bufsize,
		NCP_DEFAULT_OPTIONS, &(server->buffer_size), &options) == 0)
	{
		if (options != NCP_DEFAULT_OPTIONS)
		{
			if (ncp_negotiate_size_and_options(server,
				default_bufsize,
				options & 2,
				&(server->buffer_size), &options) != 0)

			{
				goto out_disconnect;
			}
		}
		ncp_lock_server(server);
		if (options & 2)
			server->sign_wanted = 1;
		ncp_unlock_server(server);
	}
	else
#endif	/* CONFIG_NCPFS_PACKET_SIGNING */
	if (ncp_negotiate_buffersize(server, default_bufsize,
				     &(server->buffer_size)) != 0)
		goto out_disconnect;
	ncp_dbg(1, "bufsize = %d\n", server->buffer_size);

	/* Build a synthetic root directory entry for ncp_iget(). */
	memset(&finfo, 0, sizeof(finfo));
	finfo.i.attributes = aDIR;
	finfo.i.dataStreamSize = 0;	/* ignored */
	finfo.i.dirEntNum = 0;
	finfo.i.DosDirNum = 0;
#ifdef CONFIG_NCPFS_SMALLDOS
	finfo.i.NSCreator = NW_NS_DOS;
#endif
	finfo.volume = NCP_NUMBER_OF_VOLUMES;
	/* set dates of mountpoint to Jan 1, 1986; 00:00 */
	finfo.i.creationTime = finfo.i.modifyTime
				= cpu_to_le16(0x0000);
	finfo.i.creationDate = finfo.i.modifyDate
				= finfo.i.lastAccessDate
				= cpu_to_le16(0x0C21);
	finfo.i.nameLen = 0;
	finfo.i.entryName[0] = '\0';

	finfo.opened = 0;
	finfo.ino = 2;	/* tradition */

	server->name_space[finfo.volume] = NW_NS_DOS;

	error = -ENOMEM;
	root_inode = ncp_iget(sb, &finfo);
	if (!root_inode)
		goto out_disconnect;
	ncp_dbg(1, "root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
	sb->s_root = d_make_root(root_inode);
	if (!sb->s_root)
		goto out_disconnect;
	return 0;

out_disconnect:
	ncp_lock_server(server);
	ncp_disconnect(server);
	ncp_unlock_server(server);
out_rxbuf:
	ncp_stop_tasks(server);
	vfree(server->rxbuf);
out_txbuf:
	vfree(server->txbuf);
out_packet:
	vfree(server->packet);
out_nls:
#ifdef CONFIG_NCPFS_NLS
	unload_nls(server->nls_io);
	unload_nls(server->nls_vol);
#endif
	mutex_destroy(&server->rcv.creq_mutex);
	mutex_destroy(&server->root_setup_lock);
	mutex_destroy(&server->mutex);
out_fput2:
	if (server->info_sock)
		sockfd_put(server->info_sock);
out_fput:
	sockfd_put(sock);
out:
	put_pid(data.wdog_pid);
	sb->s_fs_info = NULL;
	kfree(server);
	return error;
}
754
/* RCU callback: free the server structure (and its NLS tables) once
 * all lockless readers are guaranteed to be gone. */
static void delayed_free(struct rcu_head *p)
{
	struct ncp_server *server = container_of(p, struct ncp_server, rcu);
#ifdef CONFIG_NCPFS_NLS
	/* unload the NLS charsets */
	unload_nls(server->nls_vol);
	unload_nls(server->nls_io);
#endif /* CONFIG_NCPFS_NLS */
	kfree(server);
}
765
/* Unmount: disconnect from the server, quiesce all background work,
 * signal the watchdog process, and release every resource acquired in
 * ncp_fill_super() — the reverse of its setup order. */
static void ncp_put_super(struct super_block *sb)
{
	struct ncp_server *server = NCP_SBP(sb);

	ncp_lock_server(server);
	ncp_disconnect(server);
	ncp_unlock_server(server);

	ncp_stop_tasks(server);

	mutex_destroy(&server->rcv.creq_mutex);
	mutex_destroy(&server->root_setup_lock);
	mutex_destroy(&server->mutex);

	if (server->info_sock)
		sockfd_put(server->info_sock);
	sockfd_put(server->ncp_sock);
	/* Tell the userspace watchdog daemon it is no longer needed. */
	kill_pid(server->m.wdog_pid, SIGTERM, 1);
	put_pid(server->m.wdog_pid);

	kfree(server->priv.data);
	kfree(server->auth.object_name);
	vfree(server->rxbuf);
	vfree(server->txbuf);
	vfree(server->packet);
	/* The server struct itself is freed after an RCU grace period. */
	call_rcu(&server->rcu, delayed_free);
}
793
794static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
795{
796 struct dentry* d;
797 struct inode* i;
798 struct ncp_inode_info* ni;
799 struct ncp_server* s;
800 struct ncp_volume_info vi;
801 struct super_block *sb = dentry->d_sb;
802 int err;
803 __u8 dh;
804
805 d = sb->s_root;
806 if (!d) {
807 goto dflt;
808 }
809 i = d_inode(d);
810 if (!i) {
811 goto dflt;
812 }
813 ni = NCP_FINFO(i);
814 if (!ni) {
815 goto dflt;
816 }
817 s = NCP_SBP(sb);
818 if (!s) {
819 goto dflt;
820 }
821 if (!s->m.mounted_vol[0]) {
822 goto dflt;
823 }
824
825 err = ncp_dirhandle_alloc(s, ni->volNumber, ni->DosDirNum, &dh);
826 if (err) {
827 goto dflt;
828 }
829 err = ncp_get_directory_info(s, dh, &vi);
830 ncp_dirhandle_free(s, dh);
831 if (err) {
832 goto dflt;
833 }
834 buf->f_type = NCP_SUPER_MAGIC;
835 buf->f_bsize = vi.sectors_per_block * 512;
836 buf->f_blocks = vi.total_blocks;
837 buf->f_bfree = vi.free_blocks;
838 buf->f_bavail = vi.free_blocks;
839 buf->f_files = vi.total_dir_entries;
840 buf->f_ffree = vi.available_dir_entries;
841 buf->f_namelen = 12;
842 return 0;
843
844 /* We cannot say how much disk space is left on a mounted
845 NetWare Server, because free space is distributed over
846 volumes, and the current user might have disk quotas. So
847 free space is not that simple to determine. Our decision
848 here is to err conservatively. */
849
850dflt:;
851 buf->f_type = NCP_SUPER_MAGIC;
852 buf->f_bsize = NCP_BLOCK_SIZE;
853 buf->f_blocks = 0;
854 buf->f_bfree = 0;
855 buf->f_bavail = 0;
856 buf->f_namelen = 12;
857 return 0;
858}
859
/*
 * .setattr handler: push attribute changes (mode, size, times) to the
 * NetWare server and mirror them into the local inode.
 *
 * Ordering is significant throughout: the size change must be issued
 * before the DOS-info (time/attribute) update, otherwise mtime-with-size
 * updates do not take on the server.  On exit any positive NCP status is
 * folded into -EACCES.
 */
int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int result = 0;
	__le32 info_mask;
	struct nw_modify_dos_info info;
	struct ncp_server *server;

	result = -EIO;

	server = NCP_SERVER(inode);
	if (!server)	/* How this could happen? */
		goto out;

	result = -EPERM;
	if (IS_DEADDIR(d_inode(dentry)))
		goto out;

	/* ageing the dentry to force validation */
	ncp_age_dentry(server, dentry);

	result = setattr_prepare(dentry, attr);
	if (result < 0)
		goto out;

	/* ownership is fixed by the mount options; reject any change */
	result = -EPERM;
	if ((attr->ia_valid & ATTR_UID) && !uid_eq(attr->ia_uid, server->m.uid))
		goto out;

	if ((attr->ia_valid & ATTR_GID) && !gid_eq(attr->ia_gid, server->m.gid))
		goto out;

	/* only regular/dir type bits plus rwx permission bits are allowed */
	if (((attr->ia_valid & ATTR_MODE) &&
	     (attr->ia_mode &
	      ~(S_IFREG | S_IFDIR | S_IRWXUGO))))
		goto out;

	info_mask = 0;
	memset(&info, 0, sizeof(info));

#if 1
	if ((attr->ia_valid & ATTR_MODE) != 0)
	{
		umode_t newmode = attr->ia_mode;

		info_mask |= DM_ATTRIBUTES;

		if (S_ISDIR(inode->i_mode)) {
			newmode &= server->m.dir_mode;
		} else {
#ifdef CONFIG_NCPFS_EXTRAS
			/* map extra permission bits onto NetWare attributes */
			if (server->m.flags & NCP_MOUNT_EXTRAS) {
				/* any non-default execute bit set */
				if (newmode & ~server->m.file_mode & S_IXUGO)
					info.attributes |= aSHARED | aSYSTEM;
				/* read for group/world and not in default file_mode */
				else if (newmode & ~server->m.file_mode & S_IRUGO)
					info.attributes |= aSHARED;
			} else
#endif
				newmode &= server->m.file_mode;
		}
		/* writable for anyone -> clear the read-only family of flags */
		if (newmode & S_IWUGO)
			info.attributes &= ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT);
		else
			info.attributes |= (aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT);

#ifdef CONFIG_NCPFS_NFS_NS
		/* NFS namespace carries the full POSIX mode; update it there */
		if (ncp_is_nfs_extras(server, NCP_FINFO(inode)->volNumber)) {
			result = ncp_modify_nfs_info(server,
						     NCP_FINFO(inode)->volNumber,
						     NCP_FINFO(inode)->dirEntNum,
						     attr->ia_mode, 0);
			if (result != 0)
				goto out;
			info.attributes &= ~(aSHARED | aSYSTEM);
			{
				/* mark partial success */
				struct iattr tmpattr;

				tmpattr.ia_valid = ATTR_MODE;
				tmpattr.ia_mode = attr->ia_mode;

				setattr_copy(inode, &tmpattr);
				mark_inode_dirty(inode);
			}
		}
#endif
	}
#endif

	/* Do SIZE before attributes, otherwise mtime together with size does not work...
	 */
	if ((attr->ia_valid & ATTR_SIZE) != 0) {
		int written;

		ncp_dbg(1, "trying to change size to %llu\n", attr->ia_size);

		if ((result = ncp_make_open(inode, O_WRONLY)) < 0) {
			result = -EACCES;
			goto out;
		}
		/* a zero-length write at ia_size truncates/extends the file */
		ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle,
			  attr->ia_size, 0, "", &written);

		/* According to ndir, the changes only take effect after
		   closing the file */
		ncp_inode_close(inode);
		result = ncp_make_closed(inode);
		if (result)
			goto out;

		if (attr->ia_size != i_size_read(inode)) {
			truncate_setsize(inode, attr->ia_size);
			mark_inode_dirty(inode);
		}
	}
	/* ctime maps to the DOS creation stamp on NetWare */
	if ((attr->ia_valid & ATTR_CTIME) != 0) {
		info_mask |= (DM_CREATE_TIME | DM_CREATE_DATE);
		ncp_date_unix2dos(attr->ia_ctime.tv_sec,
			     &info.creationTime, &info.creationDate);
	}
	if ((attr->ia_valid & ATTR_MTIME) != 0) {
		info_mask |= (DM_MODIFY_TIME | DM_MODIFY_DATE);
		ncp_date_unix2dos(attr->ia_mtime.tv_sec,
				  &info.modifyTime, &info.modifyDate);
	}
	if ((attr->ia_valid & ATTR_ATIME) != 0) {
		__le16 dummy;
		/* DOS only stores an access date, not a time */
		info_mask |= (DM_LAST_ACCESS_DATE);
		ncp_date_unix2dos(attr->ia_atime.tv_sec,
			     &dummy, &info.lastAccessDate);
	}
	if (info_mask != 0) {
		result = ncp_modify_file_or_subdir_dos_info(NCP_SERVER(inode),
				      inode, info_mask, &info);
		if (result != 0) {
			if (info_mask == (DM_CREATE_TIME | DM_CREATE_DATE)) {
				/* NetWare seems not to allow this. I
				   do not know why. So, just tell the
				   user everything went fine. This is
				   a terrible hack, but I do not know
				   how to do this correctly. */
				result = 0;
			} else
				goto out;
		}
#ifdef CONFIG_NCPFS_STRONG
		if ((!result) && (info_mask & DM_ATTRIBUTES))
			NCP_FINFO(inode)->nwattr = info.attributes;
#endif
	}
	if (result)
		goto out;

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);

out:
	/* positive NCP completion codes are reported as -EACCES */
	if (result > 0)
		result = -EACCES;
	return result;
}
1023
/*
 * Mount entry point.  ncpfs has no backing block device, so delegate to
 * mount_nodev() with ncp_fill_super() doing the per-mount setup.
 */
static struct dentry *ncp_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_nodev(fs_type, flags, data, ncp_fill_super);
}
1029
/*
 * Filesystem registration.  FS_BINARY_MOUNTDATA: the mount data is a
 * binary struct ncp_mount_data, not a text option string.
 */
static struct file_system_type ncp_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ncpfs",
	.mount		= ncp_mount,
	.kill_sb	= kill_anon_super,
	.fs_flags	= FS_BINARY_MOUNTDATA,
};
MODULE_ALIAS_FS("ncpfs");
1038
1039static int __init init_ncp_fs(void)
1040{
1041 int err;
1042 ncp_dbg(1, "called\n");
1043
1044 err = init_inodecache();
1045 if (err)
1046 goto out1;
1047 err = register_filesystem(&ncp_fs_type);
1048 if (err)
1049 goto out;
1050 return 0;
1051out:
1052 destroy_inodecache();
1053out1:
1054 return err;
1055}
1056
/*
 * Module exit: unregister first so no new mounts can start, then tear
 * down the inode cache.
 */
static void __exit exit_ncp_fs(void)
{
	ncp_dbg(1, "called\n");
	unregister_filesystem(&ncp_fs_type);
	destroy_inodecache();
}
1063
/* module entry/exit hooks and license tag */
module_init(init_ncp_fs)
module_exit(exit_ncp_fs)
MODULE_LICENSE("GPL");
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
deleted file mode 100644
index d378b98cd7b6..000000000000
--- a/fs/ncpfs/ioctl.c
+++ /dev/null
@@ -1,923 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * ioctl.c
4 *
5 * Copyright (C) 1995, 1996 by Volker Lendecke
6 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
7 * Modified 1998, 1999 Wolfram Pienkoss for NLS
8 *
9 */
10
11#include <linux/capability.h>
12#include <linux/compat.h>
13#include <linux/errno.h>
14#include <linux/fs.h>
15#include <linux/ioctl.h>
16#include <linux/time.h>
17#include <linux/mm.h>
18#include <linux/mount.h>
19#include <linux/slab.h>
20#include <linux/highuid.h>
21#include <linux/vmalloc.h>
22#include <linux/sched.h>
23#include <linux/cred.h>
24
25#include <linux/uaccess.h>
26
27#include "ncp_fs.h"
28
/* maximum limit for ncp_objectname_ioctl (bounds memdup_user) */
#define NCP_OBJECT_NAME_MAX_LEN	4096
/* maximum limit for ncp_privatedata_ioctl (bounds memdup_user) */
#define NCP_PRIVATE_DATA_MAX_LEN	8192
/* maximum negotiable packet size (bounce buffer for NCP_IOC_NCPREQUEST) */
#define NCP_PACKET_SIZE_INTERNAL	65536
35
36static int
37ncp_get_fs_info(struct ncp_server * server, struct inode *inode,
38 struct ncp_fs_info __user *arg)
39{
40 struct ncp_fs_info info;
41
42 if (copy_from_user(&info, arg, sizeof(info)))
43 return -EFAULT;
44
45 if (info.version != NCP_GET_FS_INFO_VERSION) {
46 ncp_dbg(1, "info.version invalid: %d\n", info.version);
47 return -EINVAL;
48 }
49 /* TODO: info.addr = server->m.serv_addr; */
50 SET_UID(info.mounted_uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
51 info.connection = server->connection;
52 info.buffer_size = server->buffer_size;
53 info.volume_number = NCP_FINFO(inode)->volNumber;
54 info.directory_id = NCP_FINFO(inode)->DosDirNum;
55
56 if (copy_to_user(arg, &info, sizeof(info)))
57 return -EFAULT;
58 return 0;
59}
60
61static int
62ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode,
63 struct ncp_fs_info_v2 __user * arg)
64{
65 struct ncp_fs_info_v2 info2;
66
67 if (copy_from_user(&info2, arg, sizeof(info2)))
68 return -EFAULT;
69
70 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
71 ncp_dbg(1, "info.version invalid: %d\n", info2.version);
72 return -EINVAL;
73 }
74 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
75 info2.connection = server->connection;
76 info2.buffer_size = server->buffer_size;
77 info2.volume_number = NCP_FINFO(inode)->volNumber;
78 info2.directory_id = NCP_FINFO(inode)->DosDirNum;
79 info2.dummy1 = info2.dummy2 = info2.dummy3 = 0;
80
81 if (copy_to_user(arg, &info2, sizeof(info2)))
82 return -EFAULT;
83 return 0;
84}
85
86#ifdef CONFIG_COMPAT
/*
 * 32-bit layouts of the ioctl argument structures, used when a compat
 * task issues these ioctls on a 64-bit kernel.  Pointers are carried as
 * compat_caddr_t and widened with compat_ptr() before use.
 */
struct compat_ncp_objectname_ioctl
{
	s32		auth_type;
	u32		object_name_len;
	compat_caddr_t	object_name;	/* a userspace data, in most cases user name */
};

struct compat_ncp_fs_info_v2 {
	s32 version;
	u32 mounted_uid;
	u32 connection;
	u32 buffer_size;

	u32 volume_number;
	u32 directory_id;

	u32 dummy1;
	u32 dummy2;
	u32 dummy3;
};

struct compat_ncp_ioctl_request {
	u32 function;
	u32 size;
	compat_caddr_t data;
};

struct compat_ncp_privatedata_ioctl
{
	u32		len;
	compat_caddr_t	data;		/* ~1000 for NDS */
};

/* compat ioctl numbers: same 'n'/nr as the native ones, compat sizes */
#define	NCP_IOC_GET_FS_INFO_V2_32	_IOWR('n', 4, struct compat_ncp_fs_info_v2)
#define	NCP_IOC_NCPREQUEST_32		_IOR('n', 1, struct compat_ncp_ioctl_request)
#define NCP_IOC_GETOBJECTNAME_32	_IOWR('n', 9, struct compat_ncp_objectname_ioctl)
#define NCP_IOC_SETOBJECTNAME_32	_IOR('n', 9, struct compat_ncp_objectname_ioctl)
#define NCP_IOC_GETPRIVATEDATA_32	_IOWR('n', 10, struct compat_ncp_privatedata_ioctl)
#define NCP_IOC_SETPRIVATEDATA_32	_IOR('n', 10, struct compat_ncp_privatedata_ioctl)
126
127static int
128ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode,
129 struct compat_ncp_fs_info_v2 __user * arg)
130{
131 struct compat_ncp_fs_info_v2 info2;
132
133 if (copy_from_user(&info2, arg, sizeof(info2)))
134 return -EFAULT;
135
136 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
137 ncp_dbg(1, "info.version invalid: %d\n", info2.version);
138 return -EINVAL;
139 }
140 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
141 info2.connection = server->connection;
142 info2.buffer_size = server->buffer_size;
143 info2.volume_number = NCP_FINFO(inode)->volNumber;
144 info2.directory_id = NCP_FINFO(inode)->DosDirNum;
145 info2.dummy1 = info2.dummy2 = info2.dummy3 = 0;
146
147 if (copy_to_user(arg, &info2, sizeof(info2)))
148 return -EFAULT;
149 return 0;
150}
151#endif
152
/* GETMOUNTUID variants: one ioctl number per uid_t width ever shipped */
#define NCP_IOC_GETMOUNTUID16		_IOW('n', 2, u16)
#define NCP_IOC_GETMOUNTUID32		_IOW('n', 2, u32)
#define NCP_IOC_GETMOUNTUID64		_IOW('n', 2, u64)
156
157#ifdef CONFIG_NCPFS_NLS
158/* Here we are select the iocharset and the codepage for NLS.
159 * Thanks Petr Vandrovec for idea and many hints.
160 */
161static int
162ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
163{
164 struct ncp_nls_ioctl user;
165 struct nls_table *codepage;
166 struct nls_table *iocharset;
167 struct nls_table *oldset_io;
168 struct nls_table *oldset_cp;
169 int utf8;
170 int err;
171
172 if (copy_from_user(&user, arg, sizeof(user)))
173 return -EFAULT;
174
175 codepage = NULL;
176 user.codepage[NCP_IOCSNAME_LEN] = 0;
177 if (!user.codepage[0] || !strcmp(user.codepage, "default"))
178 codepage = load_nls_default();
179 else {
180 codepage = load_nls(user.codepage);
181 if (!codepage) {
182 return -EBADRQC;
183 }
184 }
185
186 iocharset = NULL;
187 user.iocharset[NCP_IOCSNAME_LEN] = 0;
188 if (!user.iocharset[0] || !strcmp(user.iocharset, "default")) {
189 iocharset = load_nls_default();
190 utf8 = 0;
191 } else if (!strcmp(user.iocharset, "utf8")) {
192 iocharset = load_nls_default();
193 utf8 = 1;
194 } else {
195 iocharset = load_nls(user.iocharset);
196 if (!iocharset) {
197 unload_nls(codepage);
198 return -EBADRQC;
199 }
200 utf8 = 0;
201 }
202
203 mutex_lock(&server->root_setup_lock);
204 if (server->root_setuped) {
205 oldset_cp = codepage;
206 oldset_io = iocharset;
207 err = -EBUSY;
208 } else {
209 if (utf8)
210 NCP_SET_FLAG(server, NCP_FLAG_UTF8);
211 else
212 NCP_CLR_FLAG(server, NCP_FLAG_UTF8);
213 oldset_cp = server->nls_vol;
214 server->nls_vol = codepage;
215 oldset_io = server->nls_io;
216 server->nls_io = iocharset;
217 err = 0;
218 }
219 mutex_unlock(&server->root_setup_lock);
220 unload_nls(oldset_cp);
221 unload_nls(oldset_io);
222
223 return err;
224}
225
226static int
227ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
228{
229 struct ncp_nls_ioctl user;
230 int len;
231
232 memset(&user, 0, sizeof(user));
233 mutex_lock(&server->root_setup_lock);
234 if (server->nls_vol && server->nls_vol->charset) {
235 len = strlen(server->nls_vol->charset);
236 if (len > NCP_IOCSNAME_LEN)
237 len = NCP_IOCSNAME_LEN;
238 strncpy(user.codepage, server->nls_vol->charset, len);
239 user.codepage[len] = 0;
240 }
241
242 if (NCP_IS_FLAG(server, NCP_FLAG_UTF8))
243 strcpy(user.iocharset, "utf8");
244 else if (server->nls_io && server->nls_io->charset) {
245 len = strlen(server->nls_io->charset);
246 if (len > NCP_IOCSNAME_LEN)
247 len = NCP_IOCSNAME_LEN;
248 strncpy(user.iocharset, server->nls_io->charset, len);
249 user.iocharset[len] = 0;
250 }
251 mutex_unlock(&server->root_setup_lock);
252
253 if (copy_to_user(arg, &user, sizeof(user)))
254 return -EFAULT;
255 return 0;
256}
257#endif /* CONFIG_NCPFS_NLS */
258
/*
 * Worker for ncp_ioctl(): executes the command itself.  All permission
 * checking has already been done by the caller.  Returns 0 (or a
 * positive byte count for NCPREQUEST) on success, a negative errno on
 * failure, and -EINVAL for unknown commands.
 */
static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg)
{
	struct ncp_server *server = NCP_SERVER(inode);
	int result;
	struct ncp_ioctl_request request;
	char* bouncebuffer;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
#ifdef CONFIG_COMPAT
	case NCP_IOC_NCPREQUEST_32:
#endif
	/* pass a raw NCP request through to the server */
	case NCP_IOC_NCPREQUEST:
#ifdef CONFIG_COMPAT
		if (cmd == NCP_IOC_NCPREQUEST_32) {
			struct compat_ncp_ioctl_request request32;
			if (copy_from_user(&request32, argp, sizeof(request32)))
				return -EFAULT;
			request.function = request32.function;
			request.size = request32.size;
			request.data = compat_ptr(request32.data);
		} else
#endif
		if (copy_from_user(&request, argp, sizeof(request)))
			return -EFAULT;

		if ((request.function > 255)
		    || (request.size >
		  NCP_PACKET_SIZE - sizeof(struct ncp_request_header))) {
			return -EINVAL;
		}
		/* stage the request in a kernel bounce buffer */
		bouncebuffer = vmalloc(NCP_PACKET_SIZE_INTERNAL);
		if (!bouncebuffer)
			return -ENOMEM;
		if (copy_from_user(bouncebuffer, request.data, request.size)) {
			vfree(bouncebuffer);
			return -EFAULT;
		}
		ncp_lock_server(server);

		/* FIXME: We hack around in the server's structures
		   here to be able to use ncp_request */

		server->has_subfunction = 0;
		server->current_size = request.size;
		memcpy(server->packet, bouncebuffer, request.size);

		result = ncp_request2(server, request.function,
			bouncebuffer, NCP_PACKET_SIZE_INTERNAL);
		if (result < 0)
			result = -EIO;
		else
			result = server->reply_size;
		ncp_unlock_server(server);
		ncp_dbg(1, "copy %d bytes\n", result);
		/* on success, copy the reply back to the caller's buffer */
		if (result >= 0)
			if (copy_to_user(request.data, bouncebuffer, result))
				result = -EFAULT;
		vfree(bouncebuffer);
		return result;

	/* finalize a login-based mount: set up the root once */
	case NCP_IOC_CONN_LOGGED_IN:

		if (!(server->m.int_flags & NCP_IMOUNT_LOGGEDIN_POSSIBLE))
			return -EINVAL;
		mutex_lock(&server->root_setup_lock);
		if (server->root_setuped)
			result = -EBUSY;
		else {
			result = ncp_conn_logged_in(inode->i_sb);
			if (result == 0)
				server->root_setuped = 1;
		}
		mutex_unlock(&server->root_setup_lock);
		return result;

	case NCP_IOC_GET_FS_INFO:
		return ncp_get_fs_info(server, inode, argp);

	case NCP_IOC_GET_FS_INFO_V2:
		return ncp_get_fs_info_v2(server, inode, argp);

#ifdef CONFIG_COMPAT
	case NCP_IOC_GET_FS_INFO_V2_32:
		return ncp_get_compat_fs_info_v2(server, inode, argp);
#endif
	/* we have too many combinations of CONFIG_COMPAT,
	 * CONFIG_64BIT and CONFIG_UID16, so just handle
	 * any of the possible ioctls */
	case NCP_IOC_GETMOUNTUID16:
		{
			u16 uid;

			SET_UID(uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
			if (put_user(uid, (u16 __user *)argp))
				return -EFAULT;
			return 0;
		}
	case NCP_IOC_GETMOUNTUID32:
		{
			uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
			if (put_user(uid, (u32 __user *)argp))
				return -EFAULT;
			return 0;
		}
	case NCP_IOC_GETMOUNTUID64:
		{
			uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
			if (put_user(uid, (u64 __user *)argp))
				return -EFAULT;
			return 0;
		}
	/* report the root's volume/namespace/dir-entry identifiers */
	case NCP_IOC_GETROOT:
		{
			struct ncp_setroot_ioctl sr;

			result = -EACCES;
			mutex_lock(&server->root_setup_lock);
			if (server->m.mounted_vol[0]) {
				struct dentry* dentry = inode->i_sb->s_root;

				if (dentry) {
					struct inode* s_inode = d_inode(dentry);

					if (s_inode) {
						sr.volNumber = NCP_FINFO(s_inode)->volNumber;
						sr.dirEntNum = NCP_FINFO(s_inode)->dirEntNum;
						sr.namespace = server->name_space[sr.volNumber];
						result = 0;
					} else
						ncp_dbg(1, "d_inode(s_root)==NULL\n");
				} else
					ncp_dbg(1, "s_root==NULL\n");
			} else {
				/* no single mounted volume: report "none" */
				sr.volNumber = -1;
				sr.namespace = 0;
				sr.dirEntNum = 0;
				result = 0;
			}
			mutex_unlock(&server->root_setup_lock);
			if (!result && copy_to_user(argp, &sr, sizeof(sr)))
				result = -EFAULT;
			return result;
		}

	/* redirect the mount root to a subdirectory (once, before setup) */
	case NCP_IOC_SETROOT:
		{
			struct ncp_setroot_ioctl sr;
			__u32 vnum;
			__le32 de;
			__le32 dosde;
			struct dentry* dentry;

			if (copy_from_user(&sr, argp, sizeof(sr)))
				return -EFAULT;
			mutex_lock(&server->root_setup_lock);
			if (server->root_setuped)
				result = -EBUSY;
			else {
				if (sr.volNumber < 0) {
					server->m.mounted_vol[0] = 0;
					vnum = NCP_NUMBER_OF_VOLUMES;
					de = 0;
					dosde = 0;
					result = 0;
				} else if (sr.volNumber >= NCP_NUMBER_OF_VOLUMES) {
					result = -EINVAL;
				} else if (ncp_mount_subdir(server, sr.volNumber,
							sr.namespace, sr.dirEntNum,
							&vnum, &de, &dosde)) {
					result = -ENOENT;
				} else
					result = 0;

				if (result == 0) {
					dentry = inode->i_sb->s_root;
					if (dentry) {
						struct inode* s_inode = d_inode(dentry);

						if (s_inode) {
							NCP_FINFO(s_inode)->volNumber = vnum;
							NCP_FINFO(s_inode)->dirEntNum = de;
							NCP_FINFO(s_inode)->DosDirNum = dosde;
							server->root_setuped = 1;
						} else {
							ncp_dbg(1, "d_inode(s_root)==NULL\n");
							result = -EIO;
						}
					} else {
						ncp_dbg(1, "s_root==NULL\n");
						result = -EIO;
					}
				}
			}
			mutex_unlock(&server->root_setup_lock);

			return result;
		}

#ifdef CONFIG_NCPFS_PACKET_SIGNING
	/* install (argp != NULL) or disable (argp == NULL) signing keys */
	case NCP_IOC_SIGN_INIT:
		{
			struct ncp_sign_init sign;

			if (argp)
				if (copy_from_user(&sign, argp, sizeof(sign)))
					return -EFAULT;
			ncp_lock_server(server);
			mutex_lock(&server->rcv.creq_mutex);
			if (argp) {
				if (server->sign_wanted) {
					memcpy(server->sign_root,sign.sign_root,8);
					memcpy(server->sign_last,sign.sign_last,16);
					server->sign_active = 1;
				}
				/* ignore when signatures not wanted */
			} else {
				server->sign_active = 0;
			}
			mutex_unlock(&server->rcv.creq_mutex);
			ncp_unlock_server(server);
			return 0;
		}

	case NCP_IOC_SIGN_WANTED:
		{
			int state;

			ncp_lock_server(server);
			state = server->sign_wanted;
			ncp_unlock_server(server);
			if (put_user(state, (int __user *)argp))
				return -EFAULT;
			return 0;
		}

	case NCP_IOC_SET_SIGN_WANTED:
		{
			int newstate;

			/* get only low 8 bits... */
			if (get_user(newstate, (unsigned char __user *)argp))
				return -EFAULT;
			result = 0;
			ncp_lock_server(server);
			if (server->sign_active) {
				/* cannot turn signatures OFF when active */
				if (!newstate)
					result = -EINVAL;
			} else {
				server->sign_wanted = newstate != 0;
			}
			ncp_unlock_server(server);
			return result;
		}

#endif /* CONFIG_NCPFS_PACKET_SIGNING */

#ifdef CONFIG_NCPFS_IOCTL_LOCKING
	/* server-side physical record locking on an open file */
	case NCP_IOC_LOCKUNLOCK:
		{
			struct ncp_lock_ioctl	 rqdata;

			if (copy_from_user(&rqdata, argp, sizeof(rqdata)))
				return -EFAULT;
			if (rqdata.origin != 0)
				return -EINVAL;
			/* check for cmd */
			switch (rqdata.cmd) {
				case NCP_LOCK_EX:
				case NCP_LOCK_SH:
						if (rqdata.timeout < 0)
							return -EINVAL;
						if (rqdata.timeout == 0)
							rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;
						else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT)
							rqdata.timeout = NCP_LOCK_MAX_TIMEOUT;
						break;
				case NCP_LOCK_LOG:
						rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;	/* has no effect */
						/* fall through */
				case NCP_LOCK_CLEAR:
						break;
				default:
						return -EINVAL;
			}
			/* locking needs both read and write access */
			if ((result = ncp_make_open(inode, O_RDWR)) != 0)
			{
				return result;
			}
			result = -EISDIR;
			if (!S_ISREG(inode->i_mode))
				goto outrel;
			if (rqdata.cmd == NCP_LOCK_CLEAR)
			{
				result = ncp_ClearPhysicalRecord(NCP_SERVER(inode),
							NCP_FINFO(inode)->file_handle,
							rqdata.offset,
							rqdata.length);
				if (result > 0) result = 0;	/* no such lock */
			}
			else
			{
				int lockcmd;

				switch (rqdata.cmd)
				{
					case NCP_LOCK_EX:  lockcmd=1; break;
					case NCP_LOCK_SH:  lockcmd=3; break;
					default:	   lockcmd=0; break;
				}
				result = ncp_LogPhysicalRecord(NCP_SERVER(inode),
							NCP_FINFO(inode)->file_handle,
							lockcmd,
							rqdata.offset,
							rqdata.length,
							rqdata.timeout);
				if (result > 0) result = -EAGAIN;
			}
outrel:
			/* balance the ncp_make_open() above */
			ncp_inode_close(inode);
			return result;
		}
#endif	/* CONFIG_NCPFS_IOCTL_LOCKING */

#ifdef CONFIG_COMPAT
	case NCP_IOC_GETOBJECTNAME_32:
		{
			struct compat_ncp_objectname_ioctl user;
			size_t outl;

			if (copy_from_user(&user, argp, sizeof(user)))
				return -EFAULT;
			down_read(&server->auth_rwsem);
			user.auth_type = server->auth.auth_type;
			/* copy at most the caller-supplied buffer length */
			outl = user.object_name_len;
			user.object_name_len = server->auth.object_name_len;
			if (outl > user.object_name_len)
				outl = user.object_name_len;
			result = 0;
			if (outl) {
				if (copy_to_user(compat_ptr(user.object_name),
						 server->auth.object_name,
						 outl))
					result = -EFAULT;
			}
			up_read(&server->auth_rwsem);
			if (!result && copy_to_user(argp, &user, sizeof(user)))
				result = -EFAULT;
			return result;
		}
#endif

	case NCP_IOC_GETOBJECTNAME:
		{
			struct ncp_objectname_ioctl user;
			size_t outl;

			if (copy_from_user(&user, argp, sizeof(user)))
				return -EFAULT;
			down_read(&server->auth_rwsem);
			user.auth_type = server->auth.auth_type;
			/* copy at most the caller-supplied buffer length */
			outl = user.object_name_len;
			user.object_name_len = server->auth.object_name_len;
			if (outl > user.object_name_len)
				outl = user.object_name_len;
			result = 0;
			if (outl) {
				if (copy_to_user(user.object_name,
						 server->auth.object_name,
						 outl))
					result = -EFAULT;
			}
			up_read(&server->auth_rwsem);
			if (!result && copy_to_user(argp, &user, sizeof(user)))
				result = -EFAULT;
			return result;
		}

#ifdef CONFIG_COMPAT
	case NCP_IOC_SETOBJECTNAME_32:
#endif
	case NCP_IOC_SETOBJECTNAME:
		{
			struct ncp_objectname_ioctl user;
			void* newname;
			void* oldname;
			size_t oldnamelen;
			void* oldprivate;
			size_t oldprivatelen;

#ifdef CONFIG_COMPAT
			if (cmd == NCP_IOC_SETOBJECTNAME_32) {
				struct compat_ncp_objectname_ioctl user32;
				if (copy_from_user(&user32, argp, sizeof(user32)))
					return -EFAULT;
				user.auth_type = user32.auth_type;
				user.object_name_len = user32.object_name_len;
				user.object_name = compat_ptr(user32.object_name);
			} else
#endif
			if (copy_from_user(&user, argp, sizeof(user)))
				return -EFAULT;

			if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
				return -ENOMEM;
			if (user.object_name_len) {
				newname = memdup_user(user.object_name,
						      user.object_name_len);
				if (IS_ERR(newname))
					return PTR_ERR(newname);
			} else {
				newname = NULL;
			}
			/* setting a new object name invalidates the private data */
			down_write(&server->auth_rwsem);
			oldname = server->auth.object_name;
			oldnamelen = server->auth.object_name_len;
			oldprivate = server->priv.data;
			oldprivatelen = server->priv.len;
			server->auth.auth_type = user.auth_type;
			server->auth.object_name_len = user.object_name_len;
			server->auth.object_name = newname;
			server->priv.len = 0;
			server->priv.data = NULL;
			up_write(&server->auth_rwsem);
			kfree(oldprivate);
			kfree(oldname);
			return 0;
		}

#ifdef CONFIG_COMPAT
	case NCP_IOC_GETPRIVATEDATA_32:
#endif
	case NCP_IOC_GETPRIVATEDATA:
		{
			struct ncp_privatedata_ioctl user;
			size_t outl;

#ifdef CONFIG_COMPAT
			if (cmd == NCP_IOC_GETPRIVATEDATA_32) {
				struct compat_ncp_privatedata_ioctl user32;
				if (copy_from_user(&user32, argp, sizeof(user32)))
					return -EFAULT;
				user.len = user32.len;
				user.data = compat_ptr(user32.data);
			} else
#endif
			if (copy_from_user(&user, argp, sizeof(user)))
				return -EFAULT;

			down_read(&server->auth_rwsem);
			/* copy at most the caller-supplied buffer length */
			outl = user.len;
			user.len = server->priv.len;
			if (outl > user.len) outl = user.len;
			result = 0;
			if (outl) {
				if (copy_to_user(user.data,
						 server->priv.data,
						 outl))
					result = -EFAULT;
			}
			up_read(&server->auth_rwsem);
			if (result)
				return result;
#ifdef CONFIG_COMPAT
			if (cmd == NCP_IOC_GETPRIVATEDATA_32) {
				struct compat_ncp_privatedata_ioctl user32;
				user32.len = user.len;
				user32.data = (unsigned long) user.data;
				if (copy_to_user(argp, &user32, sizeof(user32)))
					return -EFAULT;
			} else
#endif
			if (copy_to_user(argp, &user, sizeof(user)))
				return -EFAULT;

			return 0;
		}

#ifdef CONFIG_COMPAT
	case NCP_IOC_SETPRIVATEDATA_32:
#endif
	case NCP_IOC_SETPRIVATEDATA:
		{
			struct ncp_privatedata_ioctl user;
			void* new;
			void* old;
			size_t oldlen;

#ifdef CONFIG_COMPAT
			if (cmd == NCP_IOC_SETPRIVATEDATA_32) {
				struct compat_ncp_privatedata_ioctl user32;
				if (copy_from_user(&user32, argp, sizeof(user32)))
					return -EFAULT;
				user.len = user32.len;
				user.data = compat_ptr(user32.data);
			} else
#endif
			if (copy_from_user(&user, argp, sizeof(user)))
				return -EFAULT;

			if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
				return -ENOMEM;
			if (user.len) {
				new = memdup_user(user.data, user.len);
				if (IS_ERR(new))
					return PTR_ERR(new);
			} else {
				new = NULL;
			}
			down_write(&server->auth_rwsem);
			old = server->priv.data;
			oldlen = server->priv.len;
			server->priv.len = user.len;
			server->priv.data = new;
			up_write(&server->auth_rwsem);
			kfree(old);
			return 0;
		}

#ifdef CONFIG_NCPFS_NLS
	case NCP_IOC_SETCHARSETS:
		return ncp_set_charsets(server, argp);

	case NCP_IOC_GETCHARSETS:
		return ncp_get_charsets(server, argp);

#endif /* CONFIG_NCPFS_NLS */

	/* dentry cache time-to-live, supplied in milliseconds */
	case NCP_IOC_SETDENTRYTTL:
		{
			u_int32_t user;

			if (copy_from_user(&user, argp, sizeof(user)))
				return -EFAULT;
			/* 20 secs at most... */
			if (user > 20000)
				return -EINVAL;
			user = (user * HZ) / 1000;
			atomic_set(&server->dentry_ttl, user);
			return 0;
		}

	case NCP_IOC_GETDENTRYTTL:
		{
			u_int32_t user = (atomic_read(&server->dentry_ttl) * 1000) / HZ;
			if (copy_to_user(argp, &user, sizeof(user)))
				return -EFAULT;
			return 0;
		}

	}
	return -EINVAL;
}
813
/*
 * ioctl entry point: perform all capability/ownership/permission checks
 * for the given command, then hand off to __ncp_ioctl().
 *
 * The case grouping below is deliberate: commands fall into classes
 * (CAP_SYS_ADMIN-only, mount-owner-only, write-access, read-access,
 * unrestricted), and non-owners are filtered per class.
 */
long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct ncp_server *server = NCP_SERVER(inode);
	kuid_t uid = current_uid();
	int need_drop_write = 0;
	long ret;

	/* these reconfigure the mount and always need CAP_SYS_ADMIN */
	switch (cmd) {
	case NCP_IOC_SETCHARSETS:
	case NCP_IOC_CONN_LOGGED_IN:
	case NCP_IOC_SETROOT:
		if (!capable(CAP_SYS_ADMIN)) {
			ret = -EPERM;
			goto out;
		}
		break;
	}
	if (!uid_eq(server->m.mounted_uid, uid)) {
		switch (cmd) {
		/*
		 * Only mount owner can issue these ioctls. Information
		 * necessary to authenticate to other NDS servers are
		 * stored here.
		 */
		case NCP_IOC_GETOBJECTNAME:
		case NCP_IOC_SETOBJECTNAME:
		case NCP_IOC_GETPRIVATEDATA:
		case NCP_IOC_SETPRIVATEDATA:
#ifdef CONFIG_COMPAT
		case NCP_IOC_GETOBJECTNAME_32:
		case NCP_IOC_SETOBJECTNAME_32:
		case NCP_IOC_GETPRIVATEDATA_32:
		case NCP_IOC_SETPRIVATEDATA_32:
#endif
			ret = -EACCES;
			goto out;
		/*
		 * These require write access on the inode if user id
		 * does not match. Note that they do not write to the
		 * file... But old code did mnt_want_write, so I keep
		 * it as is. Of course not for mountpoint owner, as
		 * that breaks read-only mounts altogether as ncpmount
		 * needs working NCP_IOC_NCPREQUEST and
		 * NCP_IOC_GET_FS_INFO. Some of these codes (setdentryttl,
		 * signinit, setsignwanted) should be probably restricted
		 * to owner only, or even more to CAP_SYS_ADMIN).
		 */
		case NCP_IOC_GET_FS_INFO:
		case NCP_IOC_GET_FS_INFO_V2:
		case NCP_IOC_NCPREQUEST:
		case NCP_IOC_SETDENTRYTTL:
		case NCP_IOC_SIGN_INIT:
		case NCP_IOC_LOCKUNLOCK:
		case NCP_IOC_SET_SIGN_WANTED:
#ifdef CONFIG_COMPAT
		case NCP_IOC_GET_FS_INFO_V2_32:
		case NCP_IOC_NCPREQUEST_32:
#endif
			ret = mnt_want_write_file(filp);
			if (ret)
				goto out;
			need_drop_write = 1;
			ret = inode_permission(inode, MAY_WRITE);
			if (ret)
				goto outDropWrite;
			break;
		/*
		 * Read access required.
		 */
		case NCP_IOC_GETMOUNTUID16:
		case NCP_IOC_GETMOUNTUID32:
		case NCP_IOC_GETMOUNTUID64:
		case NCP_IOC_GETROOT:
		case NCP_IOC_SIGN_WANTED:
			ret = inode_permission(inode, MAY_READ);
			if (ret)
				goto out;
			break;
		/*
		 * Anybody can read these.
		 */
		case NCP_IOC_GETCHARSETS:
		case NCP_IOC_GETDENTRYTTL:
		default:
		/* Three codes below are protected by CAP_SYS_ADMIN above. */
		case NCP_IOC_SETCHARSETS:
		case NCP_IOC_CONN_LOGGED_IN:
		case NCP_IOC_SETROOT:
			break;
		}
	}
	ret = __ncp_ioctl(inode, cmd, arg);
outDropWrite:
	if (need_drop_write)
		mnt_drop_write_file(filp);
out:
	return ret;
}
913
914#ifdef CONFIG_COMPAT
/*
 * 32-bit compat ioctl entry point: pointer-extend the user argument and
 * hand off to the regular handler, which distinguishes the *_32 command
 * numbers itself.
 */
long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return ncp_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
923#endif
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
deleted file mode 100644
index a5c5cf2ff007..000000000000
--- a/fs/ncpfs/mmap.c
+++ /dev/null
@@ -1,125 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * mmap.c
4 *
5 * Copyright (C) 1995, 1996 by Volker Lendecke
6 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
7 *
8 */
9
10#include <linux/stat.h>
11#include <linux/time.h>
12#include <linux/kernel.h>
13#include <linux/gfp.h>
14#include <linux/mm.h>
15#include <linux/shm.h>
16#include <linux/errno.h>
17#include <linux/mman.h>
18#include <linux/string.h>
19#include <linux/fcntl.h>
20#include <linux/memcontrol.h>
21
22#include <linux/uaccess.h>
23
24#include "ncp_fs.h"
25
26/*
27 * Fill in the supplied page for mmap
28 * XXX: how are we excluding truncate/invalidate here? Maybe need to lock
29 * page?
30 */
31static int ncp_file_mmap_fault(struct vm_fault *vmf)
32{
33 struct inode *inode = file_inode(vmf->vma->vm_file);
34 char *pg_addr;
35 unsigned int already_read;
36 unsigned int count;
37 int bufsize;
38 int pos; /* XXX: loff_t ? */
39
40 /*
41 * ncpfs has nothing against high pages as long
42 * as recvmsg and memset works on it
43 */
44 vmf->page = alloc_page(GFP_HIGHUSER);
45 if (!vmf->page)
46 return VM_FAULT_OOM;
47 pg_addr = kmap(vmf->page);
48 pos = vmf->pgoff << PAGE_SHIFT;
49
50 count = PAGE_SIZE;
51 /* what we can read in one go */
52 bufsize = NCP_SERVER(inode)->buffer_size;
53
54 already_read = 0;
55 if (ncp_make_open(inode, O_RDONLY) >= 0) {
56 while (already_read < count) {
57 int read_this_time;
58 int to_read;
59
60 to_read = bufsize - (pos % bufsize);
61
62 to_read = min_t(unsigned int, to_read, count - already_read);
63
64 if (ncp_read_kernel(NCP_SERVER(inode),
65 NCP_FINFO(inode)->file_handle,
66 pos, to_read,
67 pg_addr + already_read,
68 &read_this_time) != 0) {
69 read_this_time = 0;
70 }
71 pos += read_this_time;
72 already_read += read_this_time;
73
74 if (read_this_time < to_read) {
75 break;
76 }
77 }
78 ncp_inode_close(inode);
79
80 }
81
82 if (already_read < PAGE_SIZE)
83 memset(pg_addr + already_read, 0, PAGE_SIZE - already_read);
84 flush_dcache_page(vmf->page);
85 kunmap(vmf->page);
86
87 /*
88 * If I understand ncp_read_kernel() properly, the above always
89 * fetches from the network, here the analogue of disk.
90 * -- nyc
91 */
92 count_vm_event(PGMAJFAULT);
93 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
94 return VM_FAULT_MAJOR;
95}
96
97static const struct vm_operations_struct ncp_file_mmap =
98{
99 .fault = ncp_file_mmap_fault,
100};
101
102
103/* This is used for a general mmap of a ncp file */
104int ncp_mmap(struct file *file, struct vm_area_struct *vma)
105{
106 struct inode *inode = file_inode(file);
107
108 ncp_dbg(1, "called\n");
109
110 if (!ncp_conn_valid(NCP_SERVER(inode)))
111 return -EIO;
112
113 /* only PAGE_COW or read-only supported now */
114 if (vma->vm_flags & VM_SHARED)
115 return -EINVAL;
116 /* we do not support files bigger than 4GB... We eventually
117 supports just 4GB... */
118 if (vma_pages(vma) + vma->vm_pgoff
119 > (1U << (32 - PAGE_SHIFT)))
120 return -EFBIG;
121
122 vma->vm_ops = &ncp_file_mmap;
123 file_accessed(file);
124 return 0;
125}
diff --git a/fs/ncpfs/ncp_fs.h b/fs/ncpfs/ncp_fs.h
deleted file mode 100644
index bdd262b6c198..000000000000
--- a/fs/ncpfs/ncp_fs.h
+++ /dev/null
@@ -1,101 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/ncp_fs.h>
3#include "ncp_fs_i.h"
4#include "ncp_fs_sb.h"
5
6#undef NCPFS_PARANOIA
7#ifdef NCPFS_PARANOIA
8#define ncp_vdbg(fmt, ...) \
9 pr_debug(fmt, ##__VA_ARGS__)
10#else
11#define ncp_vdbg(fmt, ...) \
12do { \
13 if (0) \
14 pr_debug(fmt, ##__VA_ARGS__); \
15} while (0)
16#endif
17
18#ifndef DEBUG_NCP
19#define DEBUG_NCP 0
20#endif
21
22#if DEBUG_NCP > 0 && !defined(DEBUG)
23#define DEBUG
24#endif
25
26#define ncp_dbg(level, fmt, ...) \
27do { \
28 if (level <= DEBUG_NCP) \
29 pr_debug(fmt, ##__VA_ARGS__); \
30} while (0)
31
32#define NCP_MAX_RPC_TIMEOUT (6*HZ)
33
34
35struct ncp_entry_info {
36 struct nw_info_struct i;
37 ino_t ino;
38 int opened;
39 int access;
40 unsigned int volume;
41 __u8 file_handle[6];
42};
43
44static inline struct ncp_server *NCP_SBP(const struct super_block *sb)
45{
46 return sb->s_fs_info;
47}
48
49#define NCP_SERVER(inode) NCP_SBP((inode)->i_sb)
50static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode)
51{
52 return container_of(inode, struct ncp_inode_info, vfs_inode);
53}
54
55/* linux/fs/ncpfs/inode.c */
56int ncp_notify_change(struct dentry *, struct iattr *);
57struct inode *ncp_iget(struct super_block *, struct ncp_entry_info *);
58void ncp_update_inode(struct inode *, struct ncp_entry_info *);
59void ncp_update_inode2(struct inode *, struct ncp_entry_info *);
60
61/* linux/fs/ncpfs/dir.c */
62extern const struct inode_operations ncp_dir_inode_operations;
63extern const struct file_operations ncp_dir_operations;
64extern const struct dentry_operations ncp_dentry_operations;
65int ncp_conn_logged_in(struct super_block *);
66int ncp_date_dos2unix(__le16 time, __le16 date);
67void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date);
68
69/* linux/fs/ncpfs/ioctl.c */
70long ncp_ioctl(struct file *, unsigned int, unsigned long);
71long ncp_compat_ioctl(struct file *, unsigned int, unsigned long);
72
73/* linux/fs/ncpfs/sock.c */
74int ncp_request2(struct ncp_server *server, int function,
75 void* reply, int max_reply_size);
76static inline int ncp_request(struct ncp_server *server, int function) {
77 return ncp_request2(server, function, server->packet, server->packet_size);
78}
79int ncp_connect(struct ncp_server *server);
80int ncp_disconnect(struct ncp_server *server);
81void ncp_lock_server(struct ncp_server *server);
82void ncp_unlock_server(struct ncp_server *server);
83
84/* linux/fs/ncpfs/symlink.c */
85#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
86extern const struct address_space_operations ncp_symlink_aops;
87int ncp_symlink(struct inode*, struct dentry*, const char*);
88#endif
89
90/* linux/fs/ncpfs/file.c */
91extern const struct inode_operations ncp_file_inode_operations;
92extern const struct file_operations ncp_file_operations;
93int ncp_make_open(struct inode *, int);
94
95/* linux/fs/ncpfs/mmap.c */
96int ncp_mmap(struct file *, struct vm_area_struct *);
97
98/* linux/fs/ncpfs/ncplib_kernel.c */
99int ncp_make_closed(struct inode *);
100
101#include "ncplib_kernel.h"
diff --git a/fs/ncpfs/ncp_fs_i.h b/fs/ncpfs/ncp_fs_i.h
deleted file mode 100644
index 3432bafb53a5..000000000000
--- a/fs/ncpfs/ncp_fs_i.h
+++ /dev/null
@@ -1,31 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * ncp_fs_i.h
4 *
5 * Copyright (C) 1995 Volker Lendecke
6 *
7 */
8
9#ifndef _LINUX_NCP_FS_I
10#define _LINUX_NCP_FS_I
11
12/*
13 * This is the ncpfs part of the inode structure. This must contain
14 * all the information we need to work with an inode after creation.
15 */
16struct ncp_inode_info {
17 __le32 dirEntNum;
18 __le32 DosDirNum;
19 __u8 volNumber;
20 __le32 nwattr;
21 struct mutex open_mutex;
22 atomic_t opened;
23 int access;
24 int flags;
25#define NCPI_KLUDGE_SYMLINK 0x0001
26#define NCPI_DIR_CACHE 0x0002
27 __u8 file_handle[6];
28 struct inode vfs_inode;
29};
30
31#endif /* _LINUX_NCP_FS_I */
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
deleted file mode 100644
index f06cde4adf71..000000000000
--- a/fs/ncpfs/ncp_fs_sb.h
+++ /dev/null
@@ -1,174 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * ncp_fs_sb.h
4 *
5 * Copyright (C) 1995, 1996 by Volker Lendecke
6 *
7 */
8
9#ifndef _NCP_FS_SB
10#define _NCP_FS_SB
11
12#include <linux/types.h>
13#include <linux/ncp_mount.h>
14#include <linux/net.h>
15#include <linux/mutex.h>
16#include <linux/backing-dev.h>
17#include <linux/workqueue.h>
18
19#define NCP_DEFAULT_OPTIONS 0 /* 2 for packet signatures */
20
21struct sock;
22
23struct ncp_mount_data_kernel {
24 unsigned long flags; /* NCP_MOUNT_* flags */
25 unsigned int int_flags; /* internal flags */
26#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001
27 kuid_t mounted_uid; /* Who may umount() this filesystem? */
28 struct pid *wdog_pid; /* Who cares for our watchdog packets? */
29 unsigned int ncp_fd; /* The socket to the ncp port */
30 unsigned int time_out; /* How long should I wait after
31 sending a NCP request? */
32 unsigned int retry_count; /* And how often should I retry? */
33 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
34 kuid_t uid;
35 kgid_t gid;
36 umode_t file_mode;
37 umode_t dir_mode;
38 int info_fd;
39};
40
41struct ncp_server {
42 struct rcu_head rcu;
43 struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of
44 interest for us later, so we store
45 it completely. */
46
47 __u8 name_space[NCP_NUMBER_OF_VOLUMES + 2];
48
49 struct socket *ncp_sock;/* ncp socket */
50 struct socket *info_sock;
51
52 u8 sequence;
53 u8 task;
54 u16 connection; /* Remote connection number */
55
56 u8 completion; /* Status message from server */
57 u8 conn_status; /* Bit 4 = 1 ==> Server going down, no
58 requests allowed anymore.
59 Bit 0 = 1 ==> Server is down. */
60
61 int buffer_size; /* Negotiated bufsize */
62
63 int reply_size; /* Size of last reply */
64
65 int packet_size;
66 unsigned char *packet; /* Here we prepare requests and
67 receive replies */
68 unsigned char *txbuf; /* Storage for current request */
69 unsigned char *rxbuf; /* Storage for reply to current request */
70
71 int lock; /* To prevent mismatch in protocols. */
72 struct mutex mutex;
73
74 int current_size; /* for packet preparation */
75 int has_subfunction;
76 int ncp_reply_size;
77
78 int root_setuped;
79 struct mutex root_setup_lock;
80
81 /* info for packet signing */
82 int sign_wanted; /* 1=Server needs signed packets */
83 int sign_active; /* 0=don't do signing, 1=do */
84 char sign_root[8]; /* generated from password and encr. key */
85 char sign_last[16];
86
87 /* Authentication info: NDS or BINDERY, username */
88 struct {
89 int auth_type;
90 size_t object_name_len;
91 void* object_name;
92 int object_type;
93 } auth;
94 /* Password info */
95 struct {
96 size_t len;
97 void* data;
98 } priv;
99 struct rw_semaphore auth_rwsem;
100
101 /* nls info: codepage for volume and charset for I/O */
102 struct nls_table *nls_vol;
103 struct nls_table *nls_io;
104
105 /* maximum age in jiffies */
106 atomic_t dentry_ttl;
107
108 /* miscellaneous */
109 unsigned int flags;
110
111 spinlock_t requests_lock; /* Lock accesses to tx.requests, tx.creq and rcv.creq when STREAM mode */
112
113 void (*data_ready)(struct sock* sk);
114 void (*error_report)(struct sock* sk);
115 void (*write_space)(struct sock* sk); /* STREAM mode only */
116 struct {
117 struct work_struct tq; /* STREAM/DGRAM: data/error ready */
118 struct ncp_request_reply* creq; /* STREAM/DGRAM: awaiting reply from this request */
119 struct mutex creq_mutex; /* DGRAM only: lock accesses to rcv.creq */
120
121 unsigned int state; /* STREAM only: receiver state */
122 struct {
123 __u32 magic __packed;
124 __u32 len __packed;
125 __u16 type __packed;
126 __u16 p1 __packed;
127 __u16 p2 __packed;
128 __u16 p3 __packed;
129 __u16 type2 __packed;
130 } buf; /* STREAM only: temporary buffer */
131 unsigned char* ptr; /* STREAM only: pointer to data */
132 size_t len; /* STREAM only: length of data to receive */
133 } rcv;
134 struct {
135 struct list_head requests; /* STREAM only: queued requests */
136 struct work_struct tq; /* STREAM only: transmitter ready */
137 struct ncp_request_reply* creq; /* STREAM only: currently transmitted entry */
138 } tx;
139 struct timer_list timeout_tm; /* DGRAM only: timeout timer */
140 struct work_struct timeout_tq; /* DGRAM only: associated queue, we run timers from process context */
141 int timeout_last; /* DGRAM only: current timeout length */
142 int timeout_retries; /* DGRAM only: retries left */
143 struct {
144 size_t len;
145 __u8 data[128];
146 } unexpected_packet;
147};
148
149extern void ncp_tcp_rcv_proc(struct work_struct *work);
150extern void ncp_tcp_tx_proc(struct work_struct *work);
151extern void ncpdgram_rcv_proc(struct work_struct *work);
152extern void ncpdgram_timeout_proc(struct work_struct *work);
153extern void ncpdgram_timeout_call(struct timer_list *t);
154extern void ncp_tcp_data_ready(struct sock* sk);
155extern void ncp_tcp_write_space(struct sock* sk);
156extern void ncp_tcp_error_report(struct sock* sk);
157
158#define NCP_FLAG_UTF8 1
159
160#define NCP_CLR_FLAG(server, flag) ((server)->flags &= ~(flag))
161#define NCP_SET_FLAG(server, flag) ((server)->flags |= (flag))
162#define NCP_IS_FLAG(server, flag) ((server)->flags & (flag))
163
164static inline int ncp_conn_valid(struct ncp_server *server)
165{
166 return ((server->conn_status & 0x11) == 0);
167}
168
169static inline void ncp_invalidate_conn(struct ncp_server *server)
170{
171 server->conn_status |= 0x01;
172}
173
174#endif
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
deleted file mode 100644
index 804adfebba2f..000000000000
--- a/fs/ncpfs/ncplib_kernel.c
+++ /dev/null
@@ -1,1322 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * ncplib_kernel.c
4 *
5 * Copyright (C) 1995, 1996 by Volker Lendecke
6 * Modified for big endian by J.F. Chadima and David S. Miller
7 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
8 * Modified 1999 Wolfram Pienkoss for NLS
9 * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include "ncp_fs.h"
16
17static inline void assert_server_locked(struct ncp_server *server)
18{
19 if (server->lock == 0) {
20 ncp_dbg(1, "server not locked!\n");
21 }
22}
23
24static void ncp_add_byte(struct ncp_server *server, __u8 x)
25{
26 assert_server_locked(server);
27 *(__u8 *) (&(server->packet[server->current_size])) = x;
28 server->current_size += 1;
29 return;
30}
31
32static void ncp_add_word(struct ncp_server *server, __le16 x)
33{
34 assert_server_locked(server);
35 put_unaligned(x, (__le16 *) (&(server->packet[server->current_size])));
36 server->current_size += 2;
37 return;
38}
39
40static void ncp_add_be16(struct ncp_server *server, __u16 x)
41{
42 assert_server_locked(server);
43 put_unaligned(cpu_to_be16(x), (__be16 *) (&(server->packet[server->current_size])));
44 server->current_size += 2;
45}
46
47static void ncp_add_dword(struct ncp_server *server, __le32 x)
48{
49 assert_server_locked(server);
50 put_unaligned(x, (__le32 *) (&(server->packet[server->current_size])));
51 server->current_size += 4;
52 return;
53}
54
55static void ncp_add_be32(struct ncp_server *server, __u32 x)
56{
57 assert_server_locked(server);
58 put_unaligned(cpu_to_be32(x), (__be32 *)(&(server->packet[server->current_size])));
59 server->current_size += 4;
60}
61
62static inline void ncp_add_dword_lh(struct ncp_server *server, __u32 x) {
63 ncp_add_dword(server, cpu_to_le32(x));
64}
65
66static void ncp_add_mem(struct ncp_server *server, const void *source, int size)
67{
68 assert_server_locked(server);
69 memcpy(&(server->packet[server->current_size]), source, size);
70 server->current_size += size;
71 return;
72}
73
74static void ncp_add_pstring(struct ncp_server *server, const char *s)
75{
76 int len = strlen(s);
77 assert_server_locked(server);
78 if (len > 255) {
79 ncp_dbg(1, "string too long: %s\n", s);
80 len = 255;
81 }
82 ncp_add_byte(server, len);
83 ncp_add_mem(server, s, len);
84 return;
85}
86
87static inline void ncp_init_request(struct ncp_server *server)
88{
89 ncp_lock_server(server);
90
91 server->current_size = sizeof(struct ncp_request_header);
92 server->has_subfunction = 0;
93}
94
95static inline void ncp_init_request_s(struct ncp_server *server, int subfunction)
96{
97 ncp_lock_server(server);
98
99 server->current_size = sizeof(struct ncp_request_header) + 2;
100 ncp_add_byte(server, subfunction);
101
102 server->has_subfunction = 1;
103}
104
105static inline char *
106ncp_reply_data(struct ncp_server *server, int offset)
107{
108 return &(server->packet[sizeof(struct ncp_reply_header) + offset]);
109}
110
111static inline u8 BVAL(const void *data)
112{
113 return *(const u8 *)data;
114}
115
116static u8 ncp_reply_byte(struct ncp_server *server, int offset)
117{
118 return *(const u8 *)ncp_reply_data(server, offset);
119}
120
121static inline u16 WVAL_LH(const void *data)
122{
123 return get_unaligned_le16(data);
124}
125
126static u16
127ncp_reply_le16(struct ncp_server *server, int offset)
128{
129 return get_unaligned_le16(ncp_reply_data(server, offset));
130}
131
132static u16
133ncp_reply_be16(struct ncp_server *server, int offset)
134{
135 return get_unaligned_be16(ncp_reply_data(server, offset));
136}
137
138static inline u32 DVAL_LH(const void *data)
139{
140 return get_unaligned_le32(data);
141}
142
143static __le32
144ncp_reply_dword(struct ncp_server *server, int offset)
145{
146 return get_unaligned((__le32 *)ncp_reply_data(server, offset));
147}
148
149static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) {
150 return le32_to_cpu(ncp_reply_dword(server, offset));
151}
152
153int
154ncp_negotiate_buffersize(struct ncp_server *server, int size, int *target)
155{
156 int result;
157
158 ncp_init_request(server);
159 ncp_add_be16(server, size);
160
161 if ((result = ncp_request(server, 33)) != 0) {
162 ncp_unlock_server(server);
163 return result;
164 }
165 *target = min_t(unsigned int, ncp_reply_be16(server, 0), size);
166
167 ncp_unlock_server(server);
168 return 0;
169}
170
171
172/* options:
173 * bit 0 ipx checksum
174 * bit 1 packet signing
175 */
176int
177ncp_negotiate_size_and_options(struct ncp_server *server,
178 int size, int options, int *ret_size, int *ret_options) {
179 int result;
180
181 /* there is minimum */
182 if (size < NCP_BLOCK_SIZE) size = NCP_BLOCK_SIZE;
183
184 ncp_init_request(server);
185 ncp_add_be16(server, size);
186 ncp_add_byte(server, options);
187
188 if ((result = ncp_request(server, 0x61)) != 0)
189 {
190 ncp_unlock_server(server);
191 return result;
192 }
193
194 /* NCP over UDP returns 0 (!!!) */
195 result = ncp_reply_be16(server, 0);
196 if (result >= NCP_BLOCK_SIZE)
197 size = min(result, size);
198 *ret_size = size;
199 *ret_options = ncp_reply_byte(server, 4);
200
201 ncp_unlock_server(server);
202 return 0;
203}
204
205int ncp_get_volume_info_with_number(struct ncp_server* server,
206 int n, struct ncp_volume_info* target) {
207 int result;
208 int len;
209
210 ncp_init_request_s(server, 44);
211 ncp_add_byte(server, n);
212
213 if ((result = ncp_request(server, 22)) != 0) {
214 goto out;
215 }
216 target->total_blocks = ncp_reply_dword_lh(server, 0);
217 target->free_blocks = ncp_reply_dword_lh(server, 4);
218 target->purgeable_blocks = ncp_reply_dword_lh(server, 8);
219 target->not_yet_purgeable_blocks = ncp_reply_dword_lh(server, 12);
220 target->total_dir_entries = ncp_reply_dword_lh(server, 16);
221 target->available_dir_entries = ncp_reply_dword_lh(server, 20);
222 target->sectors_per_block = ncp_reply_byte(server, 28);
223
224 memset(&(target->volume_name), 0, sizeof(target->volume_name));
225
226 result = -EIO;
227 len = ncp_reply_byte(server, 29);
228 if (len > NCP_VOLNAME_LEN) {
229 ncp_dbg(1, "volume name too long: %d\n", len);
230 goto out;
231 }
232 memcpy(&(target->volume_name), ncp_reply_data(server, 30), len);
233 result = 0;
234out:
235 ncp_unlock_server(server);
236 return result;
237}
238
239int ncp_get_directory_info(struct ncp_server* server, __u8 n,
240 struct ncp_volume_info* target) {
241 int result;
242 int len;
243
244 ncp_init_request_s(server, 45);
245 ncp_add_byte(server, n);
246
247 if ((result = ncp_request(server, 22)) != 0) {
248 goto out;
249 }
250 target->total_blocks = ncp_reply_dword_lh(server, 0);
251 target->free_blocks = ncp_reply_dword_lh(server, 4);
252 target->purgeable_blocks = 0;
253 target->not_yet_purgeable_blocks = 0;
254 target->total_dir_entries = ncp_reply_dword_lh(server, 8);
255 target->available_dir_entries = ncp_reply_dword_lh(server, 12);
256 target->sectors_per_block = ncp_reply_byte(server, 20);
257
258 memset(&(target->volume_name), 0, sizeof(target->volume_name));
259
260 result = -EIO;
261 len = ncp_reply_byte(server, 21);
262 if (len > NCP_VOLNAME_LEN) {
263 ncp_dbg(1, "volume name too long: %d\n", len);
264 goto out;
265 }
266 memcpy(&(target->volume_name), ncp_reply_data(server, 22), len);
267 result = 0;
268out:
269 ncp_unlock_server(server);
270 return result;
271}
272
273int
274ncp_close_file(struct ncp_server *server, const char *file_id)
275{
276 int result;
277
278 ncp_init_request(server);
279 ncp_add_byte(server, 0);
280 ncp_add_mem(server, file_id, 6);
281
282 result = ncp_request(server, 66);
283 ncp_unlock_server(server);
284 return result;
285}
286
287int
288ncp_make_closed(struct inode *inode)
289{
290 int err;
291
292 err = 0;
293 mutex_lock(&NCP_FINFO(inode)->open_mutex);
294 if (atomic_read(&NCP_FINFO(inode)->opened) == 1) {
295 atomic_set(&NCP_FINFO(inode)->opened, 0);
296 err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle);
297
298 if (!err)
299 ncp_vdbg("volnum=%d, dirent=%u, error=%d\n",
300 NCP_FINFO(inode)->volNumber,
301 NCP_FINFO(inode)->dirEntNum, err);
302 }
303 mutex_unlock(&NCP_FINFO(inode)->open_mutex);
304 return err;
305}
306
307static void ncp_add_handle_path(struct ncp_server *server, __u8 vol_num,
308 __le32 dir_base, int have_dir_base,
309 const char *path)
310{
311 ncp_add_byte(server, vol_num);
312 ncp_add_dword(server, dir_base);
313 if (have_dir_base != 0) {
314 ncp_add_byte(server, 1); /* dir_base */
315 } else {
316 ncp_add_byte(server, 0xff); /* no handle */
317 }
318 if (path != NULL) {
319 ncp_add_byte(server, 1); /* 1 component */
320 ncp_add_pstring(server, path);
321 } else {
322 ncp_add_byte(server, 0);
323 }
324}
325
326int ncp_dirhandle_alloc(struct ncp_server* server, __u8 volnum, __le32 dirent,
327 __u8* dirhandle) {
328 int result;
329
330 ncp_init_request(server);
331 ncp_add_byte(server, 12); /* subfunction */
332 ncp_add_byte(server, NW_NS_DOS);
333 ncp_add_byte(server, 0);
334 ncp_add_word(server, 0);
335 ncp_add_handle_path(server, volnum, dirent, 1, NULL);
336 if ((result = ncp_request(server, 87)) == 0) {
337 *dirhandle = ncp_reply_byte(server, 0);
338 }
339 ncp_unlock_server(server);
340 return result;
341}
342
343int ncp_dirhandle_free(struct ncp_server* server, __u8 dirhandle) {
344 int result;
345
346 ncp_init_request_s(server, 20);
347 ncp_add_byte(server, dirhandle);
348 result = ncp_request(server, 22);
349 ncp_unlock_server(server);
350 return result;
351}
352
353void ncp_extract_file_info(const void *structure, struct nw_info_struct *target)
354{
355 const __u8 *name_len;
356 const int info_struct_size = offsetof(struct nw_info_struct, nameLen);
357
358 memcpy(target, structure, info_struct_size);
359 name_len = structure + info_struct_size;
360 target->nameLen = *name_len;
361 memcpy(target->entryName, name_len + 1, *name_len);
362 target->entryName[*name_len] = '\0';
363 target->volNumber = le32_to_cpu(target->volNumber);
364 return;
365}
366
367#ifdef CONFIG_NCPFS_NFS_NS
368static inline void ncp_extract_nfs_info(const unsigned char *structure,
369 struct nw_nfs_info *target)
370{
371 target->mode = DVAL_LH(structure);
372 target->rdev = DVAL_LH(structure + 8);
373}
374#endif
375
376int ncp_obtain_nfs_info(struct ncp_server *server,
377 struct nw_info_struct *target)
378
379{
380 int result = 0;
381#ifdef CONFIG_NCPFS_NFS_NS
382 __u32 volnum = target->volNumber;
383
384 if (ncp_is_nfs_extras(server, volnum)) {
385 ncp_init_request(server);
386 ncp_add_byte(server, 19); /* subfunction */
387 ncp_add_byte(server, server->name_space[volnum]);
388 ncp_add_byte(server, NW_NS_NFS);
389 ncp_add_byte(server, 0);
390 ncp_add_byte(server, volnum);
391 ncp_add_dword(server, target->dirEntNum);
392 /* We must retrieve both nlinks and rdev, otherwise some server versions
393 report zeroes instead of valid data */
394 ncp_add_dword_lh(server, NSIBM_NFS_MODE | NSIBM_NFS_NLINKS | NSIBM_NFS_RDEV);
395
396 if ((result = ncp_request(server, 87)) == 0) {
397 ncp_extract_nfs_info(ncp_reply_data(server, 0), &target->nfs);
398 ncp_dbg(1, "(%s) mode=0%o, rdev=0x%x\n",
399 target->entryName, target->nfs.mode,
400 target->nfs.rdev);
401 } else {
402 target->nfs.mode = 0;
403 target->nfs.rdev = 0;
404 }
405 ncp_unlock_server(server);
406
407 } else
408#endif
409 {
410 target->nfs.mode = 0;
411 target->nfs.rdev = 0;
412 }
413 return result;
414}
415
416/*
417 * Returns information for a (one-component) name relative to
418 * the specified directory.
419 */
420int ncp_obtain_info(struct ncp_server *server, struct inode *dir, const char *path,
421 struct nw_info_struct *target)
422{
423 __u8 volnum = NCP_FINFO(dir)->volNumber;
424 __le32 dirent = NCP_FINFO(dir)->dirEntNum;
425 int result;
426
427 if (target == NULL) {
428 pr_err("%s: invalid call\n", __func__);
429 return -EINVAL;
430 }
431 ncp_init_request(server);
432 ncp_add_byte(server, 6); /* subfunction */
433 ncp_add_byte(server, server->name_space[volnum]);
434 ncp_add_byte(server, server->name_space[volnum]); /* N.B. twice ?? */
435 ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */
436 ncp_add_dword(server, RIM_ALL);
437 ncp_add_handle_path(server, volnum, dirent, 1, path);
438
439 if ((result = ncp_request(server, 87)) != 0)
440 goto out;
441 ncp_extract_file_info(ncp_reply_data(server, 0), target);
442 ncp_unlock_server(server);
443
444 result = ncp_obtain_nfs_info(server, target);
445 return result;
446
447out:
448 ncp_unlock_server(server);
449 return result;
450}
451
452#ifdef CONFIG_NCPFS_NFS_NS
453static int
454ncp_obtain_DOS_dir_base(struct ncp_server *server,
455 __u8 ns, __u8 volnum, __le32 dirent,
456 const char *path, /* At most 1 component */
457 __le32 *DOS_dir_base)
458{
459 int result;
460
461 ncp_init_request(server);
462 ncp_add_byte(server, 6); /* subfunction */
463 ncp_add_byte(server, ns);
464 ncp_add_byte(server, ns);
465 ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */
466 ncp_add_dword(server, RIM_DIRECTORY);
467 ncp_add_handle_path(server, volnum, dirent, 1, path);
468
469 if ((result = ncp_request(server, 87)) == 0)
470 {
471 if (DOS_dir_base) *DOS_dir_base=ncp_reply_dword(server, 0x34);
472 }
473 ncp_unlock_server(server);
474 return result;
475}
476#endif /* CONFIG_NCPFS_NFS_NS */
477
478static inline int
479ncp_get_known_namespace(struct ncp_server *server, __u8 volume)
480{
481#if defined(CONFIG_NCPFS_OS2_NS) || defined(CONFIG_NCPFS_NFS_NS)
482 int result;
483 __u8 *namespace;
484 __u16 no_namespaces;
485
486 ncp_init_request(server);
487 ncp_add_byte(server, 24); /* Subfunction: Get Name Spaces Loaded */
488 ncp_add_word(server, 0);
489 ncp_add_byte(server, volume);
490
491 if ((result = ncp_request(server, 87)) != 0) {
492 ncp_unlock_server(server);
493 return NW_NS_DOS; /* not result ?? */
494 }
495
496 result = NW_NS_DOS;
497 no_namespaces = ncp_reply_le16(server, 0);
498 namespace = ncp_reply_data(server, 2);
499
500 while (no_namespaces > 0) {
501 ncp_dbg(1, "found %d on %d\n", *namespace, volume);
502
503#ifdef CONFIG_NCPFS_NFS_NS
504 if ((*namespace == NW_NS_NFS) && !(server->m.flags&NCP_MOUNT_NO_NFS))
505 {
506 result = NW_NS_NFS;
507 break;
508 }
509#endif /* CONFIG_NCPFS_NFS_NS */
510#ifdef CONFIG_NCPFS_OS2_NS
511 if ((*namespace == NW_NS_OS2) && !(server->m.flags&NCP_MOUNT_NO_OS2))
512 {
513 result = NW_NS_OS2;
514 }
515#endif /* CONFIG_NCPFS_OS2_NS */
516 namespace += 1;
517 no_namespaces -= 1;
518 }
519 ncp_unlock_server(server);
520 return result;
521#else /* neither OS2 nor NFS - only DOS */
522 return NW_NS_DOS;
523#endif /* defined(CONFIG_NCPFS_OS2_NS) || defined(CONFIG_NCPFS_NFS_NS) */
524}
525
526int
527ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns)
528{
529 int ns = ncp_get_known_namespace(server, volume);
530
531 if (ret_ns)
532 *ret_ns = ns;
533
534 ncp_dbg(1, "namespace[%d] = %d\n", volume, server->name_space[volume]);
535
536 if (server->name_space[volume] == ns)
537 return 0;
538 server->name_space[volume] = ns;
539 return 1;
540}
541
542static int
543ncp_ObtainSpecificDirBase(struct ncp_server *server,
544 __u8 nsSrc, __u8 nsDst, __u8 vol_num, __le32 dir_base,
545 const char *path, /* At most 1 component */
546 __le32 *dirEntNum, __le32 *DosDirNum)
547{
548 int result;
549
550 ncp_init_request(server);
551 ncp_add_byte(server, 6); /* subfunction */
552 ncp_add_byte(server, nsSrc);
553 ncp_add_byte(server, nsDst);
554 ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */
555 ncp_add_dword(server, RIM_ALL);
556 ncp_add_handle_path(server, vol_num, dir_base, 1, path);
557
558 if ((result = ncp_request(server, 87)) != 0)
559 {
560 ncp_unlock_server(server);
561 return result;
562 }
563
564 if (dirEntNum)
565 *dirEntNum = ncp_reply_dword(server, 0x30);
566 if (DosDirNum)
567 *DosDirNum = ncp_reply_dword(server, 0x34);
568 ncp_unlock_server(server);
569 return 0;
570}
571
572int
573ncp_mount_subdir(struct ncp_server *server,
574 __u8 volNumber, __u8 srcNS, __le32 dirEntNum,
575 __u32* volume, __le32* newDirEnt, __le32* newDosEnt)
576{
577 int dstNS;
578 int result;
579
580 ncp_update_known_namespace(server, volNumber, &dstNS);
581 if ((result = ncp_ObtainSpecificDirBase(server, srcNS, dstNS, volNumber,
582 dirEntNum, NULL, newDirEnt, newDosEnt)) != 0)
583 {
584 return result;
585 }
586 *volume = volNumber;
587 server->m.mounted_vol[1] = 0;
588 server->m.mounted_vol[0] = 'X';
589 return 0;
590}
591
592int
593ncp_get_volume_root(struct ncp_server *server,
594 const char *volname, __u32* volume, __le32* dirent, __le32* dosdirent)
595{
596 int result;
597
598 ncp_dbg(1, "looking up vol %s\n", volname);
599
600 ncp_init_request(server);
601 ncp_add_byte(server, 22); /* Subfunction: Generate dir handle */
602 ncp_add_byte(server, 0); /* DOS namespace */
603 ncp_add_byte(server, 0); /* reserved */
604 ncp_add_byte(server, 0); /* reserved */
605 ncp_add_byte(server, 0); /* reserved */
606
607 ncp_add_byte(server, 0); /* faked volume number */
608 ncp_add_dword(server, 0); /* faked dir_base */
609 ncp_add_byte(server, 0xff); /* Don't have a dir_base */
610 ncp_add_byte(server, 1); /* 1 path component */
611 ncp_add_pstring(server, volname);
612
613 if ((result = ncp_request(server, 87)) != 0) {
614 ncp_unlock_server(server);
615 return result;
616 }
617 *dirent = *dosdirent = ncp_reply_dword(server, 4);
618 *volume = ncp_reply_byte(server, 8);
619 ncp_unlock_server(server);
620 return 0;
621}
622
623int
624ncp_lookup_volume(struct ncp_server *server,
625 const char *volname, struct nw_info_struct *target)
626{
627 int result;
628
629 memset(target, 0, sizeof(*target));
630 result = ncp_get_volume_root(server, volname,
631 &target->volNumber, &target->dirEntNum, &target->DosDirNum);
632 if (result) {
633 return result;
634 }
635 ncp_update_known_namespace(server, target->volNumber, NULL);
636 target->nameLen = strlen(volname);
637 memcpy(target->entryName, volname, target->nameLen+1);
638 target->attributes = aDIR;
639 /* set dates to Jan 1, 1986 00:00 */
640 target->creationTime = target->modifyTime = cpu_to_le16(0x0000);
641 target->creationDate = target->modifyDate = target->lastAccessDate = cpu_to_le16(0x0C21);
642 target->nfs.mode = 0;
643 return 0;
644}
645
646int ncp_modify_file_or_subdir_dos_info_path(struct ncp_server *server,
647 struct inode *dir,
648 const char *path,
649 __le32 info_mask,
650 const struct nw_modify_dos_info *info)
651{
652 __u8 volnum = NCP_FINFO(dir)->volNumber;
653 __le32 dirent = NCP_FINFO(dir)->dirEntNum;
654 int result;
655
656 ncp_init_request(server);
657 ncp_add_byte(server, 7); /* subfunction */
658 ncp_add_byte(server, server->name_space[volnum]);
659 ncp_add_byte(server, 0); /* reserved */
660 ncp_add_word(server, cpu_to_le16(0x8006)); /* search attribs: all */
661
662 ncp_add_dword(server, info_mask);
663 ncp_add_mem(server, info, sizeof(*info));
664 ncp_add_handle_path(server, volnum, dirent, 1, path);
665
666 result = ncp_request(server, 87);
667 ncp_unlock_server(server);
668 return result;
669}
670
/*
 * Convenience wrapper: modify DOS info of @dir itself (NULL path),
 * see ncp_modify_file_or_subdir_dos_info_path().
 */
int ncp_modify_file_or_subdir_dos_info(struct ncp_server *server,
				       struct inode *dir,
				       __le32 info_mask,
				       const struct nw_modify_dos_info *info)
{
	return ncp_modify_file_or_subdir_dos_info_path(server, dir, NULL,
						       info_mask, info);
}
679
#ifdef CONFIG_NCPFS_NFS_NS
/*
 * Set the NFS-namespace mode and rdev for directory entry @dirent on
 * volume @volnum.  The request is only issued when the volume's active
 * namespace is NW_NS_NFS; otherwise the function is a no-op returning 0.
 */
int ncp_modify_nfs_info(struct ncp_server *server, __u8 volnum, __le32 dirent,
			__u32 mode, __u32 rdev)

{
	int result = 0;

	ncp_init_request(server);
	if (server->name_space[volnum] == NW_NS_NFS) {
		ncp_add_byte(server, 25);	/* subfunction */
		ncp_add_byte(server, server->name_space[volnum]);
		ncp_add_byte(server, NW_NS_NFS);
		ncp_add_byte(server, volnum);
		ncp_add_dword(server, dirent);
		/* we must always operate on both nlinks and rdev, otherwise
		   rdev is not set */
		ncp_add_dword_lh(server, NSIBM_NFS_MODE | NSIBM_NFS_NLINKS | NSIBM_NFS_RDEV);
		ncp_add_dword_lh(server, mode);
		ncp_add_dword_lh(server, 1);	/* nlinks */
		ncp_add_dword_lh(server, rdev);
		result = ncp_request(server, 87);
	}
	ncp_unlock_server(server);
	return result;
}
#endif
706
707
/*
 * Delete one directory entry via NCP function 87, subfunction 8.
 * @have_dir_base and @name are passed through to ncp_add_handle_path()
 * to identify the entry; @ns selects the namespace, @attr the search
 * attributes.  Returns the NCP request result.
 */
static int
ncp_DeleteNSEntry(struct ncp_server *server,
		  __u8 have_dir_base, __u8 volnum, __le32 dirent,
		  const char* name, __u8 ns, __le16 attr)
{
	int result;

	ncp_init_request(server);
	ncp_add_byte(server, 8);	/* subfunction */
	ncp_add_byte(server, ns);
	ncp_add_byte(server, 0);	/* reserved */
	ncp_add_word(server, attr);	/* search attribs: all */
	ncp_add_handle_path(server, volnum, dirent, have_dir_base, name);

	result = ncp_request(server, 87);
	ncp_unlock_server(server);
	return result;
}
726
727int
728ncp_del_file_or_subdir2(struct ncp_server *server,
729 struct dentry *dentry)
730{
731 struct inode *inode = d_inode(dentry);
732 __u8 volnum;
733 __le32 dirent;
734
735 if (!inode) {
736 return 0xFF; /* Any error */
737 }
738 volnum = NCP_FINFO(inode)->volNumber;
739 dirent = NCP_FINFO(inode)->DosDirNum;
740 return ncp_DeleteNSEntry(server, 1, volnum, dirent, NULL, NW_NS_DOS, cpu_to_le16(0x8006));
741}
742
/*
 * Delete entry @name inside directory @dir.  For NFS-namespace volumes
 * the entry is first translated to its DOS directory base and deleted in
 * the DOS namespace (the NFS namespace cannot delete by name here).
 */
int
ncp_del_file_or_subdir(struct ncp_server *server,
		       struct inode *dir, const char *name)
{
	__u8 volnum = NCP_FINFO(dir)->volNumber;
	__le32 dirent = NCP_FINFO(dir)->dirEntNum;
	int name_space;

	name_space = server->name_space[volnum];
#ifdef CONFIG_NCPFS_NFS_NS
	if (name_space == NW_NS_NFS)
	{
		int result;

		/* resolve @name to a DOS dir base, then delete by base */
		result=ncp_obtain_DOS_dir_base(server, name_space, volnum, dirent, name, &dirent);
		if (result) return result;
		name = NULL;
		name_space = NW_NS_DOS;
	}
#endif	/* CONFIG_NCPFS_NFS_NS */
	return ncp_DeleteNSEntry(server, 1, volnum, dirent, name, name_space, cpu_to_le16(0x8006));
}
765
766static inline void ConvertToNWfromDWORD(__u16 v0, __u16 v1, __u8 ret[6])
767{
768 __le16 *dest = (__le16 *) ret;
769 dest[1] = cpu_to_le16(v0);
770 dest[2] = cpu_to_le16(v1);
771 dest[0] = cpu_to_le16(v0 + 1);
772 return;
773}
774
/* If both dir and name are NULL, then in target there's already a
   looked-up entry that wants to be opened. */
/*
 * Open or create a file/subdirectory via NCP function 87, subfunction 1,
 * filling @target with the resulting entry info and file handle.
 * Returns 0 on success, otherwise the NCP error; the server lock is
 * released on all paths.
 */
int ncp_open_create_file_or_subdir(struct ncp_server *server,
				   struct inode *dir, const char *name,
				   int open_create_mode,
				   __le32 create_attributes,
				   __le16 desired_acc_rights,
				   struct ncp_entry_info *target)
{
	__le16 search_attribs = cpu_to_le16(0x0006);
	__u8 volnum;
	__le32 dirent;
	int result;

	volnum = NCP_FINFO(dir)->volNumber;
	dirent = NCP_FINFO(dir)->dirEntNum;

	/* when creating a directory, also match directory entries */
	if ((create_attributes & aDIR) != 0) {
		search_attribs |= cpu_to_le16(0x8000);
	}
	ncp_init_request(server);
	ncp_add_byte(server, 1);	/* subfunction */
	ncp_add_byte(server, server->name_space[volnum]);
	ncp_add_byte(server, open_create_mode);
	ncp_add_word(server, search_attribs);
	ncp_add_dword(server, RIM_ALL);
	ncp_add_dword(server, create_attributes);
	/* The desired acc rights seem to be the inherited rights mask
	   for directories */
	ncp_add_word(server, desired_acc_rights);
	ncp_add_handle_path(server, volnum, dirent, 1, name);

	if ((result = ncp_request(server, 87)) != 0)
		goto out;
	/* directories are not "opened"; only plain files get a handle */
	if (!(create_attributes & aDIR))
		target->opened = 1;

	/* in target there's a new finfo to fill */
	ncp_extract_file_info(ncp_reply_data(server, 6), &(target->i));
	target->volume = target->i.volNumber;
	ConvertToNWfromDWORD(ncp_reply_le16(server, 0),
			     ncp_reply_le16(server, 2),
			     target->file_handle);

	ncp_unlock_server(server);

	/* best-effort: ignore failure to fetch NFS-namespace extras */
	(void)ncp_obtain_nfs_info(server, &(target->i));
	return 0;

out:
	ncp_unlock_server(server);
	return result;
}
828
829int
830ncp_initialize_search(struct ncp_server *server, struct inode *dir,
831 struct nw_search_sequence *target)
832{
833 __u8 volnum = NCP_FINFO(dir)->volNumber;
834 __le32 dirent = NCP_FINFO(dir)->dirEntNum;
835 int result;
836
837 ncp_init_request(server);
838 ncp_add_byte(server, 2); /* subfunction */
839 ncp_add_byte(server, server->name_space[volnum]);
840 ncp_add_byte(server, 0); /* reserved */
841 ncp_add_handle_path(server, volnum, dirent, 1, NULL);
842
843 result = ncp_request(server, 87);
844 if (result)
845 goto out;
846 memcpy(target, ncp_reply_data(server, 0), sizeof(*target));
847
848out:
849 ncp_unlock_server(server);
850 return result;
851}
852
/*
 * Fetch the next batch of directory entries for search sequence @seq
 * (NCP function 87, subfunction 20) into caller-supplied @buffer.
 * On success: *rbuf/*rsize describe the entry data, *cnt the number of
 * entries, *more whether the enumeration continues, and @seq is updated
 * for the next call.  Returns 0, an NCP error, or 0xFF on a short reply.
 */
int ncp_search_for_fileset(struct ncp_server *server,
			   struct nw_search_sequence *seq,
			   int* more,
			   int* cnt,
			   char* buffer,
			   size_t bufsize,
			   char** rbuf,
			   size_t* rsize)
{
	int result;

	ncp_init_request(server);
	ncp_add_byte(server, 20);
	ncp_add_byte(server, server->name_space[seq->volNumber]);
	ncp_add_byte(server, 0);	/* datastream */
	ncp_add_word(server, cpu_to_le16(0x8006));
	ncp_add_dword(server, RIM_ALL);
	ncp_add_word(server, cpu_to_le16(32767));	/* max returned items */
	ncp_add_mem(server, seq, 9);	/* current search state */
#ifdef CONFIG_NCPFS_NFS_NS
	if (server->name_space[seq->volNumber] == NW_NS_NFS) {
		ncp_add_byte(server, 0);	/* 0 byte pattern */
	} else
#endif
	{
		ncp_add_byte(server, 2);	/* 2 byte pattern */
		ncp_add_byte(server, 0xff);	/* following is a wildcard */
		ncp_add_byte(server, '*');
	}
	result = ncp_request2(server, 87, buffer, bufsize);
	if (result) {
		ncp_unlock_server(server);
		return result;
	}
	/* reply must at least hold the 9-byte sequence, more flag, count */
	if (server->ncp_reply_size < 12) {
		ncp_unlock_server(server);
		return 0xFF;
	}
	*rsize = server->ncp_reply_size - 12;
	ncp_unlock_server(server);
	/* parse the reply: [0..8]=seq, [9]=more, [10..11]=count, [12..]=data */
	buffer = buffer + sizeof(struct ncp_reply_header);
	*rbuf = buffer + 12;
	*cnt = WVAL_LH(buffer + 10);
	*more = BVAL(buffer + 9);
	memcpy(seq, buffer, 9);
	return 0;
}
900
/*
 * Rename/move @old_name in @old_dir to @new_name in @new_dir via NCP
 * function 87, subfunction 4.  @old_type is the search-attribute word
 * (file vs. directory).  Returns -EINVAL on NULL arguments, otherwise
 * the NCP request result.
 */
static int
ncp_RenameNSEntry(struct ncp_server *server,
		  struct inode *old_dir, const char *old_name, __le16 old_type,
		  struct inode *new_dir, const char *new_name)
{
	int result = -EINVAL;

	if ((old_dir == NULL) || (old_name == NULL) ||
	    (new_dir == NULL) || (new_name == NULL))
		goto out;

	ncp_init_request(server);
	ncp_add_byte(server, 4);	/* subfunction */
	ncp_add_byte(server, server->name_space[NCP_FINFO(old_dir)->volNumber]);
	ncp_add_byte(server, 1);	/* rename flag */
	ncp_add_word(server, old_type);	/* search attributes */

	/* source Handle Path */
	ncp_add_byte(server, NCP_FINFO(old_dir)->volNumber);
	ncp_add_dword(server, NCP_FINFO(old_dir)->dirEntNum);
	ncp_add_byte(server, 1);
	ncp_add_byte(server, 1);	/* 1 source component */

	/* dest Handle Path */
	ncp_add_byte(server, NCP_FINFO(new_dir)->volNumber);
	ncp_add_dword(server, NCP_FINFO(new_dir)->dirEntNum);
	ncp_add_byte(server, 1);
	ncp_add_byte(server, 1);	/* 1 destination component */

	/* source path string */
	ncp_add_pstring(server, old_name);
	/* dest path string */
	ncp_add_pstring(server, new_name);

	result = ncp_request(server, 87);
	ncp_unlock_server(server);
out:
	return result;
}
940
/*
 * Rename or move an entry, emulating "replace existing target":
 * try as a file (type 0x06), on 0xFF (not found) retry as a directory
 * (type 0x16); on 0x92 (no files renamed, i.e. target exists) delete
 * the target and retry once.  Not atomic -- see comment below.
 */
int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server,
				  struct inode *old_dir, const char *old_name,
				  struct inode *new_dir, const char *new_name)
{
	int result;
	__le16 old_type = cpu_to_le16(0x06);

/* If somebody can do it atomic, call me... vandrove@vc.cvut.cz */
	result = ncp_RenameNSEntry(server, old_dir, old_name, old_type,
				   new_dir, new_name);
	if (result == 0xFF)	/* File Not Found, try directory */
	{
		old_type = cpu_to_le16(0x16);
		result = ncp_RenameNSEntry(server, old_dir, old_name, old_type,
					   new_dir, new_name);
	}
	if (result != 0x92) return result;	/* All except NO_FILES_RENAMED */
	result = ncp_del_file_or_subdir(server, new_dir, new_name);
	if (result != 0) return -EACCES;
	result = ncp_RenameNSEntry(server, old_dir, old_name, old_type,
				   new_dir, new_name);
	return result;
}
964
965
/* We have to transfer to/from user space */
/*
 * Read up to @to_read bytes at @offset from the open file @file_id into
 * the kernel buffer @target (NCP function 72).  *bytes_read is set from
 * the reply on success.  Returns 0 or the NCP error.
 */
int
ncp_read_kernel(struct ncp_server *server, const char *file_id,
		__u32 offset, __u16 to_read, char *target, int *bytes_read)
{
	const char *source;
	int result;

	ncp_init_request(server);
	ncp_add_byte(server, 0);
	ncp_add_mem(server, file_id, 6);
	ncp_add_be32(server, offset);
	ncp_add_be16(server, to_read);

	if ((result = ncp_request(server, 72)) != 0) {
		goto out;
	}
	*bytes_read = ncp_reply_be16(server, 0);
	/* data starts after the 2-byte length; odd offsets get 1 pad byte */
	source = ncp_reply_data(server, 2 + (offset & 1));

	memcpy(target, source, *bytes_read);
out:
	ncp_unlock_server(server);
	return result;
}
991
/* There is a problem... egrep and some other silly tools do:
     x = mmap(NULL, MAP_PRIVATE, PROT_READ|PROT_WRITE, <ncpfs fd>, 32768);
     read(<ncpfs fd>, x, 32768);
   Copying the read result with copy_to_user then triggers a page fault
   that cannot be serviced, because the server is locked for the duration
   of the read.  So we have to use a temporary buffer: ncp_unlock_server
   must be called before copy_to_user (and, for writes, copy_from_user
   must be called before ncp_init_request; the same applies to the
   send-raw-packet ioctl).  Since files are normally read in bigger
   chunks, the caller provides a kmalloc'ed (or vmalloc'ed) chunk of
   memory with size >= to_read...
 */
/*
 * Read @to_read bytes at @offset from @file_id through the caller's
 * @bounce buffer (sized per ncp_read_bounce_size()), then copy to the
 * user-backed iterator @to AFTER the server lock is dropped -- see the
 * comment above for why the bounce buffer is required.
 * Returns 0, an NCP error, -EIO on an oversized length, or -EFAULT.
 */
int
ncp_read_bounce(struct ncp_server *server, const char *file_id,
		__u32 offset, __u16 to_read, struct iov_iter *to,
		int *bytes_read, void *bounce, __u32 bufsize)
{
	int result;

	ncp_init_request(server);
	ncp_add_byte(server, 0);
	ncp_add_mem(server, file_id, 6);
	ncp_add_be32(server, offset);
	ncp_add_be16(server, to_read);
	result = ncp_request2(server, 72, bounce, bufsize);
	/* unlock before touching user memory (copy_to_iter may fault) */
	ncp_unlock_server(server);
	if (!result) {
		/* big-endian length word follows the reply header */
		int len = get_unaligned_be16((char *)bounce +
			  sizeof(struct ncp_reply_header));
		result = -EIO;
		if (len <= to_read) {
			char* source;

			/* +2 skips the length; odd offsets add 1 pad byte */
			source = (char*)bounce +
				 sizeof(struct ncp_reply_header) + 2 +
				 (offset & 1);
			*bytes_read = len;
			result = 0;
			if (copy_to_iter(source, len, to) != len)
				result = -EFAULT;
		}
	}
	return result;
}
1035
/*
 * Write @to_write bytes from kernel buffer @source at @offset to the
 * open file @file_id (NCP function 73).  On success *bytes_written is
 * set to @to_write (the protocol has no partial-write report here).
 */
int
ncp_write_kernel(struct ncp_server *server, const char *file_id,
		 __u32 offset, __u16 to_write,
		 const char *source, int *bytes_written)
{
	int result;

	ncp_init_request(server);
	ncp_add_byte(server, 0);
	ncp_add_mem(server, file_id, 6);
	ncp_add_be32(server, offset);
	ncp_add_be16(server, to_write);
	ncp_add_mem(server, source, to_write);

	if ((result = ncp_request(server, 73)) == 0)
		*bytes_written = to_write;
	ncp_unlock_server(server);
	return result;
}
1055
1056#ifdef CONFIG_NCPFS_IOCTL_LOCKING
1057int
1058ncp_LogPhysicalRecord(struct ncp_server *server, const char *file_id,
1059 __u8 locktype, __u32 offset, __u32 length, __u16 timeout)
1060{
1061 int result;
1062
1063 ncp_init_request(server);
1064 ncp_add_byte(server, locktype);
1065 ncp_add_mem(server, file_id, 6);
1066 ncp_add_be32(server, offset);
1067 ncp_add_be32(server, length);
1068 ncp_add_be16(server, timeout);
1069
1070 if ((result = ncp_request(server, 0x1A)) != 0)
1071 {
1072 ncp_unlock_server(server);
1073 return result;
1074 }
1075 ncp_unlock_server(server);
1076 return 0;
1077}
1078
1079int
1080ncp_ClearPhysicalRecord(struct ncp_server *server, const char *file_id,
1081 __u32 offset, __u32 length)
1082{
1083 int result;
1084
1085 ncp_init_request(server);
1086 ncp_add_byte(server, 0); /* who knows... lanalyzer says that */
1087 ncp_add_mem(server, file_id, 6);
1088 ncp_add_be32(server, offset);
1089 ncp_add_be32(server, length);
1090
1091 if ((result = ncp_request(server, 0x1E)) != 0)
1092 {
1093 ncp_unlock_server(server);
1094 return result;
1095 }
1096 ncp_unlock_server(server);
1097 return 0;
1098}
1099#endif /* CONFIG_NCPFS_IOCTL_LOCKING */
1100
1101#ifdef CONFIG_NCPFS_NLS
/* These are the NLS conversion routines, with inspiration and code parts
 * from the vfat file system and hints from Petr Vandrovec.
 */
1105
/*
 * Convert a name from the I/O (mount-option) character set to the
 * volume character set.  @vlen is in/out: buffer size in, resulting
 * length out.  When @cc is set, the result is upper-cased.  Special
 * escape: in non-UTF8 mode, NCP_ESC followed by 4 hex digits encodes a
 * raw 16-bit unicode value.  Returns 0 or a negative errno.
 */
int
ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen,
		const unsigned char *iname, unsigned int ilen, int cc)
{
	struct nls_table *in = server->nls_io;
	struct nls_table *out = server->nls_vol;
	unsigned char *vname_start;
	unsigned char *vname_end;
	const unsigned char *iname_end;

	iname_end = iname + ilen;
	vname_start = vname;
	/* reserve one byte for the trailing NUL */
	vname_end = vname + *vlen - 1;

	while (iname < iname_end) {
		int chl;
		wchar_t ec;

		if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
			int k;
			unicode_t u;

			k = utf8_to_utf32(iname, iname_end - iname, &u);
			if (k < 0 || u > MAX_WCHAR_T)
				return -EINVAL;
			iname += k;
			ec = u;
		} else {
			if (*iname == NCP_ESC) {
				int k;

				/* need ESC plus 4 hex digits */
				if (iname_end - iname < 5)
					goto nospec;

				ec = 0;
				for (k = 1; k < 5; k++) {
					unsigned char nc;

					/* decode one hex digit ('0'-'9','A'-'F') */
					nc = iname[k] - '0';
					if (nc >= 10) {
						nc -= 'A' - '0' - 10;
						if ((nc < 10) || (nc > 15)) {
							goto nospec;
						}
					}
					ec = (ec << 4) | nc;
				}
				iname += 5;
			} else {
nospec:;
				if ( (chl = in->char2uni(iname, iname_end - iname, &ec)) < 0)
					return chl;
				iname += chl;
			}
		}

		/* unitoupper should be here! */

		chl = out->uni2char(ec, vname, vname_end - vname);
		if (chl < 0)
			return chl;

		/* this is wrong... */
		if (cc) {
			int chi;

			for (chi = 0; chi < chl; chi++){
				vname[chi] = ncp_toupper(out, vname[chi]);
			}
		}
		vname += chl;
	}

	*vname = 0;
	*vlen = vname - vname_start;
	return 0;
}
1183
/*
 * Convert a name from the volume character set to the I/O character
 * set -- the inverse of ncp__io2vol().  @ilen is in/out: buffer size in,
 * resulting length out.  When @cc is set, input is lower-cased first
 * (via a temporary copy).  Characters the output table cannot represent
 * are emitted as NCP_ESC plus 4 hex digits.  Returns 0 or negative errno.
 */
int
ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen,
		const unsigned char *vname, unsigned int vlen, int cc)
{
	struct nls_table *in = server->nls_vol;
	struct nls_table *out = server->nls_io;
	const unsigned char *vname_end;
	unsigned char *iname_start;
	unsigned char *iname_end;
	unsigned char *vname_cc;
	int err;

	vname_cc = NULL;

	if (cc) {
		int i;

		/* this is wrong! */
		vname_cc = kmalloc(vlen, GFP_KERNEL);
		if (!vname_cc)
			return -ENOMEM;
		for (i = 0; i < vlen; i++)
			vname_cc[i] = ncp_tolower(in, vname[i]);
		vname = vname_cc;
	}

	iname_start = iname;
	/* reserve one byte for the trailing NUL */
	iname_end = iname + *ilen - 1;
	vname_end = vname + vlen;

	while (vname < vname_end) {
		wchar_t ec;
		int chl;

		if ( (chl = in->char2uni(vname, vname_end - vname, &ec)) < 0) {
			err = chl;
			goto quit;
		}
		vname += chl;

		/* unitolower should be here! */

		if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
			int k;

			k = utf32_to_utf8(ec, iname, iname_end - iname);
			if (k < 0) {
				err = -ENAMETOOLONG;
				goto quit;
			}
			iname += k;
		} else {
			if ( (chl = out->uni2char(ec, iname, iname_end - iname)) >= 0) {
				iname += chl;
			} else {
				int k;

				/* unmappable: escape as NCP_ESC + 4 hex digits */
				if (iname_end - iname < 5) {
					err = -ENAMETOOLONG;
					goto quit;
				}
				*iname = NCP_ESC;
				for (k = 4; k > 0; k--) {
					unsigned char v;

					v = (ec & 0xF) + '0';
					if (v > '9') {
						v += 'A' - '9' - 1;
					}
					iname[k] = v;
					ec >>= 4;
				}
				iname += 5;
			}
		}
	}

	*iname = 0;
	*ilen = iname - iname_start;
	err = 0;
quit:;
	if (cc)
		kfree(vname_cc);
	return err;
}
1269
1270#else
1271
/*
 * Non-NLS fallback: copy @iname into @vname, optionally upper-casing
 * (@cc != 0), NUL-terminate, and report the length via *vlen.
 * Returns -ENAMETOOLONG when the destination buffer is too small
 * (needs ilen + 1 bytes), otherwise 0.
 */
int
ncp__io2vol(unsigned char *vname, unsigned int *vlen,
		const unsigned char *iname, unsigned int ilen, int cc)
{
	unsigned int pos;

	if (*vlen <= ilen)
		return -ENAMETOOLONG;

	if (cc) {
		for (pos = 0; pos < ilen; pos++)
			vname[pos] = toupper(iname[pos]);
	} else {
		memmove(vname, iname, ilen);
	}

	vname[ilen] = 0;
	*vlen = ilen;
	return 0;
}
1296
/*
 * Non-NLS fallback: copy @vname into @iname, optionally lower-casing
 * (@cc != 0), NUL-terminate, and report the length via *ilen.
 * Returns -ENAMETOOLONG when the destination buffer is too small
 * (needs vlen + 1 bytes), otherwise 0.
 */
int
ncp__vol2io(unsigned char *iname, unsigned int *ilen,
		const unsigned char *vname, unsigned int vlen, int cc)
{
	unsigned int pos;

	if (*ilen <= vlen)
		return -ENAMETOOLONG;

	if (cc) {
		for (pos = 0; pos < vlen; pos++)
			iname[pos] = tolower(vname[pos]);
	} else {
		memmove(iname, vname, vlen);
	}

	iname[vlen] = 0;
	*ilen = vlen;
	return 0;
}
1321
1322#endif
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
deleted file mode 100644
index aaae8aa9bf7d..000000000000
--- a/fs/ncpfs/ncplib_kernel.h
+++ /dev/null
@@ -1,215 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * ncplib_kernel.h
4 *
5 * Copyright (C) 1995, 1996 by Volker Lendecke
6 * Modified for big endian by J.F. Chadima and David S. Miller
7 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
8 * Modified 1998, 1999 Wolfram Pienkoss for NLS
9 * Modified 1999 Wolfram Pienkoss for directory caching
10 *
11 */
12
13#ifndef _NCPLIB_H
14#define _NCPLIB_H
15
16
17#include <linux/fs.h>
18#include <linux/types.h>
19#include <linux/errno.h>
20#include <linux/slab.h>
21#include <linux/stat.h>
22#include <linux/fcntl.h>
23#include <linux/pagemap.h>
24
25#include <linux/uaccess.h>
26#include <asm/byteorder.h>
27#include <asm/unaligned.h>
28#include <asm/string.h>
29
30#ifdef CONFIG_NCPFS_NLS
31#include <linux/nls.h>
32#else
33#include <linux/ctype.h>
34#endif /* CONFIG_NCPFS_NLS */
35
36#define NCP_MIN_SYMLINK_SIZE 8
37#define NCP_MAX_SYMLINK_SIZE 512
38
39#define NCP_BLOCK_SHIFT 9
40#define NCP_BLOCK_SIZE (1 << (NCP_BLOCK_SHIFT))
41
42int ncp_negotiate_buffersize(struct ncp_server *, int, int *);
43int ncp_negotiate_size_and_options(struct ncp_server *server, int size,
44 int options, int *ret_size, int *ret_options);
45
46int ncp_get_volume_info_with_number(struct ncp_server* server, int n,
47 struct ncp_volume_info *target);
48
49int ncp_get_directory_info(struct ncp_server* server, __u8 dirhandle,
50 struct ncp_volume_info* target);
51
52int ncp_close_file(struct ncp_server *, const char *);
53static inline int ncp_read_bounce_size(__u32 size) {
54 return sizeof(struct ncp_reply_header) + 2 + 2 + size + 8;
55};
56int ncp_read_bounce(struct ncp_server *, const char *, __u32, __u16,
57 struct iov_iter *, int *, void *bounce, __u32 bouncelen);
58int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16,
59 char *, int *);
60int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16,
61 const char *, int *);
62
/* Drop one reference from the inode's open count. */
static inline void ncp_inode_close(struct inode *inode) {
	atomic_dec(&NCP_FINFO(inode)->opened);
}
66
67void ncp_extract_file_info(const void* src, struct nw_info_struct* target);
68int ncp_obtain_info(struct ncp_server *server, struct inode *, const char *,
69 struct nw_info_struct *target);
70int ncp_obtain_nfs_info(struct ncp_server *server, struct nw_info_struct *target);
71int ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns);
72int ncp_get_volume_root(struct ncp_server *server, const char *volname,
73 __u32 *volume, __le32 *dirent, __le32 *dosdirent);
74int ncp_lookup_volume(struct ncp_server *, const char *, struct nw_info_struct *);
75int ncp_modify_file_or_subdir_dos_info(struct ncp_server *, struct inode *,
76 __le32, const struct nw_modify_dos_info *info);
77int ncp_modify_file_or_subdir_dos_info_path(struct ncp_server *, struct inode *,
78 const char* path, __le32, const struct nw_modify_dos_info *info);
79int ncp_modify_nfs_info(struct ncp_server *, __u8 volnum, __le32 dirent,
80 __u32 mode, __u32 rdev);
81
82int ncp_del_file_or_subdir2(struct ncp_server *, struct dentry*);
83int ncp_del_file_or_subdir(struct ncp_server *, struct inode *, const char *);
84int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, const char *,
85 int, __le32, __le16, struct ncp_entry_info *);
86
87int ncp_initialize_search(struct ncp_server *, struct inode *,
88 struct nw_search_sequence *target);
89int ncp_search_for_fileset(struct ncp_server *server,
90 struct nw_search_sequence *seq,
91 int* more, int* cnt,
92 char* buffer, size_t bufsize,
93 char** rbuf, size_t* rsize);
94
95int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server,
96 struct inode *, const char *, struct inode *, const char *);
97
98
99int
100ncp_LogPhysicalRecord(struct ncp_server *server,
101 const char *file_id, __u8 locktype,
102 __u32 offset, __u32 length, __u16 timeout);
103
104#ifdef CONFIG_NCPFS_IOCTL_LOCKING
105int
106ncp_ClearPhysicalRecord(struct ncp_server *server,
107 const char *file_id,
108 __u32 offset, __u32 length);
109#endif /* CONFIG_NCPFS_IOCTL_LOCKING */
110
111int
112ncp_mount_subdir(struct ncp_server *, __u8, __u8, __le32,
113 __u32* volume, __le32* dirent, __le32* dosdirent);
114int ncp_dirhandle_alloc(struct ncp_server *, __u8 vol, __le32 dirent, __u8 *dirhandle);
115int ncp_dirhandle_free(struct ncp_server *, __u8 dirhandle);
116
117int ncp_create_new(struct inode *dir, struct dentry *dentry,
118 umode_t mode, dev_t rdev, __le32 attributes);
119
/* True when the mount requested NFS extras and the volume's active
 * namespace is NW_NS_NFS; always false without CONFIG_NCPFS_NFS_NS. */
static inline int ncp_is_nfs_extras(struct ncp_server* server, unsigned int volnum) {
#ifdef CONFIG_NCPFS_NFS_NS
	return (server->m.flags & NCP_MOUNT_NFS_EXTRAS) &&
	       (server->name_space[volnum] == NW_NS_NFS);
#else
	return 0;
#endif
}
128
129#ifdef CONFIG_NCPFS_NLS
130
131int ncp__io2vol(struct ncp_server *, unsigned char *, unsigned int *,
132 const unsigned char *, unsigned int, int);
133int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *,
134 const unsigned char *, unsigned int, int);
135
136#define NCP_ESC ':'
137#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io)
138#define ncp_tolower(t, c) nls_tolower(t, c)
139#define ncp_toupper(t, c) nls_toupper(t, c)
140#define ncp_strnicmp(t, s1, s2, len) \
141 nls_strnicmp(t, s1, s2, len)
142#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(S,m,i,n,k,U)
143#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(S,m,i,n,k,U)
144
145#else
146
147int ncp__io2vol(unsigned char *, unsigned int *,
148 const unsigned char *, unsigned int, int);
149int ncp__vol2io(unsigned char *, unsigned int *,
150 const unsigned char *, unsigned int, int);
151
152#define NCP_IO_TABLE(sb) NULL
153#define ncp_tolower(t, c) tolower(c)
154#define ncp_toupper(t, c) toupper(c)
155#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U)
156#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U)
157
158
/* Non-NLS fallback: ASCII case-insensitive comparison of the first @len
 * bytes.  Returns 0 when equal, 1 on the first mismatch.  The table
 * argument @t is unused here (kept for signature parity with the NLS
 * variant). */
static inline int ncp_strnicmp(const struct nls_table *t,
		const unsigned char *s1, const unsigned char *s2, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (tolower(s1[i]) != tolower(s2[i]))
			return 1;
	}
	return 0;
}
169
170#endif /* CONFIG_NCPFS_NLS */
171
172#define NCP_GET_AGE(dentry) (jiffies - (dentry)->d_time)
173#define NCP_MAX_AGE(server) atomic_read(&(server)->dentry_ttl)
174#define NCP_TEST_AGE(server,dentry) (NCP_GET_AGE(dentry) < NCP_MAX_AGE(server))
175
/* Force immediate revalidation by backdating d_time a full TTL. */
static inline void
ncp_age_dentry(struct ncp_server* server, struct dentry* dentry)
{
	dentry->d_time = jiffies - NCP_MAX_AGE(server);
}

/* Mark the dentry as validated right now. */
static inline void
ncp_new_dentry(struct dentry* dentry)
{
	dentry->d_time = jiffies;
}
187
/* Header shared by every directory-cache page. */
struct ncp_cache_head {
	time_t		mtime;		/* directory mtime when cached */
	unsigned long	time;	/* cache age */
	unsigned long	end;	/* last valid fpos in cache */
	int		eof;		/* enumeration reached end of dir */
};

/* Dentry pointers per PAGE_SIZE cache page. */
#define NCP_DIRCACHE_SIZE	((int)(PAGE_SIZE/sizeof(struct dentry *)))
/* One page of cached dentries; the first page overlays the head. */
union ncp_dir_cache {
	struct ncp_cache_head	head;
	struct dentry		*dentry[NCP_DIRCACHE_SIZE];
};

/* Slots left in the first page after the embedded head. */
#define NCP_FIRSTCACHE_SIZE	((int)((NCP_DIRCACHE_SIZE * \
	sizeof(struct dentry *) - sizeof(struct ncp_cache_head)) /	\
	sizeof(struct dentry *)))

/* Index of the first usable dentry slot in the first page. */
#define NCP_DIRCACHE_START	(NCP_DIRCACHE_SIZE - NCP_FIRSTCACHE_SIZE)

/* Per-readdir bookkeeping while filling/consuming the cache. */
struct ncp_cache_control {
	struct	ncp_cache_head	head;
	struct	page		*page;		/* currently mapped cache page */
	union	ncp_dir_cache	*cache;		/* kmapped view of @page */
	unsigned long		fpos, ofs;	/* file position / page offset */
	int			filled, valid, idx;
};
214
215#endif /* _NCPLIB_H */
diff --git a/fs/ncpfs/ncpsign_kernel.c b/fs/ncpfs/ncpsign_kernel.c
deleted file mode 100644
index 8085b1a3ba47..000000000000
--- a/fs/ncpfs/ncpsign_kernel.c
+++ /dev/null
@@ -1,128 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * ncpsign_kernel.c
4 *
5 * Arne de Bruijn (arne@knoware.nl), 1997
6 *
7 */
8
9
10#ifdef CONFIG_NCPFS_PACKET_SIGNING
11
12#include <linux/string.h>
13#include <linux/ncp.h>
14#include <linux/bitops.h>
15#include "ncp_fs.h"
16#include "ncpsign_kernel.h"
17
18/* i386: 32-bit, little endian, handles mis-alignment */
19#ifdef __i386__
20#define GET_LE32(p) (*(const int *)(p))
21#define PUT_LE32(p,v) { *(int *)(p)=v; }
22#else
23/* from include/ncplib.h */
24#define BVAL(buf,pos) (((const __u8 *)(buf))[pos])
25#define PVAL(buf,pos) ((unsigned)BVAL(buf,pos))
26#define BSET(buf,pos,val) (((__u8 *)(buf))[pos] = (val))
27
/* Little-endian 16/32-bit load and store helpers built on the byte
 * macros above; used on architectures without the i386 fast path
 * (which cannot rely on unaligned little-endian access). */
static inline __u16
WVAL_LH(const __u8 * buf, int pos)
{
	return PVAL(buf, pos) | PVAL(buf, pos + 1) << 8;
}
static inline __u32
DVAL_LH(const __u8 * buf, int pos)
{
	return WVAL_LH(buf, pos) | WVAL_LH(buf, pos + 2) << 16;
}
static inline void
WSET_LH(__u8 * buf, int pos, __u16 val)
{
	BSET(buf, pos, val & 0xff);
	BSET(buf, pos + 1, val >> 8);
}
static inline void
DSET_LH(__u8 * buf, int pos, __u32 val)
{
	WSET_LH(buf, pos, val & 0xffff);
	WSET_LH(buf, pos + 2, val >> 16);
}
50
51#define GET_LE32(p) DVAL_LH(p,0)
52#define PUT_LE32(p,v) DSET_LH(p,0,v)
53#endif
54
/*
 * NCP packet-signing compression function: mixes the 16-byte state
 * r_data1 with the 64-byte block r_data2 through three MD4-style rounds
 * (note the MD4 round constants 0x5a827999 / 0x6ed9eba1) and writes the
 * 16-byte result to outdata.  Not cryptographically strong by modern
 * standards; kept for wire compatibility.
 */
static void nwsign(char *r_data1, char *r_data2, char *outdata) {
	int i;
	unsigned int w0,w1,w2,w3;
	static int rbit[4]={0, 2, 1, 3};
#ifdef __i386__
	/* i386 handles unaligned little-endian loads directly */
	unsigned int *data2=(unsigned int *)r_data2;
#else
	unsigned int data2[16];
	for (i=0;i<16;i++)
		data2[i]=GET_LE32(r_data2+(i<<2));
#endif
	w0=GET_LE32(r_data1);
	w1=GET_LE32(r_data1+4);
	w2=GET_LE32(r_data1+8);
	w3=GET_LE32(r_data1+12);
	/* round 1: F = (x & y) | (~x & z) */
	for (i=0;i<16;i+=4) {
		w0=rol32(w0 + ((w1 & w2) | ((~w1) & w3)) + data2[i+0],3);
		w3=rol32(w3 + ((w0 & w1) | ((~w0) & w2)) + data2[i+1],7);
		w2=rol32(w2 + ((w3 & w0) | ((~w3) & w1)) + data2[i+2],11);
		w1=rol32(w1 + ((w2 & w3) | ((~w2) & w0)) + data2[i+3],19);
	}
	/* round 2: majority function plus constant */
	for (i=0;i<4;i++) {
		w0=rol32(w0 + (((w2 | w3) & w1) | (w2 & w3)) + 0x5a827999 + data2[i+0],3);
		w3=rol32(w3 + (((w1 | w2) & w0) | (w1 & w2)) + 0x5a827999 + data2[i+4],5);
		w2=rol32(w2 + (((w0 | w1) & w3) | (w0 & w1)) + 0x5a827999 + data2[i+8],9);
		w1=rol32(w1 + (((w3 | w0) & w2) | (w3 & w0)) + 0x5a827999 + data2[i+12],13);
	}
	/* round 3: XOR function, block words permuted by rbit[] */
	for (i=0;i<4;i++) {
		w0=rol32(w0 + ((w1 ^ w2) ^ w3) + 0x6ed9eba1 + data2[rbit[i]+0],3);
		w3=rol32(w3 + ((w0 ^ w1) ^ w2) + 0x6ed9eba1 + data2[rbit[i]+8],9);
		w2=rol32(w2 + ((w3 ^ w0) ^ w1) + 0x6ed9eba1 + data2[rbit[i]+4],11);
		w1=rol32(w1 + ((w2 ^ w3) ^ w0) + 0x6ed9eba1 + data2[rbit[i]+12],15);
	}
	/* feed-forward of the initial state, as in MD4 */
	PUT_LE32(outdata,(w0+GET_LE32(r_data1)) & 0xffffffff);
	PUT_LE32(outdata+4,(w1+GET_LE32(r_data1+4)) & 0xffffffff);
	PUT_LE32(outdata+8,(w2+GET_LE32(r_data1+8)) & 0xffffffff);
	PUT_LE32(outdata+12,(w3+GET_LE32(r_data1+12)) & 0xffffffff);
}
93
/* Make a signature for the current packet and add it at the end of the */
/* packet. */
/*
 * Builds a 64-byte input block (8-byte sign_root, 4-byte total size,
 * first 52 bytes of the packet, zero-padded), chains it through nwsign()
 * into server->sign_last, and copies 8 bytes of the result to sign_buff.
 */
void __sign_packet(struct ncp_server *server, const char *packet, size_t size, __u32 totalsize, void *sign_buff) {
	unsigned char data[64];

	memcpy(data, server->sign_root, 8);
	/* NOTE(review): totalsize is stored in host byte order here --
	   presumably the wire expects little-endian; verify on BE hosts. */
	*(__u32*)(data + 8) = totalsize;
	if (size < 52) {
		memcpy(data + 12, packet, size);
		memset(data + 12 + size, 0, 52 - size);
	} else {
		memcpy(data + 12, packet, 52);
	}
	nwsign(server->sign_last, data, server->sign_last);
	memcpy(sign_buff, server->sign_last, 8);
}
110
/*
 * Verify the 8-byte signature of a reply: recompute the expected hash
 * over the same 64-byte block layout as __sign_packet() (without
 * advancing server->sign_last) and compare.  Returns 0 when it matches.
 * NOTE(review): memcmp is not constant-time; likely acceptable for this
 * legacy protocol, but worth noting.
 */
int sign_verify_reply(struct ncp_server *server, const char *packet, size_t size, __u32 totalsize, const void *sign_buff) {
	unsigned char data[64];
	unsigned char hash[16];

	memcpy(data, server->sign_root, 8);
	/* same host-byte-order caveat as in __sign_packet() */
	*(__u32*)(data + 8) = totalsize;
	if (size < 52) {
		memcpy(data + 12, packet, size);
		memset(data + 12 + size, 0, 52 - size);
	} else {
		memcpy(data + 12, packet, 52);
	}
	nwsign(server->sign_last, data, hash);
	return memcmp(sign_buff, hash, 8);
}
126
127#endif /* CONFIG_NCPFS_PACKET_SIGNING */
128
diff --git a/fs/ncpfs/ncpsign_kernel.h b/fs/ncpfs/ncpsign_kernel.h
deleted file mode 100644
index 57ff0a0650b8..000000000000
--- a/fs/ncpfs/ncpsign_kernel.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * ncpsign_kernel.h
4 *
5 * Arne de Bruijn (arne@knoware.nl), 1997
6 *
7 */
8
9#ifndef _NCPSIGN_KERNEL_H
10#define _NCPSIGN_KERNEL_H
11
12#ifdef CONFIG_NCPFS_PACKET_SIGNING
13void __sign_packet(struct ncp_server *server, const char *data, size_t size, __u32 totalsize, void *sign_buff);
14int sign_verify_reply(struct ncp_server *server, const char *data, size_t size, __u32 totalsize, const void *sign_buff);
15#endif
16
/* Sign the packet when signing is active; returns the number of
 * signature bytes appended (8), or 0 when signing is off or not
 * compiled in. */
static inline size_t sign_packet(struct ncp_server *server, const char *data, size_t size, __u32 totalsize, void *sign_buff) {
#ifdef CONFIG_NCPFS_PACKET_SIGNING
	if (server->sign_active) {
		__sign_packet(server, data, size, totalsize, sign_buff);
		return 8;
	}
#endif
	return 0;
}
26
27#endif
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
deleted file mode 100644
index 4c13174d85b7..000000000000
--- a/fs/ncpfs/sock.c
+++ /dev/null
@@ -1,855 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/fs/ncpfs/sock.c
4 *
5 * Copyright (C) 1992, 1993 Rick Sladkey
6 *
7 * Modified 1995, 1996 by Volker Lendecke to be usable for ncp
8 * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache
9 *
10 */
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/time.h>
15#include <linux/errno.h>
16#include <linux/socket.h>
17#include <linux/fcntl.h>
18#include <linux/stat.h>
19#include <linux/string.h>
20#include <linux/sched/signal.h>
21#include <linux/uaccess.h>
22#include <linux/in.h>
23#include <linux/net.h>
24#include <linux/mm.h>
25#include <linux/netdevice.h>
26#include <linux/signal.h>
27#include <linux/slab.h>
28#include <net/scm.h>
29#include <net/sock.h>
30#include <linux/ipx.h>
31#include <linux/poll.h>
32#include <linux/file.h>
33
34#include "ncp_fs.h"
35
36#include "ncpsign_kernel.h"
37
38static int _recv(struct socket *sock, void *buf, int size, unsigned flags)
39{
40 struct msghdr msg = {NULL, };
41 struct kvec iov = {buf, size};
42 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size);
43 return sock_recvmsg(sock, &msg, flags);
44}
45
46static int _send(struct socket *sock, const void *buff, int len)
47{
48 struct msghdr msg = { .msg_flags = 0 };
49 struct kvec vec = {.iov_base = (void *)buff, .iov_len = len};
50 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &vec, 1, len);
51 return sock_sendmsg(sock, &msg);
52}
53
54struct ncp_request_reply {
55 struct list_head req;
56 wait_queue_head_t wq;
57 atomic_t refs;
58 unsigned char* reply_buf;
59 size_t datalen;
60 int result;
61 enum { RQ_DONE, RQ_INPROGRESS, RQ_QUEUED, RQ_IDLE, RQ_ABANDONED } status;
62 struct iov_iter from;
63 struct kvec tx_iov[3];
64 u_int16_t tx_type;
65 u_int32_t sign[6];
66};
67
68static inline struct ncp_request_reply* ncp_alloc_req(void)
69{
70 struct ncp_request_reply *req;
71
72 req = kmalloc(sizeof(struct ncp_request_reply), GFP_KERNEL);
73 if (!req)
74 return NULL;
75
76 init_waitqueue_head(&req->wq);
77 atomic_set(&req->refs, (1));
78 req->status = RQ_IDLE;
79
80 return req;
81}
82
83static void ncp_req_get(struct ncp_request_reply *req)
84{
85 atomic_inc(&req->refs);
86}
87
88static void ncp_req_put(struct ncp_request_reply *req)
89{
90 if (atomic_dec_and_test(&req->refs))
91 kfree(req);
92}
93
94void ncp_tcp_data_ready(struct sock *sk)
95{
96 struct ncp_server *server = sk->sk_user_data;
97
98 server->data_ready(sk);
99 schedule_work(&server->rcv.tq);
100}
101
102void ncp_tcp_error_report(struct sock *sk)
103{
104 struct ncp_server *server = sk->sk_user_data;
105
106 server->error_report(sk);
107 schedule_work(&server->rcv.tq);
108}
109
110void ncp_tcp_write_space(struct sock *sk)
111{
112 struct ncp_server *server = sk->sk_user_data;
113
114 /* We do not need any locking: we first set tx.creq, and then we do sendmsg,
115 not vice versa... */
116 server->write_space(sk);
117 if (server->tx.creq)
118 schedule_work(&server->tx.tq);
119}
120
121void ncpdgram_timeout_call(struct timer_list *t)
122{
123 struct ncp_server *server = from_timer(server, t, timeout_tm);
124
125 schedule_work(&server->timeout_tq);
126}
127
128static inline void ncp_finish_request(struct ncp_server *server, struct ncp_request_reply *req, int result)
129{
130 req->result = result;
131 if (req->status != RQ_ABANDONED)
132 memcpy(req->reply_buf, server->rxbuf, req->datalen);
133 req->status = RQ_DONE;
134 wake_up_all(&req->wq);
135 ncp_req_put(req);
136}
137
138static void __abort_ncp_connection(struct ncp_server *server)
139{
140 struct ncp_request_reply *req;
141
142 ncp_invalidate_conn(server);
143 del_timer(&server->timeout_tm);
144 while (!list_empty(&server->tx.requests)) {
145 req = list_entry(server->tx.requests.next, struct ncp_request_reply, req);
146
147 list_del_init(&req->req);
148 ncp_finish_request(server, req, -EIO);
149 }
150 req = server->rcv.creq;
151 if (req) {
152 server->rcv.creq = NULL;
153 ncp_finish_request(server, req, -EIO);
154 server->rcv.ptr = NULL;
155 server->rcv.state = 0;
156 }
157 req = server->tx.creq;
158 if (req) {
159 server->tx.creq = NULL;
160 ncp_finish_request(server, req, -EIO);
161 }
162}
163
164static inline int get_conn_number(struct ncp_reply_header *rp)
165{
166 return rp->conn_low | (rp->conn_high << 8);
167}
168
169static inline void __ncp_abort_request(struct ncp_server *server, struct ncp_request_reply *req, int err)
170{
171 /* If req is done, we got signal, but we also received answer... */
172 switch (req->status) {
173 case RQ_IDLE:
174 case RQ_DONE:
175 break;
176 case RQ_QUEUED:
177 list_del_init(&req->req);
178 ncp_finish_request(server, req, err);
179 break;
180 case RQ_INPROGRESS:
181 req->status = RQ_ABANDONED;
182 break;
183 case RQ_ABANDONED:
184 break;
185 }
186}
187
188static inline void ncp_abort_request(struct ncp_server *server, struct ncp_request_reply *req, int err)
189{
190 mutex_lock(&server->rcv.creq_mutex);
191 __ncp_abort_request(server, req, err);
192 mutex_unlock(&server->rcv.creq_mutex);
193}
194
195static inline void __ncptcp_abort(struct ncp_server *server)
196{
197 __abort_ncp_connection(server);
198}
199
200static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req)
201{
202 struct msghdr msg = { .msg_iter = req->from, .msg_flags = MSG_DONTWAIT };
203 return sock_sendmsg(sock, &msg);
204}
205
206static void __ncptcp_try_send(struct ncp_server *server)
207{
208 struct ncp_request_reply *rq;
209 struct msghdr msg = { .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT };
210 int result;
211
212 rq = server->tx.creq;
213 if (!rq)
214 return;
215
216 msg.msg_iter = rq->from;
217 result = sock_sendmsg(server->ncp_sock, &msg);
218
219 if (result == -EAGAIN)
220 return;
221
222 if (result < 0) {
223 pr_err("tcp: Send failed: %d\n", result);
224 __ncp_abort_request(server, rq, result);
225 return;
226 }
227 if (!msg_data_left(&msg)) {
228 server->rcv.creq = rq;
229 server->tx.creq = NULL;
230 return;
231 }
232 rq->from = msg.msg_iter;
233}
234
235static inline void ncp_init_header(struct ncp_server *server, struct ncp_request_reply *req, struct ncp_request_header *h)
236{
237 req->status = RQ_INPROGRESS;
238 h->conn_low = server->connection;
239 h->conn_high = server->connection >> 8;
240 h->sequence = ++server->sequence;
241}
242
243static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request_reply *req)
244{
245 size_t signlen, len = req->tx_iov[1].iov_len;
246 struct ncp_request_header *h = req->tx_iov[1].iov_base;
247
248 ncp_init_header(server, req, h);
249 signlen = sign_packet(server,
250 req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
251 len - sizeof(struct ncp_request_header) + 1,
252 cpu_to_le32(len), req->sign);
253 if (signlen) {
254 /* NCP over UDP appends signature */
255 req->tx_iov[2].iov_base = req->sign;
256 req->tx_iov[2].iov_len = signlen;
257 }
258 iov_iter_kvec(&req->from, WRITE | ITER_KVEC,
259 req->tx_iov + 1, signlen ? 2 : 1, len + signlen);
260 server->rcv.creq = req;
261 server->timeout_last = server->m.time_out;
262 server->timeout_retries = server->m.retry_count;
263 ncpdgram_send(server->ncp_sock, req);
264 mod_timer(&server->timeout_tm, jiffies + server->m.time_out);
265}
266
267#define NCP_TCP_XMIT_MAGIC (0x446D6454)
268#define NCP_TCP_XMIT_VERSION (1)
269#define NCP_TCP_RCVD_MAGIC (0x744E6350)
270
271static void ncptcp_start_request(struct ncp_server *server, struct ncp_request_reply *req)
272{
273 size_t signlen, len = req->tx_iov[1].iov_len;
274 struct ncp_request_header *h = req->tx_iov[1].iov_base;
275
276 ncp_init_header(server, req, h);
277 signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1,
278 len - sizeof(struct ncp_request_header) + 1,
279 cpu_to_be32(len + 24), req->sign + 4) + 16;
280
281 req->sign[0] = htonl(NCP_TCP_XMIT_MAGIC);
282 req->sign[1] = htonl(len + signlen);
283 req->sign[2] = htonl(NCP_TCP_XMIT_VERSION);
284 req->sign[3] = htonl(req->datalen + 8);
285 /* NCP over TCP prepends signature */
286 req->tx_iov[0].iov_base = req->sign;
287 req->tx_iov[0].iov_len = signlen;
288 iov_iter_kvec(&req->from, WRITE | ITER_KVEC,
289 req->tx_iov, 2, len + signlen);
290
291 server->tx.creq = req;
292 __ncptcp_try_send(server);
293}
294
295static inline void __ncp_start_request(struct ncp_server *server, struct ncp_request_reply *req)
296{
297 /* we copy the data so that we do not depend on the caller
298 staying alive */
299 memcpy(server->txbuf, req->tx_iov[1].iov_base, req->tx_iov[1].iov_len);
300 req->tx_iov[1].iov_base = server->txbuf;
301
302 if (server->ncp_sock->type == SOCK_STREAM)
303 ncptcp_start_request(server, req);
304 else
305 ncpdgram_start_request(server, req);
306}
307
308static int ncp_add_request(struct ncp_server *server, struct ncp_request_reply *req)
309{
310 mutex_lock(&server->rcv.creq_mutex);
311 if (!ncp_conn_valid(server)) {
312 mutex_unlock(&server->rcv.creq_mutex);
313 pr_err("tcp: Server died\n");
314 return -EIO;
315 }
316 ncp_req_get(req);
317 if (server->tx.creq || server->rcv.creq) {
318 req->status = RQ_QUEUED;
319 list_add_tail(&req->req, &server->tx.requests);
320 mutex_unlock(&server->rcv.creq_mutex);
321 return 0;
322 }
323 __ncp_start_request(server, req);
324 mutex_unlock(&server->rcv.creq_mutex);
325 return 0;
326}
327
328static void __ncp_next_request(struct ncp_server *server)
329{
330 struct ncp_request_reply *req;
331
332 server->rcv.creq = NULL;
333 if (list_empty(&server->tx.requests)) {
334 return;
335 }
336 req = list_entry(server->tx.requests.next, struct ncp_request_reply, req);
337 list_del_init(&req->req);
338 __ncp_start_request(server, req);
339}
340
341static void info_server(struct ncp_server *server, unsigned int id, const void * data, size_t len)
342{
343 if (server->info_sock) {
344 struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
345 __be32 hdr[2] = {cpu_to_be32(len + 8), cpu_to_be32(id)};
346 struct kvec iov[2] = {
347 {.iov_base = hdr, .iov_len = 8},
348 {.iov_base = (void *)data, .iov_len = len},
349 };
350
351 iov_iter_kvec(&msg.msg_iter, ITER_KVEC | WRITE,
352 iov, 2, len + 8);
353
354 sock_sendmsg(server->info_sock, &msg);
355 }
356}
357
358void ncpdgram_rcv_proc(struct work_struct *work)
359{
360 struct ncp_server *server =
361 container_of(work, struct ncp_server, rcv.tq);
362 struct socket* sock;
363
364 sock = server->ncp_sock;
365
366 while (1) {
367 struct ncp_reply_header reply;
368 int result;
369
370 result = _recv(sock, &reply, sizeof(reply), MSG_PEEK | MSG_DONTWAIT);
371 if (result < 0) {
372 break;
373 }
374 if (result >= sizeof(reply)) {
375 struct ncp_request_reply *req;
376
377 if (reply.type == NCP_WATCHDOG) {
378 unsigned char buf[10];
379
380 if (server->connection != get_conn_number(&reply)) {
381 goto drop;
382 }
383 result = _recv(sock, buf, sizeof(buf), MSG_DONTWAIT);
384 if (result < 0) {
385 ncp_dbg(1, "recv failed with %d\n", result);
386 continue;
387 }
388 if (result < 10) {
389 ncp_dbg(1, "too short (%u) watchdog packet\n", result);
390 continue;
391 }
392 if (buf[9] != '?') {
393 ncp_dbg(1, "bad signature (%02X) in watchdog packet\n", buf[9]);
394 continue;
395 }
396 buf[9] = 'Y';
397 _send(sock, buf, sizeof(buf));
398 continue;
399 }
400 if (reply.type != NCP_POSITIVE_ACK && reply.type != NCP_REPLY) {
401 result = _recv(sock, server->unexpected_packet.data, sizeof(server->unexpected_packet.data), MSG_DONTWAIT);
402 if (result < 0) {
403 continue;
404 }
405 info_server(server, 0, server->unexpected_packet.data, result);
406 continue;
407 }
408 mutex_lock(&server->rcv.creq_mutex);
409 req = server->rcv.creq;
410 if (req && (req->tx_type == NCP_ALLOC_SLOT_REQUEST || (server->sequence == reply.sequence &&
411 server->connection == get_conn_number(&reply)))) {
412 if (reply.type == NCP_POSITIVE_ACK) {
413 server->timeout_retries = server->m.retry_count;
414 server->timeout_last = NCP_MAX_RPC_TIMEOUT;
415 mod_timer(&server->timeout_tm, jiffies + NCP_MAX_RPC_TIMEOUT);
416 } else if (reply.type == NCP_REPLY) {
417 result = _recv(sock, server->rxbuf, req->datalen, MSG_DONTWAIT);
418#ifdef CONFIG_NCPFS_PACKET_SIGNING
419 if (result >= 0 && server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) {
420 if (result < 8 + 8) {
421 result = -EIO;
422 } else {
423 unsigned int hdrl;
424
425 result -= 8;
426 hdrl = sock->sk->sk_family == AF_INET ? 8 : 6;
427 if (sign_verify_reply(server, server->rxbuf + hdrl, result - hdrl, cpu_to_le32(result), server->rxbuf + result)) {
428 pr_info("Signature violation\n");
429 result = -EIO;
430 }
431 }
432 }
433#endif
434 del_timer(&server->timeout_tm);
435 server->rcv.creq = NULL;
436 ncp_finish_request(server, req, result);
437 __ncp_next_request(server);
438 mutex_unlock(&server->rcv.creq_mutex);
439 continue;
440 }
441 }
442 mutex_unlock(&server->rcv.creq_mutex);
443 }
444drop:;
445 _recv(sock, &reply, sizeof(reply), MSG_DONTWAIT);
446 }
447}
448
449static void __ncpdgram_timeout_proc(struct ncp_server *server)
450{
451 /* If timer is pending, we are processing another request... */
452 if (!timer_pending(&server->timeout_tm)) {
453 struct ncp_request_reply* req;
454
455 req = server->rcv.creq;
456 if (req) {
457 int timeout;
458
459 if (server->m.flags & NCP_MOUNT_SOFT) {
460 if (server->timeout_retries-- == 0) {
461 __ncp_abort_request(server, req, -ETIMEDOUT);
462 return;
463 }
464 }
465 /* Ignore errors */
466 ncpdgram_send(server->ncp_sock, req);
467 timeout = server->timeout_last << 1;
468 if (timeout > NCP_MAX_RPC_TIMEOUT) {
469 timeout = NCP_MAX_RPC_TIMEOUT;
470 }
471 server->timeout_last = timeout;
472 mod_timer(&server->timeout_tm, jiffies + timeout);
473 }
474 }
475}
476
477void ncpdgram_timeout_proc(struct work_struct *work)
478{
479 struct ncp_server *server =
480 container_of(work, struct ncp_server, timeout_tq);
481 mutex_lock(&server->rcv.creq_mutex);
482 __ncpdgram_timeout_proc(server);
483 mutex_unlock(&server->rcv.creq_mutex);
484}
485
486static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len)
487{
488 int result;
489
490 if (buffer) {
491 result = _recv(server->ncp_sock, buffer, len, MSG_DONTWAIT);
492 } else {
493 static unsigned char dummy[1024];
494
495 if (len > sizeof(dummy)) {
496 len = sizeof(dummy);
497 }
498 result = _recv(server->ncp_sock, dummy, len, MSG_DONTWAIT);
499 }
500 if (result < 0) {
501 return result;
502 }
503 if (result > len) {
504 pr_err("tcp: bug in recvmsg (%u > %zu)\n", result, len);
505 return -EIO;
506 }
507 return result;
508}
509
510static int __ncptcp_rcv_proc(struct ncp_server *server)
511{
512 /* We have to check the result, so store the complete header */
513 while (1) {
514 int result;
515 struct ncp_request_reply *req;
516 int datalen;
517 int type;
518
519 while (server->rcv.len) {
520 result = do_tcp_rcv(server, server->rcv.ptr, server->rcv.len);
521 if (result == -EAGAIN) {
522 return 0;
523 }
524 if (result <= 0) {
525 req = server->rcv.creq;
526 if (req) {
527 __ncp_abort_request(server, req, -EIO);
528 } else {
529 __ncptcp_abort(server);
530 }
531 if (result < 0) {
532 pr_err("tcp: error in recvmsg: %d\n", result);
533 } else {
534 ncp_dbg(1, "tcp: EOF\n");
535 }
536 return -EIO;
537 }
538 if (server->rcv.ptr) {
539 server->rcv.ptr += result;
540 }
541 server->rcv.len -= result;
542 }
543 switch (server->rcv.state) {
544 case 0:
545 if (server->rcv.buf.magic != htonl(NCP_TCP_RCVD_MAGIC)) {
546 pr_err("tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic));
547 __ncptcp_abort(server);
548 return -EIO;
549 }
550 datalen = ntohl(server->rcv.buf.len) & 0x0FFFFFFF;
551 if (datalen < 10) {
552 pr_err("tcp: Unexpected reply len %d\n", datalen);
553 __ncptcp_abort(server);
554 return -EIO;
555 }
556#ifdef CONFIG_NCPFS_PACKET_SIGNING
557 if (server->sign_active) {
558 if (datalen < 18) {
559 pr_err("tcp: Unexpected reply len %d\n", datalen);
560 __ncptcp_abort(server);
561 return -EIO;
562 }
563 server->rcv.buf.len = datalen - 8;
564 server->rcv.ptr = (unsigned char*)&server->rcv.buf.p1;
565 server->rcv.len = 8;
566 server->rcv.state = 4;
567 break;
568 }
569#endif
570 type = ntohs(server->rcv.buf.type);
571#ifdef CONFIG_NCPFS_PACKET_SIGNING
572cont:;
573#endif
574 if (type != NCP_REPLY) {
575 if (datalen - 8 <= sizeof(server->unexpected_packet.data)) {
576 *(__u16*)(server->unexpected_packet.data) = htons(type);
577 server->unexpected_packet.len = datalen - 8;
578
579 server->rcv.state = 5;
580 server->rcv.ptr = server->unexpected_packet.data + 2;
581 server->rcv.len = datalen - 10;
582 break;
583 }
584 ncp_dbg(1, "tcp: Unexpected NCP type %02X\n", type);
585skipdata2:;
586 server->rcv.state = 2;
587skipdata:;
588 server->rcv.ptr = NULL;
589 server->rcv.len = datalen - 10;
590 break;
591 }
592 req = server->rcv.creq;
593 if (!req) {
594 ncp_dbg(1, "Reply without appropriate request\n");
595 goto skipdata2;
596 }
597 if (datalen > req->datalen + 8) {
598 pr_err("tcp: Unexpected reply len %d (expected at most %zd)\n", datalen, req->datalen + 8);
599 server->rcv.state = 3;
600 goto skipdata;
601 }
602 req->datalen = datalen - 8;
603 ((struct ncp_reply_header*)server->rxbuf)->type = NCP_REPLY;
604 server->rcv.ptr = server->rxbuf + 2;
605 server->rcv.len = datalen - 10;
606 server->rcv.state = 1;
607 break;
608#ifdef CONFIG_NCPFS_PACKET_SIGNING
609 case 4:
610 datalen = server->rcv.buf.len;
611 type = ntohs(server->rcv.buf.type2);
612 goto cont;
613#endif
614 case 1:
615 req = server->rcv.creq;
616 if (req->tx_type != NCP_ALLOC_SLOT_REQUEST) {
617 if (((struct ncp_reply_header*)server->rxbuf)->sequence != server->sequence) {
618 pr_err("tcp: Bad sequence number\n");
619 __ncp_abort_request(server, req, -EIO);
620 return -EIO;
621 }
622 if ((((struct ncp_reply_header*)server->rxbuf)->conn_low | (((struct ncp_reply_header*)server->rxbuf)->conn_high << 8)) != server->connection) {
623 pr_err("tcp: Connection number mismatch\n");
624 __ncp_abort_request(server, req, -EIO);
625 return -EIO;
626 }
627 }
628#ifdef CONFIG_NCPFS_PACKET_SIGNING
629 if (server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) {
630 if (sign_verify_reply(server, server->rxbuf + 6, req->datalen - 6, cpu_to_be32(req->datalen + 16), &server->rcv.buf.type)) {
631 pr_err("tcp: Signature violation\n");
632 __ncp_abort_request(server, req, -EIO);
633 return -EIO;
634 }
635 }
636#endif
637 ncp_finish_request(server, req, req->datalen);
638 nextreq:;
639 __ncp_next_request(server);
640 case 2:
641 next:;
642 server->rcv.ptr = (unsigned char*)&server->rcv.buf;
643 server->rcv.len = 10;
644 server->rcv.state = 0;
645 break;
646 case 3:
647 ncp_finish_request(server, server->rcv.creq, -EIO);
648 goto nextreq;
649 case 5:
650 info_server(server, 0, server->unexpected_packet.data, server->unexpected_packet.len);
651 goto next;
652 }
653 }
654}
655
656void ncp_tcp_rcv_proc(struct work_struct *work)
657{
658 struct ncp_server *server =
659 container_of(work, struct ncp_server, rcv.tq);
660
661 mutex_lock(&server->rcv.creq_mutex);
662 __ncptcp_rcv_proc(server);
663 mutex_unlock(&server->rcv.creq_mutex);
664}
665
666void ncp_tcp_tx_proc(struct work_struct *work)
667{
668 struct ncp_server *server =
669 container_of(work, struct ncp_server, tx.tq);
670
671 mutex_lock(&server->rcv.creq_mutex);
672 __ncptcp_try_send(server);
673 mutex_unlock(&server->rcv.creq_mutex);
674}
675
676static int do_ncp_rpc_call(struct ncp_server *server, int size,
677 unsigned char* reply_buf, int max_reply_size)
678{
679 int result;
680 struct ncp_request_reply *req;
681
682 req = ncp_alloc_req();
683 if (!req)
684 return -ENOMEM;
685
686 req->reply_buf = reply_buf;
687 req->datalen = max_reply_size;
688 req->tx_iov[1].iov_base = server->packet;
689 req->tx_iov[1].iov_len = size;
690 req->tx_type = *(u_int16_t*)server->packet;
691
692 result = ncp_add_request(server, req);
693 if (result < 0)
694 goto out;
695
696 if (wait_event_interruptible(req->wq, req->status == RQ_DONE)) {
697 ncp_abort_request(server, req, -EINTR);
698 result = -EINTR;
699 goto out;
700 }
701
702 result = req->result;
703
704out:
705 ncp_req_put(req);
706
707 return result;
708}
709
710/*
711 * We need the server to be locked here, so check!
712 */
713
714static int ncp_do_request(struct ncp_server *server, int size,
715 void* reply, int max_reply_size)
716{
717 int result;
718
719 if (server->lock == 0) {
720 pr_err("Server not locked!\n");
721 return -EIO;
722 }
723 if (!ncp_conn_valid(server)) {
724 return -EIO;
725 }
726 {
727 sigset_t old_set;
728 unsigned long mask, flags;
729
730 spin_lock_irqsave(&current->sighand->siglock, flags);
731 old_set = current->blocked;
732 if (current->flags & PF_EXITING)
733 mask = 0;
734 else
735 mask = sigmask(SIGKILL);
736 if (server->m.flags & NCP_MOUNT_INTR) {
737 /* FIXME: This doesn't seem right at all. So, like,
738 we can't handle SIGINT and get whatever to stop?
739 What if we've blocked it ourselves? What about
740 alarms? Why, in fact, are we mucking with the
741 sigmask at all? -- r~ */
742 if (current->sighand->action[SIGINT - 1].sa.sa_handler == SIG_DFL)
743 mask |= sigmask(SIGINT);
744 if (current->sighand->action[SIGQUIT - 1].sa.sa_handler == SIG_DFL)
745 mask |= sigmask(SIGQUIT);
746 }
747 siginitsetinv(&current->blocked, mask);
748 recalc_sigpending();
749 spin_unlock_irqrestore(&current->sighand->siglock, flags);
750
751 result = do_ncp_rpc_call(server, size, reply, max_reply_size);
752
753 spin_lock_irqsave(&current->sighand->siglock, flags);
754 current->blocked = old_set;
755 recalc_sigpending();
756 spin_unlock_irqrestore(&current->sighand->siglock, flags);
757 }
758
759 ncp_dbg(2, "do_ncp_rpc_call returned %d\n", result);
760
761 return result;
762}
763
764/* ncp_do_request assures that at least a complete reply header is
765 * received. It assumes that server->current_size contains the ncp
766 * request size
767 */
768int ncp_request2(struct ncp_server *server, int function,
769 void* rpl, int size)
770{
771 struct ncp_request_header *h;
772 struct ncp_reply_header* reply = rpl;
773 int result;
774
775 h = (struct ncp_request_header *) (server->packet);
776 if (server->has_subfunction != 0) {
777 *(__u16 *) & (h->data[0]) = htons(server->current_size - sizeof(*h) - 2);
778 }
779 h->type = NCP_REQUEST;
780 /*
781 * The server shouldn't know or care what task is making a
782 * request, so we always use the same task number.
783 */
784 h->task = 2; /* (current->pid) & 0xff; */
785 h->function = function;
786
787 result = ncp_do_request(server, server->current_size, reply, size);
788 if (result < 0) {
789 ncp_dbg(1, "ncp_request_error: %d\n", result);
790 goto out;
791 }
792 server->completion = reply->completion_code;
793 server->conn_status = reply->connection_state;
794 server->reply_size = result;
795 server->ncp_reply_size = result - sizeof(struct ncp_reply_header);
796
797 result = reply->completion_code;
798
799 if (result != 0)
800 ncp_vdbg("completion code=%x\n", result);
801out:
802 return result;
803}
804
805int ncp_connect(struct ncp_server *server)
806{
807 struct ncp_request_header *h;
808 int result;
809
810 server->connection = 0xFFFF;
811 server->sequence = 255;
812
813 h = (struct ncp_request_header *) (server->packet);
814 h->type = NCP_ALLOC_SLOT_REQUEST;
815 h->task = 2; /* see above */
816 h->function = 0;
817
818 result = ncp_do_request(server, sizeof(*h), server->packet, server->packet_size);
819 if (result < 0)
820 goto out;
821 server->connection = h->conn_low + (h->conn_high * 256);
822 result = 0;
823out:
824 return result;
825}
826
827int ncp_disconnect(struct ncp_server *server)
828{
829 struct ncp_request_header *h;
830
831 h = (struct ncp_request_header *) (server->packet);
832 h->type = NCP_DEALLOC_SLOT_REQUEST;
833 h->task = 2; /* see above */
834 h->function = 0;
835
836 return ncp_do_request(server, sizeof(*h), server->packet, server->packet_size);
837}
838
839void ncp_lock_server(struct ncp_server *server)
840{
841 mutex_lock(&server->mutex);
842 if (server->lock)
843 pr_warn("%s: was locked!\n", __func__);
844 server->lock = 1;
845}
846
847void ncp_unlock_server(struct ncp_server *server)
848{
849 if (!server->lock) {
850 pr_warn("%s: was not locked!\n", __func__);
851 return;
852 }
853 server->lock = 0;
854 mutex_unlock(&server->mutex);
855}
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
deleted file mode 100644
index b6e16da4837a..000000000000
--- a/fs/ncpfs/symlink.c
+++ /dev/null
@@ -1,182 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/fs/ncpfs/symlink.c
4 *
5 * Code for allowing symbolic links on NCPFS (i.e. NetWare)
6 * Symbolic links are not supported on native NetWare, so we use an
7 * infrequently-used flag (Sh) and store a two-word magic header in
8 * the file to make sure we don't accidentally use a non-link file
9 * as a link.
10 *
11 * When using the NFS namespace, we set the mode to indicate a symlink and
12 * don't bother with the magic numbers.
13 *
14 * from linux/fs/ext2/symlink.c
15 *
16 * Copyright (C) 1998-99, Frank A. Vorstenbosch
17 *
18 * ncpfs symlink handling code
19 * NLS support (c) 1999 Petr Vandrovec
20 * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info
21 *
22 */
23
24
25#include <linux/uaccess.h>
26
27#include <linux/errno.h>
28#include <linux/fs.h>
29#include <linux/time.h>
30#include <linux/slab.h>
31#include <linux/mm.h>
32#include <linux/stat.h>
33#include "ncp_fs.h"
34
35/* these magic numbers must appear in the symlink file -- this makes it a bit
36 more resilient against the magic attributes being set on random files. */
37
38#define NCP_SYMLINK_MAGIC0 cpu_to_le32(0x6c6d7973) /* "symlnk->" */
39#define NCP_SYMLINK_MAGIC1 cpu_to_le32(0x3e2d6b6e)
40
41/* ----- read a symbolic link ------------------------------------------ */
42
43static int ncp_symlink_readpage(struct file *file, struct page *page)
44{
45 struct inode *inode = page->mapping->host;
46 int error, length, len;
47 char *link, *rawlink;
48 char *buf = kmap(page);
49
50 error = -ENOMEM;
51 rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
52 if (!rawlink)
53 goto fail;
54
55 if (ncp_make_open(inode,O_RDONLY))
56 goto failEIO;
57
58 error=ncp_read_kernel(NCP_SERVER(inode),NCP_FINFO(inode)->file_handle,
59 0,NCP_MAX_SYMLINK_SIZE,rawlink,&length);
60
61 ncp_inode_close(inode);
62 /* Close file handle if no other users... */
63 ncp_make_closed(inode);
64 if (error)
65 goto failEIO;
66
67 if (NCP_FINFO(inode)->flags & NCPI_KLUDGE_SYMLINK) {
68 if (length<NCP_MIN_SYMLINK_SIZE ||
69 ((__le32 *)rawlink)[0]!=NCP_SYMLINK_MAGIC0 ||
70 ((__le32 *)rawlink)[1]!=NCP_SYMLINK_MAGIC1)
71 goto failEIO;
72 link = rawlink + 8;
73 length -= 8;
74 } else {
75 link = rawlink;
76 }
77
78 len = NCP_MAX_SYMLINK_SIZE;
79 error = ncp_vol2io(NCP_SERVER(inode), buf, &len, link, length, 0);
80 kfree(rawlink);
81 if (error)
82 goto fail;
83 SetPageUptodate(page);
84 kunmap(page);
85 unlock_page(page);
86 return 0;
87
88failEIO:
89 error = -EIO;
90 kfree(rawlink);
91fail:
92 SetPageError(page);
93 kunmap(page);
94 unlock_page(page);
95 return error;
96}
97
98/*
99 * symlinks can't do much...
100 */
101const struct address_space_operations ncp_symlink_aops = {
102 .readpage = ncp_symlink_readpage,
103};
104
105/* ----- create a new symbolic link -------------------------------------- */
106
107int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
108 struct inode *inode;
109 char *rawlink;
110 int length, err, i, outlen;
111 int kludge;
112 umode_t mode;
113 __le32 attr;
114 unsigned int hdr;
115
116 ncp_dbg(1, "dir=%p, dentry=%p, symname=%s\n", dir, dentry, symname);
117
118 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber))
119 kludge = 0;
120 else
121#ifdef CONFIG_NCPFS_EXTRAS
122 if (NCP_SERVER(dir)->m.flags & NCP_MOUNT_SYMLINKS)
123 kludge = 1;
124 else
125#endif
126 /* EPERM is returned by VFS if symlink procedure does not exist */
127 return -EPERM;
128
129 rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
130 if (!rawlink)
131 return -ENOMEM;
132
133 if (kludge) {
134 mode = 0;
135 attr = aSHARED | aHIDDEN;
136 ((__le32 *)rawlink)[0]=NCP_SYMLINK_MAGIC0;
137 ((__le32 *)rawlink)[1]=NCP_SYMLINK_MAGIC1;
138 hdr = 8;
139 } else {
140 mode = S_IFLNK | S_IRWXUGO;
141 attr = 0;
142 hdr = 0;
143 }
144
145 length = strlen(symname);
146 /* map to/from server charset, do not touch upper/lower case as
147 symlink can point out of ncp filesystem */
148 outlen = NCP_MAX_SYMLINK_SIZE - hdr;
149 err = ncp_io2vol(NCP_SERVER(dir), rawlink + hdr, &outlen, symname, length, 0);
150 if (err)
151 goto failfree;
152
153 outlen += hdr;
154
155 err = -EIO;
156 if (ncp_create_new(dir,dentry,mode,0,attr)) {
157 goto failfree;
158 }
159
160 inode=d_inode(dentry);
161
162 if (ncp_make_open(inode, O_WRONLY))
163 goto failfree;
164
165 if (ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle,
166 0, outlen, rawlink, &i) || i!=outlen) {
167 goto fail;
168 }
169
170 ncp_inode_close(inode);
171 ncp_make_closed(inode);
172 kfree(rawlink);
173 return 0;
174fail:;
175 ncp_inode_close(inode);
176 ncp_make_closed(inode);
177failfree:;
178 kfree(rawlink);
179 return err;
180}
181
182/* ----- EOF ----- */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 626d1382002e..6b3b372b59b9 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -8,7 +8,6 @@
8#include <linux/file.h> 8#include <linux/file.h>
9#include <linux/falloc.h> 9#include <linux/falloc.h>
10#include <linux/nfs_fs.h> 10#include <linux/nfs_fs.h>
11#include <uapi/linux/btrfs.h> /* BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE */
12#include "delegation.h" 11#include "delegation.h"
13#include "internal.h" 12#include "internal.h"
14#include "iostat.h" 13#include "iostat.h"
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 7c6f76d29f56..36b0772701a0 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -103,14 +103,14 @@ slow:
103 goto got_it; 103 goto got_it;
104} 104}
105 105
106void *ns_get_path(struct path *path, struct task_struct *task, 106void *ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
107 const struct proc_ns_operations *ns_ops) 107 void *private_data)
108{ 108{
109 struct ns_common *ns; 109 struct ns_common *ns;
110 void *ret; 110 void *ret;
111 111
112again: 112again:
113 ns = ns_ops->get(task); 113 ns = ns_get_cb(private_data);
114 if (!ns) 114 if (!ns)
115 return ERR_PTR(-ENOENT); 115 return ERR_PTR(-ENOENT);
116 116
@@ -120,6 +120,29 @@ again:
120 return ret; 120 return ret;
121} 121}
122 122
123struct ns_get_path_task_args {
124 const struct proc_ns_operations *ns_ops;
125 struct task_struct *task;
126};
127
128static struct ns_common *ns_get_path_task(void *private_data)
129{
130 struct ns_get_path_task_args *args = private_data;
131
132 return args->ns_ops->get(args->task);
133}
134
135void *ns_get_path(struct path *path, struct task_struct *task,
136 const struct proc_ns_operations *ns_ops)
137{
138 struct ns_get_path_task_args args = {
139 .ns_ops = ns_ops,
140 .task = task,
141 };
142
143 return ns_get_path_cb(path, ns_get_path_task, &args);
144}
145
123int open_related_ns(struct ns_common *ns, 146int open_related_ns(struct ns_common *ns,
124 struct ns_common *(*get_ns)(struct ns_common *ns)) 147 struct ns_common *(*get_ns)(struct ns_common *ns))
125{ 148{
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 40b5cc97f7b0..917fadca8a7b 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -311,7 +311,9 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
311 if (had_lock < 0) 311 if (had_lock < 0)
312 return ERR_PTR(had_lock); 312 return ERR_PTR(had_lock);
313 313
314 down_read(&OCFS2_I(inode)->ip_xattr_sem);
314 acl = ocfs2_get_acl_nolock(inode, type, di_bh); 315 acl = ocfs2_get_acl_nolock(inode, type, di_bh);
316 up_read(&OCFS2_I(inode)->ip_xattr_sem);
315 317
316 ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); 318 ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
317 brelse(di_bh); 319 brelse(di_bh);
@@ -330,7 +332,9 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
330 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 332 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
331 return 0; 333 return 0;
332 334
335 down_read(&OCFS2_I(inode)->ip_xattr_sem);
333 acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh); 336 acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
337 up_read(&OCFS2_I(inode)->ip_xattr_sem);
334 if (IS_ERR(acl) || !acl) 338 if (IS_ERR(acl) || !acl)
335 return PTR_ERR(acl); 339 return PTR_ERR(acl);
336 ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); 340 ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
@@ -361,8 +365,10 @@ int ocfs2_init_acl(handle_t *handle,
361 365
362 if (!S_ISLNK(inode->i_mode)) { 366 if (!S_ISLNK(inode->i_mode)) {
363 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 367 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
368 down_read(&OCFS2_I(dir)->ip_xattr_sem);
364 acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, 369 acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
365 dir_bh); 370 dir_bh);
371 up_read(&OCFS2_I(dir)->ip_xattr_sem);
366 if (IS_ERR(acl)) 372 if (IS_ERR(acl))
367 return PTR_ERR(acl); 373 return PTR_ERR(acl);
368 } 374 }
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ab5105f9767e..9a876bb07cac 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
165 struct ocfs2_extent_rec *rec); 165 struct ocfs2_extent_rec *rec);
166static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et); 166static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
167static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); 167static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
168
169static int ocfs2_reuse_blk_from_dealloc(handle_t *handle,
170 struct ocfs2_extent_tree *et,
171 struct buffer_head **new_eb_bh,
172 int blk_wanted, int *blk_given);
173static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et);
174
168static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { 175static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
169 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, 176 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
170 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, 177 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
@@ -448,6 +455,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
448 if (!obj) 455 if (!obj)
449 obj = (void *)bh->b_data; 456 obj = (void *)bh->b_data;
450 et->et_object = obj; 457 et->et_object = obj;
458 et->et_dealloc = NULL;
451 459
452 et->et_ops->eo_fill_root_el(et); 460 et->et_ops->eo_fill_root_el(et);
453 if (!et->et_ops->eo_fill_max_leaf_clusters) 461 if (!et->et_ops->eo_fill_max_leaf_clusters)
@@ -1158,7 +1166,7 @@ static int ocfs2_add_branch(handle_t *handle,
1158 struct buffer_head **last_eb_bh, 1166 struct buffer_head **last_eb_bh,
1159 struct ocfs2_alloc_context *meta_ac) 1167 struct ocfs2_alloc_context *meta_ac)
1160{ 1168{
1161 int status, new_blocks, i; 1169 int status, new_blocks, i, block_given = 0;
1162 u64 next_blkno, new_last_eb_blk; 1170 u64 next_blkno, new_last_eb_blk;
1163 struct buffer_head *bh; 1171 struct buffer_head *bh;
1164 struct buffer_head **new_eb_bhs = NULL; 1172 struct buffer_head **new_eb_bhs = NULL;
@@ -1213,11 +1221,31 @@ static int ocfs2_add_branch(handle_t *handle,
1213 goto bail; 1221 goto bail;
1214 } 1222 }
1215 1223
1216 status = ocfs2_create_new_meta_bhs(handle, et, new_blocks, 1224 /* Firstyly, try to reuse dealloc since we have already estimated how
1217 meta_ac, new_eb_bhs); 1225 * many extent blocks we may use.
1218 if (status < 0) { 1226 */
1219 mlog_errno(status); 1227 if (!ocfs2_is_dealloc_empty(et)) {
1220 goto bail; 1228 status = ocfs2_reuse_blk_from_dealloc(handle, et,
1229 new_eb_bhs, new_blocks,
1230 &block_given);
1231 if (status < 0) {
1232 mlog_errno(status);
1233 goto bail;
1234 }
1235 }
1236
1237 BUG_ON(block_given > new_blocks);
1238
1239 if (block_given < new_blocks) {
1240 BUG_ON(!meta_ac);
1241 status = ocfs2_create_new_meta_bhs(handle, et,
1242 new_blocks - block_given,
1243 meta_ac,
1244 &new_eb_bhs[block_given]);
1245 if (status < 0) {
1246 mlog_errno(status);
1247 goto bail;
1248 }
1221 } 1249 }
1222 1250
1223 /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 1251 /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
@@ -1340,15 +1368,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1340 struct ocfs2_alloc_context *meta_ac, 1368 struct ocfs2_alloc_context *meta_ac,
1341 struct buffer_head **ret_new_eb_bh) 1369 struct buffer_head **ret_new_eb_bh)
1342{ 1370{
1343 int status, i; 1371 int status, i, block_given = 0;
1344 u32 new_clusters; 1372 u32 new_clusters;
1345 struct buffer_head *new_eb_bh = NULL; 1373 struct buffer_head *new_eb_bh = NULL;
1346 struct ocfs2_extent_block *eb; 1374 struct ocfs2_extent_block *eb;
1347 struct ocfs2_extent_list *root_el; 1375 struct ocfs2_extent_list *root_el;
1348 struct ocfs2_extent_list *eb_el; 1376 struct ocfs2_extent_list *eb_el;
1349 1377
1350 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, 1378 if (!ocfs2_is_dealloc_empty(et)) {
1351 &new_eb_bh); 1379 status = ocfs2_reuse_blk_from_dealloc(handle, et,
1380 &new_eb_bh, 1,
1381 &block_given);
1382 } else if (meta_ac) {
1383 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
1384 &new_eb_bh);
1385
1386 } else {
1387 BUG();
1388 }
1389
1352 if (status < 0) { 1390 if (status < 0) {
1353 mlog_errno(status); 1391 mlog_errno(status);
1354 goto bail; 1392 goto bail;
@@ -1511,7 +1549,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
1511 int depth = le16_to_cpu(el->l_tree_depth); 1549 int depth = le16_to_cpu(el->l_tree_depth);
1512 struct buffer_head *bh = NULL; 1550 struct buffer_head *bh = NULL;
1513 1551
1514 BUG_ON(meta_ac == NULL); 1552 BUG_ON(meta_ac == NULL && ocfs2_is_dealloc_empty(et));
1515 1553
1516 shift = ocfs2_find_branch_target(et, &bh); 1554 shift = ocfs2_find_branch_target(et, &bh);
1517 if (shift < 0) { 1555 if (shift < 0) {
@@ -2598,11 +2636,8 @@ static void ocfs2_unlink_subtree(handle_t *handle,
2598 int i; 2636 int i;
2599 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 2637 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
2600 struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el; 2638 struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
2601 struct ocfs2_extent_list *el;
2602 struct ocfs2_extent_block *eb; 2639 struct ocfs2_extent_block *eb;
2603 2640
2604 el = path_leaf_el(left_path);
2605
2606 eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data; 2641 eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data;
2607 2642
2608 for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) 2643 for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
@@ -3938,7 +3973,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
3938 struct ocfs2_path *path, 3973 struct ocfs2_path *path,
3939 struct ocfs2_extent_rec *insert_rec) 3974 struct ocfs2_extent_rec *insert_rec)
3940{ 3975{
3941 int ret, i, next_free; 3976 int i, next_free;
3942 struct buffer_head *bh; 3977 struct buffer_head *bh;
3943 struct ocfs2_extent_list *el; 3978 struct ocfs2_extent_list *el;
3944 struct ocfs2_extent_rec *rec; 3979 struct ocfs2_extent_rec *rec;
@@ -3955,7 +3990,6 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
3955 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 3990 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
3956 "Owner %llu has a bad extent list\n", 3991 "Owner %llu has a bad extent list\n",
3957 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); 3992 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
3958 ret = -EIO;
3959 return; 3993 return;
3960 } 3994 }
3961 3995
@@ -5057,7 +5091,6 @@ int ocfs2_split_extent(handle_t *handle,
5057 struct buffer_head *last_eb_bh = NULL; 5091 struct buffer_head *last_eb_bh = NULL;
5058 struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; 5092 struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
5059 struct ocfs2_merge_ctxt ctxt; 5093 struct ocfs2_merge_ctxt ctxt;
5060 struct ocfs2_extent_list *rightmost_el;
5061 5094
5062 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || 5095 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
5063 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < 5096 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
@@ -5093,9 +5126,7 @@ int ocfs2_split_extent(handle_t *handle,
5093 } 5126 }
5094 5127
5095 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 5128 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
5096 rightmost_el = &eb->h_list; 5129 }
5097 } else
5098 rightmost_el = path_root_el(path);
5099 5130
5100 if (rec->e_cpos == split_rec->e_cpos && 5131 if (rec->e_cpos == split_rec->e_cpos &&
5101 rec->e_leaf_clusters == split_rec->e_leaf_clusters) 5132 rec->e_leaf_clusters == split_rec->e_leaf_clusters)
@@ -6585,6 +6616,154 @@ ocfs2_find_per_slot_free_list(int type,
6585 return fl; 6616 return fl;
6586} 6617}
6587 6618
6619static struct ocfs2_per_slot_free_list *
6620ocfs2_find_preferred_free_list(int type,
6621 int preferred_slot,
6622 int *real_slot,
6623 struct ocfs2_cached_dealloc_ctxt *ctxt)
6624{
6625 struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
6626
6627 while (fl) {
6628 if (fl->f_inode_type == type && fl->f_slot == preferred_slot) {
6629 *real_slot = fl->f_slot;
6630 return fl;
6631 }
6632
6633 fl = fl->f_next_suballocator;
6634 }
6635
6636 /* If we can't find any free list matching preferred slot, just use
6637 * the first one.
6638 */
6639 fl = ctxt->c_first_suballocator;
6640 *real_slot = fl->f_slot;
6641
6642 return fl;
6643}
6644
6645/* Return Value 1 indicates empty */
6646static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et)
6647{
6648 struct ocfs2_per_slot_free_list *fl = NULL;
6649
6650 if (!et->et_dealloc)
6651 return 1;
6652
6653 fl = et->et_dealloc->c_first_suballocator;
6654 if (!fl)
6655 return 1;
6656
6657 if (!fl->f_first)
6658 return 1;
6659
6660 return 0;
6661}
6662
6663/* If extent was deleted from tree due to extent rotation and merging, and
6664 * no metadata is reserved ahead of time. Try to reuse some extents
6665 * just deleted. This is only used to reuse extent blocks.
6666 * It is supposed to find enough extent blocks in dealloc if our estimation
6667 * on metadata is accurate.
6668 */
6669static int ocfs2_reuse_blk_from_dealloc(handle_t *handle,
6670 struct ocfs2_extent_tree *et,
6671 struct buffer_head **new_eb_bh,
6672 int blk_wanted, int *blk_given)
6673{
6674 int i, status = 0, real_slot;
6675 struct ocfs2_cached_dealloc_ctxt *dealloc;
6676 struct ocfs2_per_slot_free_list *fl;
6677 struct ocfs2_cached_block_free *bf;
6678 struct ocfs2_extent_block *eb;
6679 struct ocfs2_super *osb =
6680 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
6681
6682 *blk_given = 0;
6683
6684 /* If extent tree doesn't have a dealloc, this is not faulty. Just
6685 * tell upper caller dealloc can't provide any block and it should
6686 * ask for alloc to claim more space.
6687 */
6688 dealloc = et->et_dealloc;
6689 if (!dealloc)
6690 goto bail;
6691
6692 for (i = 0; i < blk_wanted; i++) {
6693 /* Prefer to use local slot */
6694 fl = ocfs2_find_preferred_free_list(EXTENT_ALLOC_SYSTEM_INODE,
6695 osb->slot_num, &real_slot,
6696 dealloc);
6697 /* If no more block can be reused, we should claim more
6698 * from alloc. Just return here normally.
6699 */
6700 if (!fl) {
6701 status = 0;
6702 break;
6703 }
6704
6705 bf = fl->f_first;
6706 fl->f_first = bf->free_next;
6707
6708 new_eb_bh[i] = sb_getblk(osb->sb, bf->free_blk);
6709 if (new_eb_bh[i] == NULL) {
6710 status = -ENOMEM;
6711 mlog_errno(status);
6712 goto bail;
6713 }
6714
6715 mlog(0, "Reusing block(%llu) from "
6716 "dealloc(local slot:%d, real slot:%d)\n",
6717 bf->free_blk, osb->slot_num, real_slot);
6718
6719 ocfs2_set_new_buffer_uptodate(et->et_ci, new_eb_bh[i]);
6720
6721 status = ocfs2_journal_access_eb(handle, et->et_ci,
6722 new_eb_bh[i],
6723 OCFS2_JOURNAL_ACCESS_CREATE);
6724 if (status < 0) {
6725 mlog_errno(status);
6726 goto bail;
6727 }
6728
6729 memset(new_eb_bh[i]->b_data, 0, osb->sb->s_blocksize);
6730 eb = (struct ocfs2_extent_block *) new_eb_bh[i]->b_data;
6731
6732 /* We can't guarantee that buffer head is still cached, so
6733 * polutlate the extent block again.
6734 */
6735 strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
6736 eb->h_blkno = cpu_to_le64(bf->free_blk);
6737 eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
6738 eb->h_suballoc_slot = cpu_to_le16(real_slot);
6739 eb->h_suballoc_loc = cpu_to_le64(bf->free_bg);
6740 eb->h_suballoc_bit = cpu_to_le16(bf->free_bit);
6741 eb->h_list.l_count =
6742 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
6743
6744 /* We'll also be dirtied by the caller, so
6745 * this isn't absolutely necessary.
6746 */
6747 ocfs2_journal_dirty(handle, new_eb_bh[i]);
6748
6749 if (!fl->f_first) {
6750 dealloc->c_first_suballocator = fl->f_next_suballocator;
6751 kfree(fl);
6752 }
6753 kfree(bf);
6754 }
6755
6756 *blk_given = i;
6757
6758bail:
6759 if (unlikely(status < 0)) {
6760 for (i = 0; i < blk_wanted; i++)
6761 brelse(new_eb_bh[i]);
6762 }
6763
6764 return status;
6765}
6766
6588int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 6767int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6589 int type, int slot, u64 suballoc, 6768 int type, int slot, u64 suballoc,
6590 u64 blkno, unsigned int bit) 6769 u64 blkno, unsigned int bit)
@@ -7382,6 +7561,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
7382 struct buffer_head *gd_bh = NULL; 7561 struct buffer_head *gd_bh = NULL;
7383 struct ocfs2_dinode *main_bm; 7562 struct ocfs2_dinode *main_bm;
7384 struct ocfs2_group_desc *gd = NULL; 7563 struct ocfs2_group_desc *gd = NULL;
7564 struct ocfs2_trim_fs_info info, *pinfo = NULL;
7385 7565
7386 start = range->start >> osb->s_clustersize_bits; 7566 start = range->start >> osb->s_clustersize_bits;
7387 len = range->len >> osb->s_clustersize_bits; 7567 len = range->len >> osb->s_clustersize_bits;
@@ -7419,6 +7599,42 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
7419 7599
7420 trace_ocfs2_trim_fs(start, len, minlen); 7600 trace_ocfs2_trim_fs(start, len, minlen);
7421 7601
7602 ocfs2_trim_fs_lock_res_init(osb);
7603 ret = ocfs2_trim_fs_lock(osb, NULL, 1);
7604 if (ret < 0) {
7605 if (ret != -EAGAIN) {
7606 mlog_errno(ret);
7607 ocfs2_trim_fs_lock_res_uninit(osb);
7608 goto out_unlock;
7609 }
7610
7611 mlog(ML_NOTICE, "Wait for trim on device (%s) to "
7612 "finish, which is running from another node.\n",
7613 osb->dev_str);
7614 ret = ocfs2_trim_fs_lock(osb, &info, 0);
7615 if (ret < 0) {
7616 mlog_errno(ret);
7617 ocfs2_trim_fs_lock_res_uninit(osb);
7618 goto out_unlock;
7619 }
7620
7621 if (info.tf_valid && info.tf_success &&
7622 info.tf_start == start && info.tf_len == len &&
7623 info.tf_minlen == minlen) {
7624 /* Avoid sending duplicated trim to a shared device */
7625 mlog(ML_NOTICE, "The same trim on device (%s) was "
7626 "just done from node (%u), return.\n",
7627 osb->dev_str, info.tf_nodenum);
7628 range->len = info.tf_trimlen;
7629 goto out_trimunlock;
7630 }
7631 }
7632
7633 info.tf_nodenum = osb->node_num;
7634 info.tf_start = start;
7635 info.tf_len = len;
7636 info.tf_minlen = minlen;
7637
7422 /* Determine first and last group to examine based on start and len */ 7638 /* Determine first and last group to examine based on start and len */
7423 first_group = ocfs2_which_cluster_group(main_bm_inode, start); 7639 first_group = ocfs2_which_cluster_group(main_bm_inode, start);
7424 if (first_group == osb->first_cluster_group_blkno) 7640 if (first_group == osb->first_cluster_group_blkno)
@@ -7463,6 +7679,13 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
7463 group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); 7679 group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
7464 } 7680 }
7465 range->len = trimmed * sb->s_blocksize; 7681 range->len = trimmed * sb->s_blocksize;
7682
7683 info.tf_trimlen = range->len;
7684 info.tf_success = (ret ? 0 : 1);
7685 pinfo = &info;
7686out_trimunlock:
7687 ocfs2_trim_fs_unlock(osb, pinfo);
7688 ocfs2_trim_fs_lock_res_uninit(osb);
7466out_unlock: 7689out_unlock:
7467 ocfs2_inode_unlock(main_bm_inode, 0); 7690 ocfs2_inode_unlock(main_bm_inode, 0);
7468 brelse(main_bm_bh); 7691 brelse(main_bm_bh);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 27b75cf32cfa..250bcacdf9e9 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -61,6 +61,7 @@ struct ocfs2_extent_tree {
61 ocfs2_journal_access_func et_root_journal_access; 61 ocfs2_journal_access_func et_root_journal_access;
62 void *et_object; 62 void *et_object;
63 unsigned int et_max_leaf_clusters; 63 unsigned int et_max_leaf_clusters;
64 struct ocfs2_cached_dealloc_ctxt *et_dealloc;
64}; 65};
65 66
66/* 67/*
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d1516327b787..e8e205bf2e41 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -797,6 +797,7 @@ struct ocfs2_write_ctxt {
797 struct ocfs2_cached_dealloc_ctxt w_dealloc; 797 struct ocfs2_cached_dealloc_ctxt w_dealloc;
798 798
799 struct list_head w_unwritten_list; 799 struct list_head w_unwritten_list;
800 unsigned int w_unwritten_count;
800}; 801};
801 802
802void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) 803void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
@@ -1386,6 +1387,7 @@ retry:
1386 desc->c_clear_unwritten = 0; 1387 desc->c_clear_unwritten = 0;
1387 list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list); 1388 list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list);
1388 list_add_tail(&new->ue_node, &wc->w_unwritten_list); 1389 list_add_tail(&new->ue_node, &wc->w_unwritten_list);
1390 wc->w_unwritten_count++;
1389 new = NULL; 1391 new = NULL;
1390unlock: 1392unlock:
1391 spin_unlock(&oi->ip_lock); 1393 spin_unlock(&oi->ip_lock);
@@ -2256,7 +2258,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
2256 ue->ue_phys = desc->c_phys; 2258 ue->ue_phys = desc->c_phys;
2257 2259
2258 list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list); 2260 list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list);
2259 dwc->dw_zero_count++; 2261 dwc->dw_zero_count += wc->w_unwritten_count;
2260 } 2262 }
2261 2263
2262 ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc); 2264 ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc);
@@ -2330,6 +2332,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
2330 2332
2331 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 2333 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
2332 2334
2335 /* Attach dealloc with extent tree in case that we may reuse extents
2336 * which are already unlinked from current extent tree due to extent
2337 * rotation and merging.
2338 */
2339 et.et_dealloc = &dealloc;
2340
2333 ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2, 2341 ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
2334 &data_ac, &meta_ac); 2342 &data_ac, &meta_ac);
2335 if (ret) { 2343 if (ret) {
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 62e8ec619b4c..af2e7473956e 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -314,12 +314,13 @@ void o2quo_conn_err(u8 node)
314 node, qs->qs_connected); 314 node, qs->qs_connected);
315 315
316 clear_bit(node, qs->qs_conn_bm); 316 clear_bit(node, qs->qs_conn_bm);
317
318 if (test_bit(node, qs->qs_hb_bm))
319 o2quo_set_hold(qs, node);
317 } 320 }
318 321
319 mlog(0, "node %u, %d total\n", node, qs->qs_connected); 322 mlog(0, "node %u, %d total\n", node, qs->qs_connected);
320 323
321 if (test_bit(node, qs->qs_hb_bm))
322 o2quo_set_hold(qs, node);
323 324
324 spin_unlock(&qs->qs_lock); 325 spin_unlock(&qs->qs_lock);
325} 326}
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index b95e7df5b76a..0276f7f8d5e6 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -196,7 +196,7 @@ struct o2net_msg_handler {
196 u32 nh_msg_type; 196 u32 nh_msg_type;
197 u32 nh_key; 197 u32 nh_key;
198 o2net_msg_handler_func *nh_func; 198 o2net_msg_handler_func *nh_func;
199 o2net_msg_handler_func *nh_func_data; 199 void *nh_func_data;
200 o2net_post_msg_handler_func 200 o2net_post_msg_handler_func
201 *nh_post_func; 201 *nh_post_func;
202 struct kref nh_kref; 202 struct kref nh_kref;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 32f9c72dff17..b7520e20a770 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1958,7 +1958,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
1958 1958
1959 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); 1959 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
1960 1960
1961 error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level); 1961 error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
1962 if (lock_level && error >= 0) { 1962 if (lock_level && error >= 0) {
1963 /* We release EX lock which used to update atime 1963 /* We release EX lock which used to update atime
1964 * and get PR lock again to reduce contention 1964 * and get PR lock again to reduce contention
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9c3e0f13ca87..a7df226f9449 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1122,13 +1122,6 @@ recheck:
1122 /* sleep if we haven't finished voting yet */ 1122 /* sleep if we haven't finished voting yet */
1123 if (sleep) { 1123 if (sleep) {
1124 unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS); 1124 unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS);
1125
1126 /*
1127 if (kref_read(&mle->mle_refs) < 2)
1128 mlog(ML_ERROR, "mle (%p) refs=%d, name=%.*s\n", mle,
1129 kref_read(&mle->mle_refs),
1130 res->lockname.len, res->lockname.name);
1131 */
1132 atomic_set(&mle->woken, 0); 1125 atomic_set(&mle->woken, 0);
1133 (void)wait_event_timeout(mle->wq, 1126 (void)wait_event_timeout(mle->wq,
1134 (atomic_read(&mle->woken) == 1), 1127 (atomic_read(&mle->woken) == 1),
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4689940a953c..9479f99c2145 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -259,6 +259,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
259 .flags = 0, 259 .flags = 0,
260}; 260};
261 261
262static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
263 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
264};
265
262static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { 266static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
263 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 267 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
264}; 268};
@@ -676,6 +680,24 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
676 &ocfs2_nfs_sync_lops, osb); 680 &ocfs2_nfs_sync_lops, osb);
677} 681}
678 682
683void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
684{
685 struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
686
687 ocfs2_lock_res_init_once(lockres);
688 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, lockres->l_name);
689 ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_TRIM_FS,
690 &ocfs2_trim_fs_lops, osb);
691}
692
693void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb)
694{
695 struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
696
697 ocfs2_simple_drop_lockres(osb, lockres);
698 ocfs2_lock_res_free(lockres);
699}
700
679static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 701static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
680 struct ocfs2_super *osb) 702 struct ocfs2_super *osb)
681{ 703{
@@ -1742,6 +1764,27 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1742 return status; 1764 return status;
1743} 1765}
1744 1766
1767int ocfs2_try_rw_lock(struct inode *inode, int write)
1768{
1769 int status, level;
1770 struct ocfs2_lock_res *lockres;
1771 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1772
1773 mlog(0, "inode %llu try to take %s RW lock\n",
1774 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1775 write ? "EXMODE" : "PRMODE");
1776
1777 if (ocfs2_mount_local(osb))
1778 return 0;
1779
1780 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1781
1782 level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1783
1784 status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
1785 return status;
1786}
1787
1745void ocfs2_rw_unlock(struct inode *inode, int write) 1788void ocfs2_rw_unlock(struct inode *inode, int write)
1746{ 1789{
1747 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1790 int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
@@ -2486,6 +2529,15 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
2486 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2529 ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2487 if (ret == -EAGAIN) { 2530 if (ret == -EAGAIN) {
2488 unlock_page(page); 2531 unlock_page(page);
2532 /*
2533 * If we can't get inode lock immediately, we should not return
2534 * directly here, since this will lead to a softlockup problem.
2535 * The method is to get a blocking lock and immediately unlock
2536 * before returning, this can avoid CPU resource waste due to
2537 * lots of retries, and benefits fairness in getting lock.
2538 */
2539 if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2540 ocfs2_inode_unlock(inode, ex);
2489 ret = AOP_TRUNCATED_PAGE; 2541 ret = AOP_TRUNCATED_PAGE;
2490 } 2542 }
2491 2543
@@ -2494,13 +2546,18 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
2494 2546
2495int ocfs2_inode_lock_atime(struct inode *inode, 2547int ocfs2_inode_lock_atime(struct inode *inode,
2496 struct vfsmount *vfsmnt, 2548 struct vfsmount *vfsmnt,
2497 int *level) 2549 int *level, int wait)
2498{ 2550{
2499 int ret; 2551 int ret;
2500 2552
2501 ret = ocfs2_inode_lock(inode, NULL, 0); 2553 if (wait)
2554 ret = ocfs2_inode_lock(inode, NULL, 0);
2555 else
2556 ret = ocfs2_try_inode_lock(inode, NULL, 0);
2557
2502 if (ret < 0) { 2558 if (ret < 0) {
2503 mlog_errno(ret); 2559 if (ret != -EAGAIN)
2560 mlog_errno(ret);
2504 return ret; 2561 return ret;
2505 } 2562 }
2506 2563
@@ -2512,9 +2569,14 @@ int ocfs2_inode_lock_atime(struct inode *inode,
2512 struct buffer_head *bh = NULL; 2569 struct buffer_head *bh = NULL;
2513 2570
2514 ocfs2_inode_unlock(inode, 0); 2571 ocfs2_inode_unlock(inode, 0);
2515 ret = ocfs2_inode_lock(inode, &bh, 1); 2572 if (wait)
2573 ret = ocfs2_inode_lock(inode, &bh, 1);
2574 else
2575 ret = ocfs2_try_inode_lock(inode, &bh, 1);
2576
2516 if (ret < 0) { 2577 if (ret < 0) {
2517 mlog_errno(ret); 2578 if (ret != -EAGAIN)
2579 mlog_errno(ret);
2518 return ret; 2580 return ret;
2519 } 2581 }
2520 *level = 1; 2582 *level = 1;
@@ -2745,6 +2807,70 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
2745 ex ? LKM_EXMODE : LKM_PRMODE); 2807 ex ? LKM_EXMODE : LKM_PRMODE);
2746} 2808}
2747 2809
2810int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
2811 struct ocfs2_trim_fs_info *info, int trylock)
2812{
2813 int status;
2814 struct ocfs2_trim_fs_lvb *lvb;
2815 struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
2816
2817 if (info)
2818 info->tf_valid = 0;
2819
2820 if (ocfs2_is_hard_readonly(osb))
2821 return -EROFS;
2822
2823 if (ocfs2_mount_local(osb))
2824 return 0;
2825
2826 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX,
2827 trylock ? DLM_LKF_NOQUEUE : 0, 0);
2828 if (status < 0) {
2829 if (status != -EAGAIN)
2830 mlog_errno(status);
2831 return status;
2832 }
2833
2834 if (info) {
2835 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2836 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2837 lvb->lvb_version == OCFS2_TRIMFS_LVB_VERSION) {
2838 info->tf_valid = 1;
2839 info->tf_success = lvb->lvb_success;
2840 info->tf_nodenum = be32_to_cpu(lvb->lvb_nodenum);
2841 info->tf_start = be64_to_cpu(lvb->lvb_start);
2842 info->tf_len = be64_to_cpu(lvb->lvb_len);
2843 info->tf_minlen = be64_to_cpu(lvb->lvb_minlen);
2844 info->tf_trimlen = be64_to_cpu(lvb->lvb_trimlen);
2845 }
2846 }
2847
2848 return status;
2849}
2850
2851void ocfs2_trim_fs_unlock(struct ocfs2_super *osb,
2852 struct ocfs2_trim_fs_info *info)
2853{
2854 struct ocfs2_trim_fs_lvb *lvb;
2855 struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
2856
2857 if (ocfs2_mount_local(osb))
2858 return;
2859
2860 if (info) {
2861 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2862 lvb->lvb_version = OCFS2_TRIMFS_LVB_VERSION;
2863 lvb->lvb_success = info->tf_success;
2864 lvb->lvb_nodenum = cpu_to_be32(info->tf_nodenum);
2865 lvb->lvb_start = cpu_to_be64(info->tf_start);
2866 lvb->lvb_len = cpu_to_be64(info->tf_len);
2867 lvb->lvb_minlen = cpu_to_be64(info->tf_minlen);
2868 lvb->lvb_trimlen = cpu_to_be64(info->tf_trimlen);
2869 }
2870
2871 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2872}
2873
2748int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2874int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2749{ 2875{
2750 int ret; 2876 int ret;
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index a7fc18ba0dc1..256e0a9067b8 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -70,6 +70,29 @@ struct ocfs2_orphan_scan_lvb {
70 __be32 lvb_os_seqno; 70 __be32 lvb_os_seqno;
71}; 71};
72 72
73#define OCFS2_TRIMFS_LVB_VERSION 1
74
75struct ocfs2_trim_fs_lvb {
76 __u8 lvb_version;
77 __u8 lvb_success;
78 __u8 lvb_reserved[2];
79 __be32 lvb_nodenum;
80 __be64 lvb_start;
81 __be64 lvb_len;
82 __be64 lvb_minlen;
83 __be64 lvb_trimlen;
84};
85
86struct ocfs2_trim_fs_info {
87 u8 tf_valid; /* lvb is valid, or not */
88 u8 tf_success; /* trim is successful, or not */
89 u32 tf_nodenum; /* osb node number */
90 u64 tf_start; /* trim start offset in clusters */
91 u64 tf_len; /* trim end offset in clusters */
92 u64 tf_minlen; /* trim minimum contiguous free clusters */
93 u64 tf_trimlen; /* trimmed length in bytes */
94};
95
73struct ocfs2_lock_holder { 96struct ocfs2_lock_holder {
74 struct list_head oh_list; 97 struct list_head oh_list;
75 struct pid *oh_owner_pid; 98 struct pid *oh_owner_pid;
@@ -116,13 +139,14 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
116int ocfs2_create_new_inode_locks(struct inode *inode); 139int ocfs2_create_new_inode_locks(struct inode *inode);
117int ocfs2_drop_inode_locks(struct inode *inode); 140int ocfs2_drop_inode_locks(struct inode *inode);
118int ocfs2_rw_lock(struct inode *inode, int write); 141int ocfs2_rw_lock(struct inode *inode, int write);
142int ocfs2_try_rw_lock(struct inode *inode, int write);
119void ocfs2_rw_unlock(struct inode *inode, int write); 143void ocfs2_rw_unlock(struct inode *inode, int write);
120int ocfs2_open_lock(struct inode *inode); 144int ocfs2_open_lock(struct inode *inode);
121int ocfs2_try_open_lock(struct inode *inode, int write); 145int ocfs2_try_open_lock(struct inode *inode, int write);
122void ocfs2_open_unlock(struct inode *inode); 146void ocfs2_open_unlock(struct inode *inode);
123int ocfs2_inode_lock_atime(struct inode *inode, 147int ocfs2_inode_lock_atime(struct inode *inode,
124 struct vfsmount *vfsmnt, 148 struct vfsmount *vfsmnt,
125 int *level); 149 int *level, int wait);
126int ocfs2_inode_lock_full_nested(struct inode *inode, 150int ocfs2_inode_lock_full_nested(struct inode *inode,
127 struct buffer_head **ret_bh, 151 struct buffer_head **ret_bh,
128 int ex, 152 int ex,
@@ -140,6 +164,9 @@ int ocfs2_inode_lock_with_page(struct inode *inode,
140/* 99% of the time we don't want to supply any additional flags -- 164/* 99% of the time we don't want to supply any additional flags --
141 * those are for very specific cases only. */ 165 * those are for very specific cases only. */
142#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL) 166#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
167#define ocfs2_try_inode_lock(i, b, e)\
168 ocfs2_inode_lock_full_nested(i, b, e, OCFS2_META_LOCK_NOQUEUE,\
169 OI_LS_NORMAL)
143void ocfs2_inode_unlock(struct inode *inode, 170void ocfs2_inode_unlock(struct inode *inode,
144 int ex); 171 int ex);
145int ocfs2_super_lock(struct ocfs2_super *osb, 172int ocfs2_super_lock(struct ocfs2_super *osb,
@@ -153,6 +180,12 @@ int ocfs2_rename_lock(struct ocfs2_super *osb);
153void ocfs2_rename_unlock(struct ocfs2_super *osb); 180void ocfs2_rename_unlock(struct ocfs2_super *osb);
154int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); 181int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex);
155void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex); 182void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex);
183void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb);
184void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb);
185int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
186 struct ocfs2_trim_fs_info *info, int trylock);
187void ocfs2_trim_fs_unlock(struct ocfs2_super *osb,
188 struct ocfs2_trim_fs_info *info);
156int ocfs2_dentry_lock(struct dentry *dentry, int ex); 189int ocfs2_dentry_lock(struct dentry *dentry, int ex);
157void ocfs2_dentry_unlock(struct dentry *dentry, int ex); 190void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
158int ocfs2_file_lock(struct file *file, int ex, int trylock); 191int ocfs2_file_lock(struct file *file, int ex, int trylock);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index e4719e0a3f99..06cb96462bf9 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -38,6 +38,7 @@
38#include "inode.h" 38#include "inode.h"
39#include "super.h" 39#include "super.h"
40#include "symlink.h" 40#include "symlink.h"
41#include "aops.h"
41#include "ocfs2_trace.h" 42#include "ocfs2_trace.h"
42 43
43#include "buffer_head_io.h" 44#include "buffer_head_io.h"
@@ -832,6 +833,50 @@ out:
832 return ret; 833 return ret;
833} 834}
834 835
836/* Is IO overwriting allocated blocks? */
837int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
838 u64 map_start, u64 map_len)
839{
840 int ret = 0, is_last;
841 u32 mapping_end, cpos;
842 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
843 struct ocfs2_extent_rec rec;
844
845 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
846 if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
847 return ret;
848 else
849 return -EAGAIN;
850 }
851
852 cpos = map_start >> osb->s_clustersize_bits;
853 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
854 map_start + map_len);
855 is_last = 0;
856 while (cpos < mapping_end && !is_last) {
857 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
858 NULL, &rec, &is_last);
859 if (ret) {
860 mlog_errno(ret);
861 goto out;
862 }
863
864 if (rec.e_blkno == 0ULL)
865 break;
866
867 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
868 break;
869
870 cpos = le32_to_cpu(rec.e_cpos) +
871 le16_to_cpu(rec.e_leaf_clusters);
872 }
873
874 if (cpos < mapping_end)
875 ret = -EAGAIN;
876out:
877 return ret;
878}
879
835int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) 880int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
836{ 881{
837 struct inode *inode = file->f_mapping->host; 882 struct inode *inode = file->f_mapping->host;
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 67ea57d2fd59..1057586ec19f 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
54 u64 map_start, u64 map_len); 54 u64 map_start, u64 map_len);
55 55
56int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
57 u64 map_start, u64 map_len);
58
56int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin); 59int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin);
57 60
58int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 61int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a1d051055472..5d1784a365a3 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
140 spin_unlock(&oi->ip_lock); 140 spin_unlock(&oi->ip_lock);
141 } 141 }
142 142
143 file->f_mode |= FMODE_NOWAIT;
144
143leave: 145leave:
144 return status; 146 return status;
145} 147}
@@ -2132,12 +2134,12 @@ out:
2132} 2134}
2133 2135
2134static int ocfs2_prepare_inode_for_write(struct file *file, 2136static int ocfs2_prepare_inode_for_write(struct file *file,
2135 loff_t pos, 2137 loff_t pos, size_t count, int wait)
2136 size_t count)
2137{ 2138{
2138 int ret = 0, meta_level = 0; 2139 int ret = 0, meta_level = 0, overwrite_io = 0;
2139 struct dentry *dentry = file->f_path.dentry; 2140 struct dentry *dentry = file->f_path.dentry;
2140 struct inode *inode = d_inode(dentry); 2141 struct inode *inode = d_inode(dentry);
2142 struct buffer_head *di_bh = NULL;
2141 loff_t end; 2143 loff_t end;
2142 2144
2143 /* 2145 /*
@@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2145 * if we need to make modifications here. 2147 * if we need to make modifications here.
2146 */ 2148 */
2147 for(;;) { 2149 for(;;) {
2148 ret = ocfs2_inode_lock(inode, NULL, meta_level); 2150 if (wait)
2151 ret = ocfs2_inode_lock(inode, NULL, meta_level);
2152 else
2153 ret = ocfs2_try_inode_lock(inode,
2154 overwrite_io ? NULL : &di_bh, meta_level);
2149 if (ret < 0) { 2155 if (ret < 0) {
2150 meta_level = -1; 2156 meta_level = -1;
2151 mlog_errno(ret); 2157 if (ret != -EAGAIN)
2158 mlog_errno(ret);
2152 goto out; 2159 goto out;
2153 } 2160 }
2154 2161
2162 /*
2163 * Check if IO will overwrite allocated blocks in case
2164 * IOCB_NOWAIT flag is set.
2165 */
2166 if (!wait && !overwrite_io) {
2167 overwrite_io = 1;
2168 if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
2169 ret = -EAGAIN;
2170 goto out_unlock;
2171 }
2172
2173 ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
2174 brelse(di_bh);
2175 di_bh = NULL;
2176 up_read(&OCFS2_I(inode)->ip_alloc_sem);
2177 if (ret < 0) {
2178 if (ret != -EAGAIN)
2179 mlog_errno(ret);
2180 goto out_unlock;
2181 }
2182 }
2183
2155 /* Clear suid / sgid if necessary. We do this here 2184 /* Clear suid / sgid if necessary. We do this here
2156 * instead of later in the write path because 2185 * instead of later in the write path because
2157 * remove_suid() calls ->setattr without any hint that 2186 * remove_suid() calls ->setattr without any hint that
@@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2199 2228
2200out_unlock: 2229out_unlock:
2201 trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, 2230 trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
2202 pos, count); 2231 pos, count, wait);
2232
2233 brelse(di_bh);
2203 2234
2204 if (meta_level >= 0) 2235 if (meta_level >= 0)
2205 ocfs2_inode_unlock(inode, meta_level); 2236 ocfs2_inode_unlock(inode, meta_level);
@@ -2211,7 +2242,7 @@ out:
2211static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, 2242static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2212 struct iov_iter *from) 2243 struct iov_iter *from)
2213{ 2244{
2214 int direct_io, rw_level; 2245 int rw_level;
2215 ssize_t written = 0; 2246 ssize_t written = 0;
2216 ssize_t ret; 2247 ssize_t ret;
2217 size_t count = iov_iter_count(from); 2248 size_t count = iov_iter_count(from);
@@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2223 void *saved_ki_complete = NULL; 2254 void *saved_ki_complete = NULL;
2224 int append_write = ((iocb->ki_pos + count) >= 2255 int append_write = ((iocb->ki_pos + count) >=
2225 i_size_read(inode) ? 1 : 0); 2256 i_size_read(inode) ? 1 : 0);
2257 int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2258 int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
2226 2259
2227 trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, 2260 trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
2228 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2261 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2230,12 +2263,17 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2230 file->f_path.dentry->d_name.name, 2263 file->f_path.dentry->d_name.name,
2231 (unsigned int)from->nr_segs); /* GRRRRR */ 2264 (unsigned int)from->nr_segs); /* GRRRRR */
2232 2265
2266 if (!direct_io && nowait)
2267 return -EOPNOTSUPP;
2268
2233 if (count == 0) 2269 if (count == 0)
2234 return 0; 2270 return 0;
2235 2271
2236 direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; 2272 if (nowait) {
2237 2273 if (!inode_trylock(inode))
2238 inode_lock(inode); 2274 return -EAGAIN;
2275 } else
2276 inode_lock(inode);
2239 2277
2240 /* 2278 /*
2241 * Concurrent O_DIRECT writes are allowed with 2279 * Concurrent O_DIRECT writes are allowed with
@@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2244 */ 2282 */
2245 rw_level = (!direct_io || full_coherency || append_write); 2283 rw_level = (!direct_io || full_coherency || append_write);
2246 2284
2247 ret = ocfs2_rw_lock(inode, rw_level); 2285 if (nowait)
2286 ret = ocfs2_try_rw_lock(inode, rw_level);
2287 else
2288 ret = ocfs2_rw_lock(inode, rw_level);
2248 if (ret < 0) { 2289 if (ret < 0) {
2249 mlog_errno(ret); 2290 if (ret != -EAGAIN)
2291 mlog_errno(ret);
2250 goto out_mutex; 2292 goto out_mutex;
2251 } 2293 }
2252 2294
@@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2260 * other nodes to drop their caches. Buffered I/O 2302 * other nodes to drop their caches. Buffered I/O
2261 * already does this in write_begin(). 2303 * already does this in write_begin().
2262 */ 2304 */
2263 ret = ocfs2_inode_lock(inode, NULL, 1); 2305 if (nowait)
2306 ret = ocfs2_try_inode_lock(inode, NULL, 1);
2307 else
2308 ret = ocfs2_inode_lock(inode, NULL, 1);
2264 if (ret < 0) { 2309 if (ret < 0) {
2265 mlog_errno(ret); 2310 if (ret != -EAGAIN)
2311 mlog_errno(ret);
2266 goto out; 2312 goto out;
2267 } 2313 }
2268 2314
@@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2277 } 2323 }
2278 count = ret; 2324 count = ret;
2279 2325
2280 ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count); 2326 ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
2281 if (ret < 0) { 2327 if (ret < 0) {
2282 mlog_errno(ret); 2328 if (ret != -EAGAIN)
2329 mlog_errno(ret);
2283 goto out; 2330 goto out;
2284 } 2331 }
2285 2332
@@ -2355,6 +2402,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2355 int ret = 0, rw_level = -1, lock_level = 0; 2402 int ret = 0, rw_level = -1, lock_level = 0;
2356 struct file *filp = iocb->ki_filp; 2403 struct file *filp = iocb->ki_filp;
2357 struct inode *inode = file_inode(filp); 2404 struct inode *inode = file_inode(filp);
2405 int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2406 int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
2358 2407
2359 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, 2408 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
2360 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2409 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2369,14 +2418,22 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2369 goto bail; 2418 goto bail;
2370 } 2419 }
2371 2420
2421 if (!direct_io && nowait)
2422 return -EOPNOTSUPP;
2423
2372 /* 2424 /*
2373 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2425 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2374 * need locks to protect pending reads from racing with truncate. 2426 * need locks to protect pending reads from racing with truncate.
2375 */ 2427 */
2376 if (iocb->ki_flags & IOCB_DIRECT) { 2428 if (direct_io) {
2377 ret = ocfs2_rw_lock(inode, 0); 2429 if (nowait)
2430 ret = ocfs2_try_rw_lock(inode, 0);
2431 else
2432 ret = ocfs2_rw_lock(inode, 0);
2433
2378 if (ret < 0) { 2434 if (ret < 0) {
2379 mlog_errno(ret); 2435 if (ret != -EAGAIN)
2436 mlog_errno(ret);
2380 goto bail; 2437 goto bail;
2381 } 2438 }
2382 rw_level = 0; 2439 rw_level = 0;
@@ -2393,9 +2450,11 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2393 * like i_size. This allows the checks down below 2450 * like i_size. This allows the checks down below
2394 * generic_file_aio_read() a chance of actually working. 2451 * generic_file_aio_read() a chance of actually working.
2395 */ 2452 */
2396 ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); 2453 ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
2454 !nowait);
2397 if (ret < 0) { 2455 if (ret < 0) {
2398 mlog_errno(ret); 2456 if (ret != -EAGAIN)
2457 mlog_errno(ret);
2399 goto bail; 2458 goto bail;
2400 } 2459 }
2401 ocfs2_inode_unlock(inode, lock_level); 2460 ocfs2_inode_unlock(inode, lock_level);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 36304434eacf..e5dcea6cee5f 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -666,23 +666,24 @@ static int __ocfs2_journal_access(handle_t *handle,
666 /* we can safely remove this assertion after testing. */ 666 /* we can safely remove this assertion after testing. */
667 if (!buffer_uptodate(bh)) { 667 if (!buffer_uptodate(bh)) {
668 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n"); 668 mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n");
669 mlog(ML_ERROR, "b_blocknr=%llu\n", 669 mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n",
670 (unsigned long long)bh->b_blocknr); 670 (unsigned long long)bh->b_blocknr, bh->b_state);
671 671
672 lock_buffer(bh); 672 lock_buffer(bh);
673 /* 673 /*
 674 * A previous attempt to write this buffer head failed. 674 * A previous transaction with a couple of buffer heads failed
675 * Nothing we can do but to retry the write and hope for 675 * to checkpoint, so all the bhs are marked as BH_Write_EIO.
676 * the best. 676 * For current transaction, the bh is just among those error
 677 * bhs which the previous transaction handled. We can't just clear
678 * its BH_Write_EIO and reuse directly, since other bhs are
679 * not written to disk yet and that will cause metadata
680 * inconsistency. So we should set fs read-only to avoid
681 * further damage.
677 */ 682 */
678 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) { 683 if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) {
679 clear_buffer_write_io_error(bh);
680 set_buffer_uptodate(bh);
681 }
682
683 if (!buffer_uptodate(bh)) {
684 unlock_buffer(bh); 684 unlock_buffer(bh);
685 return -EIO; 685 return ocfs2_error(osb->sb, "A previous attempt to "
686 "write this buffer head failed\n");
686 } 687 }
687 unlock_buffer(bh); 688 unlock_buffer(bh);
688 } 689 }
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 098f5c712569..fb9a20e3d608 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
184 int ret = 0, lock_level = 0; 184 int ret = 0, lock_level = 0;
185 185
186 ret = ocfs2_inode_lock_atime(file_inode(file), 186 ret = ocfs2_inode_lock_atime(file_inode(file),
187 file->f_path.mnt, &lock_level); 187 file->f_path.mnt, &lock_level, 1);
188 if (ret < 0) { 188 if (ret < 0) {
189 mlog_errno(ret); 189 mlog_errno(ret);
190 goto out; 190 goto out;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9a50f222ac97..6867eef2e06b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -404,6 +404,7 @@ struct ocfs2_super
404 struct ocfs2_lock_res osb_super_lockres; 404 struct ocfs2_lock_res osb_super_lockres;
405 struct ocfs2_lock_res osb_rename_lockres; 405 struct ocfs2_lock_res osb_rename_lockres;
406 struct ocfs2_lock_res osb_nfs_sync_lockres; 406 struct ocfs2_lock_res osb_nfs_sync_lockres;
407 struct ocfs2_lock_res osb_trim_fs_lockres;
407 struct ocfs2_dlm_debug *osb_dlm_debug; 408 struct ocfs2_dlm_debug *osb_dlm_debug;
408 409
409 struct dentry *osb_debug_root; 410 struct dentry *osb_debug_root;
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index d277aabf5dfb..7051b994c776 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -50,6 +50,7 @@ enum ocfs2_lock_type {
50 OCFS2_LOCK_TYPE_NFS_SYNC, 50 OCFS2_LOCK_TYPE_NFS_SYNC,
51 OCFS2_LOCK_TYPE_ORPHAN_SCAN, 51 OCFS2_LOCK_TYPE_ORPHAN_SCAN,
52 OCFS2_LOCK_TYPE_REFCOUNT, 52 OCFS2_LOCK_TYPE_REFCOUNT,
53 OCFS2_LOCK_TYPE_TRIM_FS,
53 OCFS2_NUM_LOCK_TYPES 54 OCFS2_NUM_LOCK_TYPES
54}; 55};
55 56
@@ -93,6 +94,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
93 case OCFS2_LOCK_TYPE_REFCOUNT: 94 case OCFS2_LOCK_TYPE_REFCOUNT:
94 c = 'T'; 95 c = 'T';
95 break; 96 break;
97 case OCFS2_LOCK_TYPE_TRIM_FS:
98 c = 'I';
99 break;
96 default: 100 default:
97 c = '\0'; 101 c = '\0';
98 } 102 }
@@ -115,6 +119,7 @@ static char *ocfs2_lock_type_strings[] = {
115 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", 119 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
116 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", 120 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
117 [OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount", 121 [OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount",
122 [OCFS2_LOCK_TYPE_TRIM_FS] = "TrimFs",
118}; 123};
119 124
120static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 125static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index a0b5d00ef0a9..e2a11aaece10 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1449,20 +1449,22 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
1449 1449
1450TRACE_EVENT(ocfs2_prepare_inode_for_write, 1450TRACE_EVENT(ocfs2_prepare_inode_for_write,
1451 TP_PROTO(unsigned long long ino, unsigned long long saved_pos, 1451 TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
1452 unsigned long count), 1452 unsigned long count, int wait),
1453 TP_ARGS(ino, saved_pos, count), 1453 TP_ARGS(ino, saved_pos, count, wait),
1454 TP_STRUCT__entry( 1454 TP_STRUCT__entry(
1455 __field(unsigned long long, ino) 1455 __field(unsigned long long, ino)
1456 __field(unsigned long long, saved_pos) 1456 __field(unsigned long long, saved_pos)
1457 __field(unsigned long, count) 1457 __field(unsigned long, count)
1458 __field(int, wait)
1458 ), 1459 ),
1459 TP_fast_assign( 1460 TP_fast_assign(
1460 __entry->ino = ino; 1461 __entry->ino = ino;
1461 __entry->saved_pos = saved_pos; 1462 __entry->saved_pos = saved_pos;
1462 __entry->count = count; 1463 __entry->count = count;
1464 __entry->wait = wait;
1463 ), 1465 ),
1464 TP_printk("%llu %llu %lu", __entry->ino, 1466 TP_printk("%llu %llu %lu %d", __entry->ino,
1465 __entry->saved_pos, __entry->count) 1467 __entry->saved_pos, __entry->count, __entry->wait)
1466); 1468);
1467 1469
1468DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret); 1470DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 9f0b95abc09f..d8f5f6ce99dc 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2438,6 +2438,8 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
2438 } 2438 }
2439 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 2439 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
2440 if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { 2440 if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
2441 if (undo_fn)
2442 jbd_unlock_bh_state(group_bh);
2441 return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n", 2443 return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
2442 (unsigned long long)le64_to_cpu(bg->bg_blkno), 2444 (unsigned long long)le64_to_cpu(bg->bg_blkno),
2443 le16_to_cpu(bg->bg_bits), 2445 le16_to_cpu(bg->bg_bits),
@@ -2563,16 +2565,16 @@ static int _ocfs2_free_clusters(handle_t *handle,
2563 int status; 2565 int status;
2564 u16 bg_start_bit; 2566 u16 bg_start_bit;
2565 u64 bg_blkno; 2567 u64 bg_blkno;
2566 struct ocfs2_dinode *fe;
2567 2568
2568 /* You can't ever have a contiguous set of clusters 2569 /* You can't ever have a contiguous set of clusters
2569 * bigger than a block group bitmap so we never have to worry 2570 * bigger than a block group bitmap so we never have to worry
2570 * about looping on them. 2571 * about looping on them.
2571 * This is expensive. We can safely remove once this stuff has 2572 * This is expensive. We can safely remove once this stuff has
2572 * gotten tested really well. */ 2573 * gotten tested really well. */
2573 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); 2574 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb,
2575 ocfs2_blocks_to_clusters(bitmap_inode->i_sb,
2576 start_blk)));
2574 2577
2575 fe = (struct ocfs2_dinode *) bitmap_bh->b_data;
2576 2578
2577 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, 2579 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
2578 &bg_start_bit); 2580 &bg_start_bit);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 80efa5699fb0..ffa4952d432b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -474,9 +474,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
474 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 474 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
475 if (!new) { 475 if (!new) {
476 ocfs2_release_system_inodes(osb); 476 ocfs2_release_system_inodes(osb);
477 status = -EINVAL; 477 status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
478 mlog_errno(status); 478 mlog_errno(status);
479 /* FIXME: Should ERROR_RO_FS */
480 mlog(ML_ERROR, "Unable to load system inode %d, " 479 mlog(ML_ERROR, "Unable to load system inode %d, "
481 "possibly corrupt fs?", i); 480 "possibly corrupt fs?", i);
482 goto bail; 481 goto bail;
@@ -505,7 +504,7 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
505 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 504 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
506 if (!new) { 505 if (!new) {
507 ocfs2_release_system_inodes(osb); 506 ocfs2_release_system_inodes(osb);
508 status = -EINVAL; 507 status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
509 mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n", 508 mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n",
510 status, i, osb->slot_num); 509 status, i, osb->slot_num);
511 goto bail; 510 goto bail;
@@ -1208,14 +1207,15 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1208read_super_error: 1207read_super_error:
1209 brelse(bh); 1208 brelse(bh);
1210 1209
1210 if (status)
1211 mlog_errno(status);
1212
1211 if (osb) { 1213 if (osb) {
1212 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1214 atomic_set(&osb->vol_state, VOLUME_DISABLED);
1213 wake_up(&osb->osb_mount_event); 1215 wake_up(&osb->osb_mount_event);
1214 ocfs2_dismount_volume(sb, 1); 1216 ocfs2_dismount_volume(sb, 1);
1215 } 1217 }
1216 1218
1217 if (status)
1218 mlog_errno(status);
1219 return status; 1219 return status;
1220} 1220}
1221 1221
@@ -1843,6 +1843,9 @@ static int ocfs2_mount_volume(struct super_block *sb)
1843 status = ocfs2_dlm_init(osb); 1843 status = ocfs2_dlm_init(osb);
1844 if (status < 0) { 1844 if (status < 0) {
1845 mlog_errno(status); 1845 mlog_errno(status);
1846 if (status == -EBADR && ocfs2_userspace_stack(osb))
1847 mlog(ML_ERROR, "couldn't mount because cluster name on"
1848 " disk does not match the running cluster name.\n");
1846 goto leave; 1849 goto leave;
1847 } 1850 }
1848 1851
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c5898c59d411..c261c1dfd374 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -638,14 +638,17 @@ int ocfs2_calc_xattr_init(struct inode *dir,
638 si->value_len); 638 si->value_len);
639 639
640 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 640 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
641 down_read(&OCFS2_I(dir)->ip_xattr_sem);
641 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, 642 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
642 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, 643 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
643 "", NULL, 0); 644 "", NULL, 0);
645 up_read(&OCFS2_I(dir)->ip_xattr_sem);
644 if (acl_len > 0) { 646 if (acl_len > 0) {
645 a_size = ocfs2_xattr_entry_real_size(0, acl_len); 647 a_size = ocfs2_xattr_entry_real_size(0, acl_len);
646 if (S_ISDIR(mode)) 648 if (S_ISDIR(mode))
647 a_size <<= 1; 649 a_size <<= 1;
648 } else if (acl_len != 0 && acl_len != -ENODATA) { 650 } else if (acl_len != 0 && acl_len != -ENODATA) {
651 ret = acl_len;
649 mlog_errno(ret); 652 mlog_errno(ret);
650 return ret; 653 return ret;
651 } 654 }
@@ -6415,7 +6418,7 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
6415 * and then insert the extents one by one. 6418 * and then insert the extents one by one.
6416 */ 6419 */
6417 if (xv->xr_list.l_tree_depth) { 6420 if (xv->xr_list.l_tree_depth) {
6418 memcpy(new_xv, &def_xv, sizeof(def_xv)); 6421 memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE);
6419 vb->vb_xv = new_xv; 6422 vb->vb_xv = new_xv;
6420 vb->vb_bh = value_bh; 6423 vb->vb_bh = value_bh;
6421 ocfs2_init_xattr_value_extent_tree(&data_et, 6424 ocfs2_init_xattr_value_extent_tree(&data_et,
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 36f1390b5ed7..62d49e53061c 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -610,11 +610,16 @@ void orangefs_kill_sb(struct super_block *sb)
610 610
611int orangefs_inode_cache_initialize(void) 611int orangefs_inode_cache_initialize(void)
612{ 612{
613 orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache", 613 orangefs_inode_cache = kmem_cache_create_usercopy(
614 sizeof(struct orangefs_inode_s), 614 "orangefs_inode_cache",
615 0, 615 sizeof(struct orangefs_inode_s),
616 ORANGEFS_CACHE_CREATE_FLAGS, 616 0,
617 orangefs_inode_cache_ctor); 617 ORANGEFS_CACHE_CREATE_FLAGS,
618 offsetof(struct orangefs_inode_s,
619 link_target),
620 sizeof_field(struct orangefs_inode_s,
621 link_target),
622 orangefs_inode_cache_ctor);
618 623
619 if (!orangefs_inode_cache) { 624 if (!orangefs_inode_cache) {
620 gossip_err("Cannot create orangefs_inode_cache\n"); 625 gossip_err("Cannot create orangefs_inode_cache\n");
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 339e4c1c044d..ec6d2983a5cb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -47,8 +47,11 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
47 if (hiwater_rss < mm->hiwater_rss) 47 if (hiwater_rss < mm->hiwater_rss)
48 hiwater_rss = mm->hiwater_rss; 48 hiwater_rss = mm->hiwater_rss;
49 49
50 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; 50 /* split executable areas between text and lib */
51 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; 51 text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK);
52 text = min(text, mm->exec_vm << PAGE_SHIFT);
53 lib = (mm->exec_vm << PAGE_SHIFT) - text;
54
52 swap = get_mm_counter(mm, MM_SWAPENTS); 55 swap = get_mm_counter(mm, MM_SWAPENTS);
53 seq_printf(m, 56 seq_printf(m,
54 "VmPeak:\t%8lu kB\n" 57 "VmPeak:\t%8lu kB\n"
@@ -76,7 +79,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
76 file << (PAGE_SHIFT-10), 79 file << (PAGE_SHIFT-10),
77 shmem << (PAGE_SHIFT-10), 80 shmem << (PAGE_SHIFT-10),
78 mm->data_vm << (PAGE_SHIFT-10), 81 mm->data_vm << (PAGE_SHIFT-10),
79 mm->stack_vm << (PAGE_SHIFT-10), text, lib, 82 mm->stack_vm << (PAGE_SHIFT-10),
83 text >> 10,
84 lib >> 10,
80 mm_pgtables_bytes(mm) >> 10, 85 mm_pgtables_bytes(mm) >> 10,
81 swap << (PAGE_SHIFT-10)); 86 swap << (PAGE_SHIFT-10));
82 hugetlb_report_usage(m, mm); 87 hugetlb_report_usage(m, mm);
@@ -977,14 +982,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
977static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, 982static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
978 unsigned long addr, pmd_t *pmdp) 983 unsigned long addr, pmd_t *pmdp)
979{ 984{
980 pmd_t pmd = *pmdp; 985 pmd_t old, pmd = *pmdp;
981 986
982 if (pmd_present(pmd)) { 987 if (pmd_present(pmd)) {
983 /* See comment in change_huge_pmd() */ 988 /* See comment in change_huge_pmd() */
984 pmdp_invalidate(vma, addr, pmdp); 989 old = pmdp_invalidate(vma, addr, pmdp);
985 if (pmd_dirty(*pmdp)) 990 if (pmd_dirty(old))
986 pmd = pmd_mkdirty(pmd); 991 pmd = pmd_mkdirty(pmd);
987 if (pmd_young(*pmdp)) 992 if (pmd_young(old))
988 pmd = pmd_mkyoung(pmd); 993 pmd = pmd_mkyoung(pmd);
989 994
990 pmd = pmd_wrprotect(pmd); 995 pmd = pmd_wrprotect(pmd);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 691032107f8c..c3129b131e4d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -41,7 +41,6 @@
41#include <linux/timer.h> 41#include <linux/timer.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/uaccess.h> 43#include <linux/uaccess.h>
44#include <linux/hardirq.h>
45#include <linux/jiffies.h> 44#include <linux/jiffies.h>
46#include <linux/workqueue.h> 45#include <linux/workqueue.h>
47 46
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index 7cd46666ba2c..86e71c0caf48 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -57,8 +57,7 @@ config REISERFS_FS_XATTR
57 depends on REISERFS_FS 57 depends on REISERFS_FS
58 help 58 help
59 Extended attributes are name:value pairs associated with inodes by 59 Extended attributes are name:value pairs associated with inodes by
60 the kernel or by users (see the attr(5) manual page, or visit 60 the kernel or by users (see the attr(5) manual page for details).
61 <http://acl.bestbits.at/> for details).
62 61
63 If unsure, say N. 62 If unsure, say N.
64 63
@@ -70,9 +69,6 @@ config REISERFS_FS_POSIX_ACL
70 Posix Access Control Lists (ACLs) support permissions for users and 69 Posix Access Control Lists (ACLs) support permissions for users and
71 groups beyond the owner/group/world scheme. 70 groups beyond the owner/group/world scheme.
72 71
73 To learn more about Access Control Lists, visit the Posix ACLs for
74 Linux website <http://acl.bestbits.at/>.
75
76 If you don't know what Access Control Lists are, say N 72 If you don't know what Access Control Lists are, say N
77 73
78config REISERFS_FS_SECURITY 74config REISERFS_FS_SECURITY
diff --git a/fs/super.c b/fs/super.c
index 06bd25d90ba5..672538ca9831 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -225,7 +225,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
225 if (s->s_user_ns != &init_user_ns) 225 if (s->s_user_ns != &init_user_ns)
226 s->s_iflags |= SB_I_NODEV; 226 s->s_iflags |= SB_I_NODEV;
227 INIT_HLIST_NODE(&s->s_instances); 227 INIT_HLIST_NODE(&s->s_instances);
228 INIT_HLIST_BL_HEAD(&s->s_anon); 228 INIT_HLIST_BL_HEAD(&s->s_roots);
229 mutex_init(&s->s_sync_lock); 229 mutex_init(&s->s_sync_lock);
230 INIT_LIST_HEAD(&s->s_inodes); 230 INIT_LIST_HEAD(&s->s_inodes);
231 spin_lock_init(&s->s_inode_list_lock); 231 spin_lock_init(&s->s_inode_list_lock);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2b67bda2021b..58eba92a0e41 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * fs/sysfs/dir.c - sysfs core and dir operation implementation 3 * fs/sysfs/dir.c - sysfs core and dir operation implementation
3 * 4 *
@@ -5,12 +6,10 @@
5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 7 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 * 8 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information. 9 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 10 */
12 11
13#undef DEBUG 12#define pr_fmt(fmt) "sysfs: " fmt
14 13
15#include <linux/fs.h> 14#include <linux/fs.h>
16#include <linux/kobject.h> 15#include <linux/kobject.h>
@@ -27,8 +26,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
27 if (buf) 26 if (buf)
28 kernfs_path(parent, buf, PATH_MAX); 27 kernfs_path(parent, buf, PATH_MAX);
29 28
30 WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n", 29 pr_warn("cannot create duplicate filename '%s/%s'\n", buf, name);
31 buf, name); 30 dump_stack();
32 31
33 kfree(buf); 32 kfree(buf);
34} 33}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 39c75a86c67f..5c13f29bfcdb 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * fs/sysfs/file.c - sysfs regular (text) file implementation 3 * fs/sysfs/file.c - sysfs regular (text) file implementation
3 * 4 *
@@ -5,14 +6,11 @@
5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 7 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 * 8 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information. 9 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 10 */
12 11
13#include <linux/module.h> 12#include <linux/module.h>
14#include <linux/kobject.h> 13#include <linux/kobject.h>
15#include <linux/kallsyms.h>
16#include <linux/slab.h> 14#include <linux/slab.h>
17#include <linux/list.h> 15#include <linux/list.h>
18#include <linux/mutex.h> 16#include <linux/mutex.h>
@@ -70,8 +68,8 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
70 * indicate truncated result or overflow in normal use cases. 68 * indicate truncated result or overflow in normal use cases.
71 */ 69 */
72 if (count >= (ssize_t)PAGE_SIZE) { 70 if (count >= (ssize_t)PAGE_SIZE) {
73 print_symbol("fill_read_buffer: %s returned bad count\n", 71 printk("fill_read_buffer: %pS returned bad count\n",
74 (unsigned long)ops->show); 72 ops->show);
75 /* Try to struggle along */ 73 /* Try to struggle along */
76 count = PAGE_SIZE - 1; 74 count = PAGE_SIZE - 1;
77 } 75 }
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index ac2de0ed69ad..4802ec0e1e3a 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * fs/sysfs/group.c - Operations for adding/removing multiple files at once. 3 * fs/sysfs/group.c - Operations for adding/removing multiple files at once.
3 * 4 *
@@ -5,9 +6,6 @@
5 * Copyright (c) 2003 Open Source Development Lab 6 * Copyright (c) 2003 Open Source Development Lab
6 * Copyright (c) 2013 Greg Kroah-Hartman 7 * Copyright (c) 2013 Greg Kroah-Hartman
7 * Copyright (c) 2013 The Linux Foundation 8 * Copyright (c) 2013 The Linux Foundation
8 *
9 * This file is released undert the GPL v2.
10 *
11 */ 9 */
12 10
13#include <linux/kobject.h> 11#include <linux/kobject.h>
@@ -406,6 +404,6 @@ int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj,
406 404
407 kernfs_put(entry); 405 kernfs_put(entry);
408 kernfs_put(target); 406 kernfs_put(target);
409 return IS_ERR(link) ? PTR_ERR(link) : 0; 407 return PTR_ERR_OR_ZERO(link);
410} 408}
411EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); 409EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index fb49510c5dcf..b428d317ae92 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * fs/sysfs/symlink.c - operations for initializing and mounting sysfs 3 * fs/sysfs/symlink.c - operations for initializing and mounting sysfs
3 * 4 *
@@ -5,13 +6,9 @@
5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 7 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 * 8 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information. 9 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 10 */
12 11
13#define DEBUG
14
15#include <linux/fs.h> 12#include <linux/fs.h>
16#include <linux/magic.h> 13#include <linux/magic.h>
17#include <linux/mount.h> 14#include <linux/mount.h>
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index aecb15f84557..8664db25a9a6 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * fs/sysfs/symlink.c - sysfs symlink implementation 3 * fs/sysfs/symlink.c - sysfs symlink implementation
3 * 4 *
@@ -5,8 +6,6 @@
5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 7 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 * 8 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information. 9 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 10 */
12 11
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0e2f1cccb812..d098e015fcc9 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,11 +1,10 @@
1/* SPDX-License-Identifier: GPL-2.0 */
1/* 2/*
2 * fs/sysfs/sysfs.h - sysfs internal header file 3 * fs/sysfs/sysfs.h - sysfs internal header file
3 * 4 *
4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 7 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 */ 8 */
10 9
11#ifndef __SYSFS_INTERNAL_H 10#ifndef __SYSFS_INTERNAL_H
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index a2ea4856e67b..9d7fb88e172e 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1138,38 +1138,24 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
1138 struct ubifs_info *c = dir->i_sb->s_fs_info; 1138 struct ubifs_info *c = dir->i_sb->s_fs_info;
1139 int err, len = strlen(symname); 1139 int err, len = strlen(symname);
1140 int sz_change = CALC_DENT_SIZE(len); 1140 int sz_change = CALC_DENT_SIZE(len);
1141 struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); 1141 struct fscrypt_str disk_link;
1142 struct fscrypt_symlink_data *sd = NULL;
1143 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 1142 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
1144 .new_ino_d = ALIGN(len, 8), 1143 .new_ino_d = ALIGN(len, 8),
1145 .dirtied_ino = 1 }; 1144 .dirtied_ino = 1 };
1146 struct fscrypt_name nm; 1145 struct fscrypt_name nm;
1147 1146
1148 if (ubifs_crypt_is_encrypted(dir)) { 1147 dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
1149 err = fscrypt_get_encryption_info(dir); 1148 symname, dir->i_ino);
1150 if (err)
1151 goto out_budg;
1152
1153 if (!fscrypt_has_encryption_key(dir)) {
1154 err = -EPERM;
1155 goto out_budg;
1156 }
1157 1149
1158 disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + 1150 err = fscrypt_prepare_symlink(dir, symname, len, UBIFS_MAX_INO_DATA,
1159 sizeof(struct fscrypt_symlink_data)); 1151 &disk_link);
1160 } 1152 if (err)
1153 return err;
1161 1154
1162 /* 1155 /*
1163 * Budget request settings: new inode, new direntry and changing parent 1156 * Budget request settings: new inode, new direntry and changing parent
1164 * directory inode. 1157 * directory inode.
1165 */ 1158 */
1166
1167 dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
1168 symname, dir->i_ino);
1169
1170 if (disk_link.len > UBIFS_MAX_INO_DATA)
1171 return -ENAMETOOLONG;
1172
1173 err = ubifs_budget_space(c, &req); 1159 err = ubifs_budget_space(c, &req);
1174 if (err) 1160 if (err)
1175 return err; 1161 return err;
@@ -1191,38 +1177,20 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
1191 goto out_inode; 1177 goto out_inode;
1192 } 1178 }
1193 1179
1194 if (ubifs_crypt_is_encrypted(dir)) { 1180 if (IS_ENCRYPTED(inode)) {
1195 struct qstr istr = QSTR_INIT(symname, len); 1181 disk_link.name = ui->data; /* encrypt directly into ui->data */
1196 struct fscrypt_str ostr; 1182 err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
1197 1183 if (err)
1198 sd = kzalloc(disk_link.len, GFP_NOFS);
1199 if (!sd) {
1200 err = -ENOMEM;
1201 goto out_inode;
1202 }
1203
1204 ostr.name = sd->encrypted_path;
1205 ostr.len = disk_link.len;
1206
1207 err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
1208 if (err) {
1209 kfree(sd);
1210 goto out_inode; 1184 goto out_inode;
1211 }
1212
1213 sd->len = cpu_to_le16(ostr.len);
1214 disk_link.name = (char *)sd;
1215 } else { 1185 } else {
1186 memcpy(ui->data, disk_link.name, disk_link.len);
1216 inode->i_link = ui->data; 1187 inode->i_link = ui->data;
1217 } 1188 }
1218 1189
1219 memcpy(ui->data, disk_link.name, disk_link.len);
1220 ((char *)ui->data)[disk_link.len - 1] = '\0';
1221
1222 /* 1190 /*
1223 * The terminating zero byte is not written to the flash media and it 1191 * The terminating zero byte is not written to the flash media and it
1224 * is put just to make later in-memory string processing simpler. Thus, 1192 * is put just to make later in-memory string processing simpler. Thus,
1225 * data length is @len, not @len + %1. 1193 * data length is @disk_link.len - 1, not @disk_link.len.
1226 */ 1194 */
1227 ui->data_len = disk_link.len - 1; 1195 ui->data_len = disk_link.len - 1;
1228 inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1; 1196 inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1;
@@ -1240,11 +1208,10 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
1240 goto out_cancel; 1208 goto out_cancel;
1241 mutex_unlock(&dir_ui->ui_mutex); 1209 mutex_unlock(&dir_ui->ui_mutex);
1242 1210
1243 ubifs_release_budget(c, &req);
1244 insert_inode_hash(inode); 1211 insert_inode_hash(inode);
1245 d_instantiate(dentry, inode); 1212 d_instantiate(dentry, inode);
1246 fscrypt_free_filename(&nm); 1213 err = 0;
1247 return 0; 1214 goto out_fname;
1248 1215
1249out_cancel: 1216out_cancel:
1250 dir->i_size -= sz_change; 1217 dir->i_size -= sz_change;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 9fe194a4fa9b..cf348ba99238 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1629,49 +1629,17 @@ static const char *ubifs_get_link(struct dentry *dentry,
1629 struct inode *inode, 1629 struct inode *inode,
1630 struct delayed_call *done) 1630 struct delayed_call *done)
1631{ 1631{
1632 int err;
1633 struct fscrypt_symlink_data *sd;
1634 struct ubifs_inode *ui = ubifs_inode(inode); 1632 struct ubifs_inode *ui = ubifs_inode(inode);
1635 struct fscrypt_str cstr;
1636 struct fscrypt_str pstr;
1637 1633
1638 if (!ubifs_crypt_is_encrypted(inode)) 1634 if (!IS_ENCRYPTED(inode))
1639 return ui->data; 1635 return ui->data;
1640 1636
1641 if (!dentry) 1637 if (!dentry)
1642 return ERR_PTR(-ECHILD); 1638 return ERR_PTR(-ECHILD);
1643 1639
1644 err = fscrypt_get_encryption_info(inode); 1640 return fscrypt_get_symlink(inode, ui->data, ui->data_len, done);
1645 if (err)
1646 return ERR_PTR(err);
1647
1648 sd = (struct fscrypt_symlink_data *)ui->data;
1649 cstr.name = sd->encrypted_path;
1650 cstr.len = le16_to_cpu(sd->len);
1651
1652 if (cstr.len == 0)
1653 return ERR_PTR(-ENOENT);
1654
1655 if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > ui->data_len)
1656 return ERR_PTR(-EIO);
1657
1658 err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
1659 if (err)
1660 return ERR_PTR(err);
1661
1662 err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
1663 if (err) {
1664 fscrypt_fname_free_buffer(&pstr);
1665 return ERR_PTR(err);
1666 }
1667
1668 pstr.name[pstr.len] = '\0';
1669
1670 set_delayed_call(done, kfree_link, pstr.name);
1671 return pstr.name;
1672} 1641}
1673 1642
1674
1675const struct address_space_operations ubifs_file_address_operations = { 1643const struct address_space_operations ubifs_file_address_operations = {
1676 .readpage = ubifs_readpage, 1644 .readpage = ubifs_readpage,
1677 .writepage = ubifs_writepage, 1645 .writepage = ubifs_writepage,
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 0beb285b143d..b16ef162344a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -379,9 +379,7 @@ out:
379 } 379 }
380done: 380done:
381 clear_inode(inode); 381 clear_inode(inode);
382#ifdef CONFIG_UBIFS_FS_ENCRYPTION 382 fscrypt_put_encryption_info(inode);
383 fscrypt_put_encryption_info(inode, NULL);
384#endif
385} 383}
386 384
387static void ubifs_dirty_inode(struct inode *inode, int flags) 385static void ubifs_dirty_inode(struct inode *inode, int flags)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b6ba80e05bff..8254b8b3690f 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1467,11 +1467,14 @@ static void init_once(void *foo)
1467 1467
1468static int __init init_inodecache(void) 1468static int __init init_inodecache(void)
1469{ 1469{
1470 ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", 1470 ufs_inode_cachep = kmem_cache_create_usercopy("ufs_inode_cache",
1471 sizeof(struct ufs_inode_info), 1471 sizeof(struct ufs_inode_info), 0,
1472 0, (SLAB_RECLAIM_ACCOUNT| 1472 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1473 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1473 SLAB_ACCOUNT),
1474 init_once); 1474 offsetof(struct ufs_inode_info, i_u1.i_symlink),
1475 sizeof_field(struct ufs_inode_info,
1476 i_u1.i_symlink),
1477 init_once);
1475 if (ufs_inode_cachep == NULL) 1478 if (ufs_inode_cachep == NULL)
1476 return -ENOMEM; 1479 return -ENOMEM;
1477 return 0; 1480 return 0;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 743eaa646898..87a13a7c8270 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -294,10 +294,13 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
294 * pmd_trans_unstable) of the pmd. 294 * pmd_trans_unstable) of the pmd.
295 */ 295 */
296 _pmd = READ_ONCE(*pmd); 296 _pmd = READ_ONCE(*pmd);
297 if (!pmd_present(_pmd)) 297 if (pmd_none(_pmd))
298 goto out; 298 goto out;
299 299
300 ret = false; 300 ret = false;
301 if (!pmd_present(_pmd))
302 goto out;
303
301 if (pmd_trans_huge(_pmd)) 304 if (pmd_trans_huge(_pmd))
302 goto out; 305 goto out;
303 306
@@ -985,24 +988,14 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *ctx,
985 struct uffd_msg *msg) 988 struct uffd_msg *msg)
986{ 989{
987 int fd; 990 int fd;
988 struct file *file;
989 unsigned int flags = new->flags & UFFD_SHARED_FCNTL_FLAGS;
990 991
991 fd = get_unused_fd_flags(flags); 992 fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, new,
993 O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS));
992 if (fd < 0) 994 if (fd < 0)
993 return fd; 995 return fd;
994 996
995 file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, new,
996 O_RDWR | flags);
997 if (IS_ERR(file)) {
998 put_unused_fd(fd);
999 return PTR_ERR(file);
1000 }
1001
1002 fd_install(fd, file);
1003 msg->arg.reserved.reserved1 = 0; 997 msg->arg.reserved.reserved1 = 0;
1004 msg->arg.fork.ufd = fd; 998 msg->arg.fork.ufd = fd;
1005
1006 return 0; 999 return 0;
1007} 1000}
1008 1001
@@ -1884,24 +1877,10 @@ static void init_once_userfaultfd_ctx(void *mem)
1884 seqcount_init(&ctx->refile_seq); 1877 seqcount_init(&ctx->refile_seq);
1885} 1878}
1886 1879
1887/** 1880SYSCALL_DEFINE1(userfaultfd, int, flags)
1888 * userfaultfd_file_create - Creates a userfaultfd file pointer.
1889 * @flags: Flags for the userfaultfd file.
1890 *
1891 * This function creates a userfaultfd file pointer, w/out installing
1892 * it into the fd table. This is useful when the userfaultfd file is
1893 * used during the initialization of data structures that require
1894 * extra setup after the userfaultfd creation. So the userfaultfd
1895 * creation is split into the file pointer creation phase, and the
1896 * file descriptor installation phase. In this way races with
1897 * userspace closing the newly installed file descriptor can be
1898 * avoided. Returns a userfaultfd file pointer, or a proper error
1899 * pointer.
1900 */
1901static struct file *userfaultfd_file_create(int flags)
1902{ 1881{
1903 struct file *file;
1904 struct userfaultfd_ctx *ctx; 1882 struct userfaultfd_ctx *ctx;
1883 int fd;
1905 1884
1906 BUG_ON(!current->mm); 1885 BUG_ON(!current->mm);
1907 1886
@@ -1909,14 +1888,12 @@ static struct file *userfaultfd_file_create(int flags)
1909 BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC); 1888 BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC);
1910 BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK); 1889 BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK);
1911 1890
1912 file = ERR_PTR(-EINVAL);
1913 if (flags & ~UFFD_SHARED_FCNTL_FLAGS) 1891 if (flags & ~UFFD_SHARED_FCNTL_FLAGS)
1914 goto out; 1892 return -EINVAL;
1915 1893
1916 file = ERR_PTR(-ENOMEM);
1917 ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL); 1894 ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
1918 if (!ctx) 1895 if (!ctx)
1919 goto out; 1896 return -ENOMEM;
1920 1897
1921 atomic_set(&ctx->refcount, 1); 1898 atomic_set(&ctx->refcount, 1);
1922 ctx->flags = flags; 1899 ctx->flags = flags;
@@ -1927,39 +1904,13 @@ static struct file *userfaultfd_file_create(int flags)
1927 /* prevent the mm struct to be freed */ 1904 /* prevent the mm struct to be freed */
1928 mmgrab(ctx->mm); 1905 mmgrab(ctx->mm);
1929 1906
1930 file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, 1907 fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
1931 O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); 1908 O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
1932 if (IS_ERR(file)) { 1909 if (fd < 0) {
1933 mmdrop(ctx->mm); 1910 mmdrop(ctx->mm);
1934 kmem_cache_free(userfaultfd_ctx_cachep, ctx); 1911 kmem_cache_free(userfaultfd_ctx_cachep, ctx);
1935 } 1912 }
1936out:
1937 return file;
1938}
1939
1940SYSCALL_DEFINE1(userfaultfd, int, flags)
1941{
1942 int fd, error;
1943 struct file *file;
1944
1945 error = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS);
1946 if (error < 0)
1947 return error;
1948 fd = error;
1949
1950 file = userfaultfd_file_create(flags);
1951 if (IS_ERR(file)) {
1952 error = PTR_ERR(file);
1953 goto err_put_unused_fd;
1954 }
1955 fd_install(fd, file);
1956
1957 return fd; 1913 return fd;
1958
1959err_put_unused_fd:
1960 put_unused_fd(fd);
1961
1962 return error;
1963} 1914}
1964 1915
1965static int __init userfaultfd_init(void) 1916static int __init userfaultfd_init(void)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index f42fcf1b5465..46bcf0e649f5 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -48,9 +48,6 @@ config XFS_POSIX_ACL
48 POSIX Access Control Lists (ACLs) support permissions for users and 48 POSIX Access Control Lists (ACLs) support permissions for users and
49 groups beyond the owner/group/world scheme. 49 groups beyond the owner/group/world scheme.
50 50
51 To learn more about Access Control Lists, visit the POSIX ACLs for
52 Linux website <http://acl.bestbits.at/>.
53
54 If you don't know what Access Control Lists are, say N. 51 If you don't know what Access Control Lists are, say N.
55 52
56config XFS_RT 53config XFS_RT
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 83ed7715f856..c02781a4c091 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -167,7 +167,7 @@ xfs_alloc_lookup_ge(
167 * Lookup the first record less than or equal to [bno, len] 167 * Lookup the first record less than or equal to [bno, len]
168 * in the btree given by cur. 168 * in the btree given by cur.
169 */ 169 */
170static int /* error */ 170int /* error */
171xfs_alloc_lookup_le( 171xfs_alloc_lookup_le(
172 struct xfs_btree_cur *cur, /* btree cursor */ 172 struct xfs_btree_cur *cur, /* btree cursor */
173 xfs_agblock_t bno, /* starting block of extent */ 173 xfs_agblock_t bno, /* starting block of extent */
@@ -520,7 +520,7 @@ xfs_alloc_fixup_trees(
520 return 0; 520 return 0;
521} 521}
522 522
523static bool 523static xfs_failaddr_t
524xfs_agfl_verify( 524xfs_agfl_verify(
525 struct xfs_buf *bp) 525 struct xfs_buf *bp)
526{ 526{
@@ -528,10 +528,19 @@ xfs_agfl_verify(
528 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 528 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
529 int i; 529 int i;
530 530
531 /*
532 * There is no verification of non-crc AGFLs because mkfs does not
533 * initialise the AGFL to zero or NULL. Hence the only valid part of the
534 * AGFL is what the AGF says is active. We can't get to the AGF, so we
535 * can't verify just those entries are valid.
536 */
537 if (!xfs_sb_version_hascrc(&mp->m_sb))
538 return NULL;
539
531 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) 540 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
532 return false; 541 return __this_address;
533 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) 542 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
534 return false; 543 return __this_address;
535 /* 544 /*
536 * during growfs operations, the perag is not fully initialised, 545 * during growfs operations, the perag is not fully initialised,
537 * so we can't use it for any useful checking. growfs ensures we can't 546 * so we can't use it for any useful checking. growfs ensures we can't
@@ -539,16 +548,17 @@ xfs_agfl_verify(
539 * so we can detect and avoid this problem. 548 * so we can detect and avoid this problem.
540 */ 549 */
541 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 550 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
542 return false; 551 return __this_address;
543 552
544 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { 553 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
545 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 554 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
546 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 555 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
547 return false; 556 return __this_address;
548 } 557 }
549 558
550 return xfs_log_check_lsn(mp, 559 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
551 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); 560 return __this_address;
561 return NULL;
552} 562}
553 563
554static void 564static void
@@ -556,6 +566,7 @@ xfs_agfl_read_verify(
556 struct xfs_buf *bp) 566 struct xfs_buf *bp)
557{ 567{
558 struct xfs_mount *mp = bp->b_target->bt_mount; 568 struct xfs_mount *mp = bp->b_target->bt_mount;
569 xfs_failaddr_t fa;
559 570
560 /* 571 /*
561 * There is no verification of non-crc AGFLs because mkfs does not 572 * There is no verification of non-crc AGFLs because mkfs does not
@@ -567,28 +578,29 @@ xfs_agfl_read_verify(
567 return; 578 return;
568 579
569 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 580 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
570 xfs_buf_ioerror(bp, -EFSBADCRC); 581 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
571 else if (!xfs_agfl_verify(bp)) 582 else {
572 xfs_buf_ioerror(bp, -EFSCORRUPTED); 583 fa = xfs_agfl_verify(bp);
573 584 if (fa)
574 if (bp->b_error) 585 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
575 xfs_verifier_error(bp); 586 }
576} 587}
577 588
578static void 589static void
579xfs_agfl_write_verify( 590xfs_agfl_write_verify(
580 struct xfs_buf *bp) 591 struct xfs_buf *bp)
581{ 592{
582 struct xfs_mount *mp = bp->b_target->bt_mount; 593 struct xfs_mount *mp = bp->b_target->bt_mount;
583 struct xfs_buf_log_item *bip = bp->b_fspriv; 594 struct xfs_buf_log_item *bip = bp->b_log_item;
595 xfs_failaddr_t fa;
584 596
585 /* no verification of non-crc AGFLs */ 597 /* no verification of non-crc AGFLs */
586 if (!xfs_sb_version_hascrc(&mp->m_sb)) 598 if (!xfs_sb_version_hascrc(&mp->m_sb))
587 return; 599 return;
588 600
589 if (!xfs_agfl_verify(bp)) { 601 fa = xfs_agfl_verify(bp);
590 xfs_buf_ioerror(bp, -EFSCORRUPTED); 602 if (fa) {
591 xfs_verifier_error(bp); 603 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
592 return; 604 return;
593 } 605 }
594 606
@@ -602,6 +614,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
602 .name = "xfs_agfl", 614 .name = "xfs_agfl",
603 .verify_read = xfs_agfl_read_verify, 615 .verify_read = xfs_agfl_read_verify,
604 .verify_write = xfs_agfl_write_verify, 616 .verify_write = xfs_agfl_write_verify,
617 .verify_struct = xfs_agfl_verify,
605}; 618};
606 619
607/* 620/*
@@ -2397,19 +2410,19 @@ xfs_alloc_put_freelist(
2397 return 0; 2410 return 0;
2398} 2411}
2399 2412
2400static bool 2413static xfs_failaddr_t
2401xfs_agf_verify( 2414xfs_agf_verify(
2402 struct xfs_mount *mp, 2415 struct xfs_buf *bp)
2403 struct xfs_buf *bp) 2416{
2404 { 2417 struct xfs_mount *mp = bp->b_target->bt_mount;
2405 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2418 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
2406 2419
2407 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2420 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2408 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2421 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2409 return false; 2422 return __this_address;
2410 if (!xfs_log_check_lsn(mp, 2423 if (!xfs_log_check_lsn(mp,
2411 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2424 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2412 return false; 2425 return __this_address;
2413 } 2426 }
2414 2427
2415 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2428 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
@@ -2418,18 +2431,18 @@ xfs_agf_verify(
2418 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2431 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2419 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2432 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2420 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) 2433 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2421 return false; 2434 return __this_address;
2422 2435
2423 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2436 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2424 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2437 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2425 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2438 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2426 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2439 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2427 return false; 2440 return __this_address;
2428 2441
2429 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2442 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2430 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2443 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2431 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2444 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
2432 return false; 2445 return __this_address;
2433 2446
2434 /* 2447 /*
2435 * during growfs operations, the perag is not fully initialised, 2448 * during growfs operations, the perag is not fully initialised,
@@ -2438,18 +2451,18 @@ xfs_agf_verify(
2438 * so we can detect and avoid this problem. 2451 * so we can detect and avoid this problem.
2439 */ 2452 */
2440 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2453 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2441 return false; 2454 return __this_address;
2442 2455
2443 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2456 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2444 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2457 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2445 return false; 2458 return __this_address;
2446 2459
2447 if (xfs_sb_version_hasreflink(&mp->m_sb) && 2460 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2448 (be32_to_cpu(agf->agf_refcount_level) < 1 || 2461 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2449 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) 2462 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
2450 return false; 2463 return __this_address;
2451 2464
2452 return true;; 2465 return NULL;
2453 2466
2454} 2467}
2455 2468
@@ -2458,28 +2471,29 @@ xfs_agf_read_verify(
2458 struct xfs_buf *bp) 2471 struct xfs_buf *bp)
2459{ 2472{
2460 struct xfs_mount *mp = bp->b_target->bt_mount; 2473 struct xfs_mount *mp = bp->b_target->bt_mount;
2474 xfs_failaddr_t fa;
2461 2475
2462 if (xfs_sb_version_hascrc(&mp->m_sb) && 2476 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2463 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2477 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2464 xfs_buf_ioerror(bp, -EFSBADCRC); 2478 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2465 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, 2479 else {
2466 XFS_ERRTAG_ALLOC_READ_AGF)) 2480 fa = xfs_agf_verify(bp);
2467 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2481 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2468 2482 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2469 if (bp->b_error) 2483 }
2470 xfs_verifier_error(bp);
2471} 2484}
2472 2485
2473static void 2486static void
2474xfs_agf_write_verify( 2487xfs_agf_write_verify(
2475 struct xfs_buf *bp) 2488 struct xfs_buf *bp)
2476{ 2489{
2477 struct xfs_mount *mp = bp->b_target->bt_mount; 2490 struct xfs_mount *mp = bp->b_target->bt_mount;
2478 struct xfs_buf_log_item *bip = bp->b_fspriv; 2491 struct xfs_buf_log_item *bip = bp->b_log_item;
2492 xfs_failaddr_t fa;
2479 2493
2480 if (!xfs_agf_verify(mp, bp)) { 2494 fa = xfs_agf_verify(bp);
2481 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2495 if (fa) {
2482 xfs_verifier_error(bp); 2496 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2483 return; 2497 return;
2484 } 2498 }
2485 2499
@@ -2496,6 +2510,7 @@ const struct xfs_buf_ops xfs_agf_buf_ops = {
2496 .name = "xfs_agf", 2510 .name = "xfs_agf",
2497 .verify_read = xfs_agf_read_verify, 2511 .verify_read = xfs_agf_read_verify,
2498 .verify_write = xfs_agf_write_verify, 2512 .verify_write = xfs_agf_write_verify,
2513 .verify_struct = xfs_agf_verify,
2499}; 2514};
2500 2515
2501/* 2516/*
@@ -2981,3 +2996,22 @@ xfs_verify_fsbno(
2981 return false; 2996 return false;
2982 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); 2997 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
2983} 2998}
2999
3000/* Is there a record covering a given extent? */
3001int
3002xfs_alloc_has_record(
3003 struct xfs_btree_cur *cur,
3004 xfs_agblock_t bno,
3005 xfs_extlen_t len,
3006 bool *exists)
3007{
3008 union xfs_btree_irec low;
3009 union xfs_btree_irec high;
3010
3011 memset(&low, 0, sizeof(low));
3012 low.a.ar_startblock = bno;
3013 memset(&high, 0xFF, sizeof(high));
3014 high.a.ar_startblock = bno + len - 1;
3015
3016 return xfs_btree_has_record(cur, &low, &high, exists);
3017}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 7ba2d129d504..65a0cafe06e4 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -198,6 +198,13 @@ xfs_free_extent(
198 enum xfs_ag_resv_type type); /* block reservation type */ 198 enum xfs_ag_resv_type type); /* block reservation type */
199 199
200int /* error */ 200int /* error */
201xfs_alloc_lookup_le(
202 struct xfs_btree_cur *cur, /* btree cursor */
203 xfs_agblock_t bno, /* starting block of extent */
204 xfs_extlen_t len, /* length of extent */
205 int *stat); /* success/failure */
206
207int /* error */
201xfs_alloc_lookup_ge( 208xfs_alloc_lookup_ge(
202 struct xfs_btree_cur *cur, /* btree cursor */ 209 struct xfs_btree_cur *cur, /* btree cursor */
203 xfs_agblock_t bno, /* starting block of extent */ 210 xfs_agblock_t bno, /* starting block of extent */
@@ -237,4 +244,7 @@ bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
237 xfs_agblock_t agbno); 244 xfs_agblock_t agbno);
238bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); 245bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
239 246
247int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
248 xfs_extlen_t len, bool *exist);
249
240#endif /* __XFS_ALLOC_H__ */ 250#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index cfde0a0f9706..6840b588187e 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -307,13 +307,14 @@ xfs_cntbt_diff_two_keys(
307 be32_to_cpu(k2->alloc.ar_startblock); 307 be32_to_cpu(k2->alloc.ar_startblock);
308} 308}
309 309
310static bool 310static xfs_failaddr_t
311xfs_allocbt_verify( 311xfs_allocbt_verify(
312 struct xfs_buf *bp) 312 struct xfs_buf *bp)
313{ 313{
314 struct xfs_mount *mp = bp->b_target->bt_mount; 314 struct xfs_mount *mp = bp->b_target->bt_mount;
315 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 315 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
316 struct xfs_perag *pag = bp->b_pag; 316 struct xfs_perag *pag = bp->b_pag;
317 xfs_failaddr_t fa;
317 unsigned int level; 318 unsigned int level;
318 319
319 /* 320 /*
@@ -331,29 +332,31 @@ xfs_allocbt_verify(
331 level = be16_to_cpu(block->bb_level); 332 level = be16_to_cpu(block->bb_level);
332 switch (block->bb_magic) { 333 switch (block->bb_magic) {
333 case cpu_to_be32(XFS_ABTB_CRC_MAGIC): 334 case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
334 if (!xfs_btree_sblock_v5hdr_verify(bp)) 335 fa = xfs_btree_sblock_v5hdr_verify(bp);
335 return false; 336 if (fa)
337 return fa;
336 /* fall through */ 338 /* fall through */
337 case cpu_to_be32(XFS_ABTB_MAGIC): 339 case cpu_to_be32(XFS_ABTB_MAGIC):
338 if (pag && pag->pagf_init) { 340 if (pag && pag->pagf_init) {
339 if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) 341 if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
340 return false; 342 return __this_address;
341 } else if (level >= mp->m_ag_maxlevels) 343 } else if (level >= mp->m_ag_maxlevels)
342 return false; 344 return __this_address;
343 break; 345 break;
344 case cpu_to_be32(XFS_ABTC_CRC_MAGIC): 346 case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
345 if (!xfs_btree_sblock_v5hdr_verify(bp)) 347 fa = xfs_btree_sblock_v5hdr_verify(bp);
346 return false; 348 if (fa)
349 return fa;
347 /* fall through */ 350 /* fall through */
348 case cpu_to_be32(XFS_ABTC_MAGIC): 351 case cpu_to_be32(XFS_ABTC_MAGIC):
349 if (pag && pag->pagf_init) { 352 if (pag && pag->pagf_init) {
350 if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) 353 if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
351 return false; 354 return __this_address;
352 } else if (level >= mp->m_ag_maxlevels) 355 } else if (level >= mp->m_ag_maxlevels)
353 return false; 356 return __this_address;
354 break; 357 break;
355 default: 358 default:
356 return false; 359 return __this_address;
357 } 360 }
358 361
359 return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); 362 return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
@@ -363,25 +366,30 @@ static void
363xfs_allocbt_read_verify( 366xfs_allocbt_read_verify(
364 struct xfs_buf *bp) 367 struct xfs_buf *bp)
365{ 368{
369 xfs_failaddr_t fa;
370
366 if (!xfs_btree_sblock_verify_crc(bp)) 371 if (!xfs_btree_sblock_verify_crc(bp))
367 xfs_buf_ioerror(bp, -EFSBADCRC); 372 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
368 else if (!xfs_allocbt_verify(bp)) 373 else {
369 xfs_buf_ioerror(bp, -EFSCORRUPTED); 374 fa = xfs_allocbt_verify(bp);
375 if (fa)
376 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
377 }
370 378
371 if (bp->b_error) { 379 if (bp->b_error)
372 trace_xfs_btree_corrupt(bp, _RET_IP_); 380 trace_xfs_btree_corrupt(bp, _RET_IP_);
373 xfs_verifier_error(bp);
374 }
375} 381}
376 382
377static void 383static void
378xfs_allocbt_write_verify( 384xfs_allocbt_write_verify(
379 struct xfs_buf *bp) 385 struct xfs_buf *bp)
380{ 386{
381 if (!xfs_allocbt_verify(bp)) { 387 xfs_failaddr_t fa;
388
389 fa = xfs_allocbt_verify(bp);
390 if (fa) {
382 trace_xfs_btree_corrupt(bp, _RET_IP_); 391 trace_xfs_btree_corrupt(bp, _RET_IP_);
383 xfs_buf_ioerror(bp, -EFSCORRUPTED); 392 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
384 xfs_verifier_error(bp);
385 return; 393 return;
386 } 394 }
387 xfs_btree_sblock_calc_crc(bp); 395 xfs_btree_sblock_calc_crc(bp);
@@ -392,6 +400,7 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
392 .name = "xfs_allocbt", 400 .name = "xfs_allocbt",
393 .verify_read = xfs_allocbt_read_verify, 401 .verify_read = xfs_allocbt_read_verify,
394 .verify_write = xfs_allocbt_write_verify, 402 .verify_write = xfs_allocbt_write_verify,
403 .verify_struct = xfs_allocbt_verify,
395}; 404};
396 405
397 406
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index a76914db72ef..ce4a34a2751d 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -717,7 +717,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
717 return error; 717 return error;
718out_defer_cancel: 718out_defer_cancel:
719 xfs_defer_cancel(args->dfops); 719 xfs_defer_cancel(args->dfops);
720 args->trans = NULL;
721 return error; 720 return error;
722} 721}
723 722
@@ -770,7 +769,6 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
770 return 0; 769 return 0;
771out_defer_cancel: 770out_defer_cancel:
772 xfs_defer_cancel(args->dfops); 771 xfs_defer_cancel(args->dfops);
773 args->trans = NULL;
774 return error; 772 return error;
775} 773}
776 774
@@ -1045,7 +1043,6 @@ out:
1045 return retval; 1043 return retval;
1046out_defer_cancel: 1044out_defer_cancel:
1047 xfs_defer_cancel(args->dfops); 1045 xfs_defer_cancel(args->dfops);
1048 args->trans = NULL;
1049 goto out; 1046 goto out;
1050} 1047}
1051 1048
@@ -1186,7 +1183,6 @@ out:
1186 return error; 1183 return error;
1187out_defer_cancel: 1184out_defer_cancel:
1188 xfs_defer_cancel(args->dfops); 1185 xfs_defer_cancel(args->dfops);
1189 args->trans = NULL;
1190 goto out; 1186 goto out;
1191} 1187}
1192 1188
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 601eaa36f1ad..2135b8e67dcc 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -247,14 +247,15 @@ xfs_attr3_leaf_hdr_to_disk(
247 } 247 }
248} 248}
249 249
250static bool 250static xfs_failaddr_t
251xfs_attr3_leaf_verify( 251xfs_attr3_leaf_verify(
252 struct xfs_buf *bp) 252 struct xfs_buf *bp)
253{ 253{
254 struct xfs_mount *mp = bp->b_target->bt_mount; 254 struct xfs_attr3_icleaf_hdr ichdr;
255 struct xfs_attr_leafblock *leaf = bp->b_addr; 255 struct xfs_mount *mp = bp->b_target->bt_mount;
256 struct xfs_perag *pag = bp->b_pag; 256 struct xfs_attr_leafblock *leaf = bp->b_addr;
257 struct xfs_attr3_icleaf_hdr ichdr; 257 struct xfs_perag *pag = bp->b_pag;
258 struct xfs_attr_leaf_entry *entries;
258 259
259 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); 260 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
260 261
@@ -262,17 +263,17 @@ xfs_attr3_leaf_verify(
262 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 263 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
263 264
264 if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) 265 if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
265 return false; 266 return __this_address;
266 267
267 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 268 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
268 return false; 269 return __this_address;
269 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 270 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
270 return false; 271 return __this_address;
271 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 272 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
272 return false; 273 return __this_address;
273 } else { 274 } else {
274 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) 275 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
275 return false; 276 return __this_address;
276 } 277 }
277 /* 278 /*
278 * In recovery there is a transient state where count == 0 is valid 279 * In recovery there is a transient state where count == 0 is valid
@@ -280,12 +281,27 @@ xfs_attr3_leaf_verify(
280 * if the attr didn't fit in shortform. 281 * if the attr didn't fit in shortform.
281 */ 282 */
282 if (pag && pag->pagf_init && ichdr.count == 0) 283 if (pag && pag->pagf_init && ichdr.count == 0)
283 return false; 284 return __this_address;
285
286 /*
287 * firstused is the block offset of the first name info structure.
288 * Make sure it doesn't go off the block or crash into the header.
289 */
290 if (ichdr.firstused > mp->m_attr_geo->blksize)
291 return __this_address;
292 if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf))
293 return __this_address;
294
295 /* Make sure the entries array doesn't crash into the name info. */
296 entries = xfs_attr3_leaf_entryp(bp->b_addr);
297 if ((char *)&entries[ichdr.count] >
298 (char *)bp->b_addr + ichdr.firstused)
299 return __this_address;
284 300
285 /* XXX: need to range check rest of attr header values */ 301 /* XXX: need to range check rest of attr header values */
286 /* XXX: hash order check? */ 302 /* XXX: hash order check? */
287 303
288 return true; 304 return NULL;
289} 305}
290 306
291static void 307static void
@@ -293,12 +309,13 @@ xfs_attr3_leaf_write_verify(
293 struct xfs_buf *bp) 309 struct xfs_buf *bp)
294{ 310{
295 struct xfs_mount *mp = bp->b_target->bt_mount; 311 struct xfs_mount *mp = bp->b_target->bt_mount;
296 struct xfs_buf_log_item *bip = bp->b_fspriv; 312 struct xfs_buf_log_item *bip = bp->b_log_item;
297 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 313 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
314 xfs_failaddr_t fa;
298 315
299 if (!xfs_attr3_leaf_verify(bp)) { 316 fa = xfs_attr3_leaf_verify(bp);
300 xfs_buf_ioerror(bp, -EFSCORRUPTED); 317 if (fa) {
301 xfs_verifier_error(bp); 318 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
302 return; 319 return;
303 } 320 }
304 321
@@ -322,21 +339,23 @@ xfs_attr3_leaf_read_verify(
322 struct xfs_buf *bp) 339 struct xfs_buf *bp)
323{ 340{
324 struct xfs_mount *mp = bp->b_target->bt_mount; 341 struct xfs_mount *mp = bp->b_target->bt_mount;
342 xfs_failaddr_t fa;
325 343
326 if (xfs_sb_version_hascrc(&mp->m_sb) && 344 if (xfs_sb_version_hascrc(&mp->m_sb) &&
327 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) 345 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
328 xfs_buf_ioerror(bp, -EFSBADCRC); 346 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
329 else if (!xfs_attr3_leaf_verify(bp)) 347 else {
330 xfs_buf_ioerror(bp, -EFSCORRUPTED); 348 fa = xfs_attr3_leaf_verify(bp);
331 349 if (fa)
332 if (bp->b_error) 350 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
333 xfs_verifier_error(bp); 351 }
334} 352}
335 353
336const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { 354const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
337 .name = "xfs_attr3_leaf", 355 .name = "xfs_attr3_leaf",
338 .verify_read = xfs_attr3_leaf_read_verify, 356 .verify_read = xfs_attr3_leaf_read_verify,
339 .verify_write = xfs_attr3_leaf_write_verify, 357 .verify_write = xfs_attr3_leaf_write_verify,
358 .verify_struct = xfs_attr3_leaf_verify,
340}; 359};
341 360
342int 361int
@@ -870,6 +889,80 @@ xfs_attr_shortform_allfit(
870 return xfs_attr_shortform_bytesfit(dp, bytes); 889 return xfs_attr_shortform_bytesfit(dp, bytes);
871} 890}
872 891
892/* Verify the consistency of an inline attribute fork. */
893xfs_failaddr_t
894xfs_attr_shortform_verify(
895 struct xfs_inode *ip)
896{
897 struct xfs_attr_shortform *sfp;
898 struct xfs_attr_sf_entry *sfep;
899 struct xfs_attr_sf_entry *next_sfep;
900 char *endp;
901 struct xfs_ifork *ifp;
902 int i;
903 int size;
904
905 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
906 ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
907 sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
908 size = ifp->if_bytes;
909
910 /*
911 * Give up if the attribute is way too short.
912 */
913 if (size < sizeof(struct xfs_attr_sf_hdr))
914 return __this_address;
915
916 endp = (char *)sfp + size;
917
918 /* Check all reported entries */
919 sfep = &sfp->list[0];
920 for (i = 0; i < sfp->hdr.count; i++) {
921 /*
922 * struct xfs_attr_sf_entry has a variable length.
923 * Check the fixed-offset parts of the structure are
924 * within the data buffer.
925 */
926 if (((char *)sfep + sizeof(*sfep)) >= endp)
927 return __this_address;
928
929 /* Don't allow names with known bad length. */
930 if (sfep->namelen == 0)
931 return __this_address;
932
933 /*
934 * Check that the variable-length part of the structure is
935 * within the data buffer. The next entry starts after the
936 * name component, so nextentry is an acceptable test.
937 */
938 next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep);
939 if ((char *)next_sfep > endp)
940 return __this_address;
941
942 /*
943 * Check for unknown flags. Short form doesn't support
944 * the incomplete or local bits, so we can use the namespace
945 * mask here.
946 */
947 if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK)
948 return __this_address;
949
950 /*
951 * Check for invalid namespace combinations. We only allow
952 * one namespace flag per xattr, so we can just count the
953 * bits (i.e. hweight) here.
954 */
955 if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
956 return __this_address;
957
958 sfep = next_sfep;
959 }
960 if ((void *)sfep != (void *)endp)
961 return __this_address;
962
963 return NULL;
964}
965
873/* 966/*
874 * Convert a leaf attribute list to shortform attribute list 967 * Convert a leaf attribute list to shortform attribute list
875 */ 968 */
@@ -2173,7 +2266,8 @@ xfs_attr3_leaf_lookup_int(
2173 leaf = bp->b_addr; 2266 leaf = bp->b_addr;
2174 xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); 2267 xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
2175 entries = xfs_attr3_leaf_entryp(leaf); 2268 entries = xfs_attr3_leaf_entryp(leaf);
2176 ASSERT(ichdr.count < args->geo->blksize / 8); 2269 if (ichdr.count >= args->geo->blksize / 8)
2270 return -EFSCORRUPTED;
2177 2271
2178 /* 2272 /*
2179 * Binary search. (note: small blocks will skip this loop) 2273 * Binary search. (note: small blocks will skip this loop)
@@ -2189,8 +2283,10 @@ xfs_attr3_leaf_lookup_int(
2189 else 2283 else
2190 break; 2284 break;
2191 } 2285 }
2192 ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); 2286 if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count)))
2193 ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); 2287 return -EFSCORRUPTED;
2288 if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval))
2289 return -EFSCORRUPTED;
2194 2290
2195 /* 2291 /*
2196 * Since we may have duplicate hashval's, find the first matching 2292 * Since we may have duplicate hashval's, find the first matching
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 894124efb421..4da08af5b134 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -53,6 +53,7 @@ int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
53int xfs_attr_shortform_remove(struct xfs_da_args *args); 53int xfs_attr_shortform_remove(struct xfs_da_args *args);
54int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 54int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
55int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 55int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
56xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip);
56void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); 57void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
57 58
58/* 59/*
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d56caf037ca0..21be186067a2 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -65,7 +65,7 @@ xfs_attr3_rmt_blocks(
65 * does CRC, location and bounds checking, the unpacking function checks the 65 * does CRC, location and bounds checking, the unpacking function checks the
66 * attribute parameters and owner. 66 * attribute parameters and owner.
67 */ 67 */
68static bool 68static xfs_failaddr_t
69xfs_attr3_rmt_hdr_ok( 69xfs_attr3_rmt_hdr_ok(
70 void *ptr, 70 void *ptr,
71 xfs_ino_t ino, 71 xfs_ino_t ino,
@@ -76,19 +76,19 @@ xfs_attr3_rmt_hdr_ok(
76 struct xfs_attr3_rmt_hdr *rmt = ptr; 76 struct xfs_attr3_rmt_hdr *rmt = ptr;
77 77
78 if (bno != be64_to_cpu(rmt->rm_blkno)) 78 if (bno != be64_to_cpu(rmt->rm_blkno))
79 return false; 79 return __this_address;
80 if (offset != be32_to_cpu(rmt->rm_offset)) 80 if (offset != be32_to_cpu(rmt->rm_offset))
81 return false; 81 return __this_address;
82 if (size != be32_to_cpu(rmt->rm_bytes)) 82 if (size != be32_to_cpu(rmt->rm_bytes))
83 return false; 83 return __this_address;
84 if (ino != be64_to_cpu(rmt->rm_owner)) 84 if (ino != be64_to_cpu(rmt->rm_owner))
85 return false; 85 return __this_address;
86 86
87 /* ok */ 87 /* ok */
88 return true; 88 return NULL;
89} 89}
90 90
91static bool 91static xfs_failaddr_t
92xfs_attr3_rmt_verify( 92xfs_attr3_rmt_verify(
93 struct xfs_mount *mp, 93 struct xfs_mount *mp,
94 void *ptr, 94 void *ptr,
@@ -98,27 +98,29 @@ xfs_attr3_rmt_verify(
98 struct xfs_attr3_rmt_hdr *rmt = ptr; 98 struct xfs_attr3_rmt_hdr *rmt = ptr;
99 99
100 if (!xfs_sb_version_hascrc(&mp->m_sb)) 100 if (!xfs_sb_version_hascrc(&mp->m_sb))
101 return false; 101 return __this_address;
102 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) 102 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
103 return false; 103 return __this_address;
104 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 104 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
105 return false; 105 return __this_address;
106 if (be64_to_cpu(rmt->rm_blkno) != bno) 106 if (be64_to_cpu(rmt->rm_blkno) != bno)
107 return false; 107 return __this_address;
108 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 108 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
109 return false; 109 return __this_address;
110 if (be32_to_cpu(rmt->rm_offset) + 110 if (be32_to_cpu(rmt->rm_offset) +
111 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 111 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
112 return false; 112 return __this_address;
113 if (rmt->rm_owner == 0) 113 if (rmt->rm_owner == 0)
114 return false; 114 return __this_address;
115 115
116 return true; 116 return NULL;
117} 117}
118 118
119static void 119static int
120xfs_attr3_rmt_read_verify( 120__xfs_attr3_rmt_read_verify(
121 struct xfs_buf *bp) 121 struct xfs_buf *bp,
122 bool check_crc,
123 xfs_failaddr_t *failaddr)
122{ 124{
123 struct xfs_mount *mp = bp->b_target->bt_mount; 125 struct xfs_mount *mp = bp->b_target->bt_mount;
124 char *ptr; 126 char *ptr;
@@ -128,7 +130,7 @@ xfs_attr3_rmt_read_verify(
128 130
129 /* no verification of non-crc buffers */ 131 /* no verification of non-crc buffers */
130 if (!xfs_sb_version_hascrc(&mp->m_sb)) 132 if (!xfs_sb_version_hascrc(&mp->m_sb))
131 return; 133 return 0;
132 134
133 ptr = bp->b_addr; 135 ptr = bp->b_addr;
134 bno = bp->b_bn; 136 bno = bp->b_bn;
@@ -136,23 +138,48 @@ xfs_attr3_rmt_read_verify(
136 ASSERT(len >= blksize); 138 ASSERT(len >= blksize);
137 139
138 while (len > 0) { 140 while (len > 0) {
139 if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 141 if (check_crc &&
140 xfs_buf_ioerror(bp, -EFSBADCRC); 142 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
141 break; 143 *failaddr = __this_address;
142 } 144 return -EFSBADCRC;
143 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
144 xfs_buf_ioerror(bp, -EFSCORRUPTED);
145 break;
146 } 145 }
146 *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
147 if (*failaddr)
148 return -EFSCORRUPTED;
147 len -= blksize; 149 len -= blksize;
148 ptr += blksize; 150 ptr += blksize;
149 bno += BTOBB(blksize); 151 bno += BTOBB(blksize);
150 } 152 }
151 153
152 if (bp->b_error) 154 if (len != 0) {
153 xfs_verifier_error(bp); 155 *failaddr = __this_address;
154 else 156 return -EFSCORRUPTED;
155 ASSERT(len == 0); 157 }
158
159 return 0;
160}
161
162static void
163xfs_attr3_rmt_read_verify(
164 struct xfs_buf *bp)
165{
166 xfs_failaddr_t fa;
167 int error;
168
169 error = __xfs_attr3_rmt_read_verify(bp, true, &fa);
170 if (error)
171 xfs_verifier_error(bp, error, fa);
172}
173
174static xfs_failaddr_t
175xfs_attr3_rmt_verify_struct(
176 struct xfs_buf *bp)
177{
178 xfs_failaddr_t fa;
179 int error;
180
181 error = __xfs_attr3_rmt_read_verify(bp, false, &fa);
182 return error ? fa : NULL;
156} 183}
157 184
158static void 185static void
@@ -160,6 +187,7 @@ xfs_attr3_rmt_write_verify(
160 struct xfs_buf *bp) 187 struct xfs_buf *bp)
161{ 188{
162 struct xfs_mount *mp = bp->b_target->bt_mount; 189 struct xfs_mount *mp = bp->b_target->bt_mount;
190 xfs_failaddr_t fa;
163 int blksize = mp->m_attr_geo->blksize; 191 int blksize = mp->m_attr_geo->blksize;
164 char *ptr; 192 char *ptr;
165 int len; 193 int len;
@@ -177,9 +205,9 @@ xfs_attr3_rmt_write_verify(
177 while (len > 0) { 205 while (len > 0) {
178 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 206 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
179 207
180 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 208 fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
181 xfs_buf_ioerror(bp, -EFSCORRUPTED); 209 if (fa) {
182 xfs_verifier_error(bp); 210 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
183 return; 211 return;
184 } 212 }
185 213
@@ -188,8 +216,7 @@ xfs_attr3_rmt_write_verify(
188 * xfs_attr3_rmt_hdr_set() for the explanation. 216 * xfs_attr3_rmt_hdr_set() for the explanation.
189 */ 217 */
190 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 218 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
191 xfs_buf_ioerror(bp, -EFSCORRUPTED); 219 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
192 xfs_verifier_error(bp);
193 return; 220 return;
194 } 221 }
195 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 222 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
@@ -198,13 +225,16 @@ xfs_attr3_rmt_write_verify(
198 ptr += blksize; 225 ptr += blksize;
199 bno += BTOBB(blksize); 226 bno += BTOBB(blksize);
200 } 227 }
201 ASSERT(len == 0); 228
229 if (len != 0)
230 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
202} 231}
203 232
204const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 233const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
205 .name = "xfs_attr3_rmt", 234 .name = "xfs_attr3_rmt",
206 .verify_read = xfs_attr3_rmt_read_verify, 235 .verify_read = xfs_attr3_rmt_read_verify,
207 .verify_write = xfs_attr3_rmt_write_verify, 236 .verify_write = xfs_attr3_rmt_write_verify,
237 .verify_struct = xfs_attr3_rmt_verify_struct,
208}; 238};
209 239
210STATIC int 240STATIC int
@@ -269,7 +299,7 @@ xfs_attr_rmtval_copyout(
269 byte_cnt = min(*valuelen, byte_cnt); 299 byte_cnt = min(*valuelen, byte_cnt);
270 300
271 if (xfs_sb_version_hascrc(&mp->m_sb)) { 301 if (xfs_sb_version_hascrc(&mp->m_sb)) {
272 if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, 302 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
273 byte_cnt, bno)) { 303 byte_cnt, bno)) {
274 xfs_alert(mp, 304 xfs_alert(mp,
275"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 305"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1bddbba6b80c..daae00ed30c5 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -400,7 +400,7 @@ xfs_bmap_check_leaf_extents(
400 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 400 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
401 bno = be64_to_cpu(*pp); 401 bno = be64_to_cpu(*pp);
402 XFS_WANT_CORRUPTED_GOTO(mp, 402 XFS_WANT_CORRUPTED_GOTO(mp,
403 XFS_FSB_SANITY_CHECK(mp, bno), error0); 403 xfs_verify_fsbno(mp, bno), error0);
404 if (bp_release) { 404 if (bp_release) {
405 bp_release = 0; 405 bp_release = 0;
406 xfs_trans_brelse(NULL, bp); 406 xfs_trans_brelse(NULL, bp);
@@ -1220,7 +1220,7 @@ xfs_iread_extents(
1220 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 1220 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
1221 bno = be64_to_cpu(*pp); 1221 bno = be64_to_cpu(*pp);
1222 XFS_WANT_CORRUPTED_GOTO(mp, 1222 XFS_WANT_CORRUPTED_GOTO(mp,
1223 XFS_FSB_SANITY_CHECK(mp, bno), out_brelse); 1223 xfs_verify_fsbno(mp, bno), out_brelse);
1224 xfs_trans_brelse(tp, bp); 1224 xfs_trans_brelse(tp, bp);
1225 } 1225 }
1226 1226
@@ -3337,6 +3337,49 @@ xfs_bmap_btalloc_filestreams(
3337 return 0; 3337 return 0;
3338} 3338}
3339 3339
3340/* Update all inode and quota accounting for the allocation we just did. */
3341static void
3342xfs_bmap_btalloc_accounting(
3343 struct xfs_bmalloca *ap,
3344 struct xfs_alloc_arg *args)
3345{
3346 if (ap->flags & XFS_BMAPI_COWFORK) {
3347 /*
3348 * COW fork blocks are in-core only and thus are treated as
3349 * in-core quota reservation (like delalloc blocks) even when
3350 * converted to real blocks. The quota reservation is not
3351 * accounted to disk until blocks are remapped to the data
3352 * fork. So if these blocks were previously delalloc, we
3353 * already have quota reservation and there's nothing to do
3354 * yet.
3355 */
3356 if (ap->wasdel)
3357 return;
3358
3359 /*
3360 * Otherwise, we've allocated blocks in a hole. The transaction
3361 * has acquired in-core quota reservation for this extent.
3362 * Rather than account these as real blocks, however, we reduce
3363 * the transaction quota reservation based on the allocation.
3364 * This essentially transfers the transaction quota reservation
3365 * to that of a delalloc extent.
3366 */
3367 ap->ip->i_delayed_blks += args->len;
3368 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3369 -(long)args->len);
3370 return;
3371 }
3372
3373 /* data/attr fork only */
3374 ap->ip->i_d.di_nblocks += args->len;
3375 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3376 if (ap->wasdel)
3377 ap->ip->i_delayed_blks -= args->len;
3378 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3379 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3380 args->len);
3381}
3382
3340STATIC int 3383STATIC int
3341xfs_bmap_btalloc( 3384xfs_bmap_btalloc(
3342 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 3385 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
@@ -3347,6 +3390,8 @@ xfs_bmap_btalloc(
3347 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 3390 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3348 xfs_agnumber_t ag; 3391 xfs_agnumber_t ag;
3349 xfs_alloc_arg_t args; 3392 xfs_alloc_arg_t args;
3393 xfs_fileoff_t orig_offset;
3394 xfs_extlen_t orig_length;
3350 xfs_extlen_t blen; 3395 xfs_extlen_t blen;
3351 xfs_extlen_t nextminlen = 0; 3396 xfs_extlen_t nextminlen = 0;
3352 int nullfb; /* true if ap->firstblock isn't set */ 3397 int nullfb; /* true if ap->firstblock isn't set */
@@ -3356,6 +3401,8 @@ xfs_bmap_btalloc(
3356 int stripe_align; 3401 int stripe_align;
3357 3402
3358 ASSERT(ap->length); 3403 ASSERT(ap->length);
3404 orig_offset = ap->offset;
3405 orig_length = ap->length;
3359 3406
3360 mp = ap->ip->i_mount; 3407 mp = ap->ip->i_mount;
3361 3408
@@ -3571,19 +3618,23 @@ xfs_bmap_btalloc(
3571 *ap->firstblock = args.fsbno; 3618 *ap->firstblock = args.fsbno;
3572 ASSERT(nullfb || fb_agno <= args.agno); 3619 ASSERT(nullfb || fb_agno <= args.agno);
3573 ap->length = args.len; 3620 ap->length = args.len;
3574 if (!(ap->flags & XFS_BMAPI_COWFORK))
3575 ap->ip->i_d.di_nblocks += args.len;
3576 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3577 if (ap->wasdel)
3578 ap->ip->i_delayed_blks -= args.len;
3579 /* 3621 /*
3580 * Adjust the disk quota also. This was reserved 3622 * If the extent size hint is active, we tried to round the
3581 * earlier. 3623 * caller's allocation request offset down to extsz and the
3624 * length up to another extsz boundary. If we found a free
3625 * extent we mapped it in starting at this new offset. If the
3626 * newly mapped space isn't long enough to cover any of the
3627 * range of offsets that was originally requested, move the
3628 * mapping up so that we can fill as much of the caller's
3629 * original request as possible. Free space is apparently
3630 * very fragmented so we're unlikely to be able to satisfy the
3631 * hints anyway.
3582 */ 3632 */
3583 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 3633 if (ap->length <= orig_length)
3584 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : 3634 ap->offset = orig_offset;
3585 XFS_TRANS_DQ_BCOUNT, 3635 else if (ap->offset + ap->length < orig_offset + orig_length)
3586 (long) args.len); 3636 ap->offset = orig_offset + orig_length - ap->length;
3637 xfs_bmap_btalloc_accounting(ap, &args);
3587 } else { 3638 } else {
3588 ap->blkno = NULLFSBLOCK; 3639 ap->blkno = NULLFSBLOCK;
3589 ap->length = 0; 3640 ap->length = 0;
@@ -3876,8 +3927,6 @@ xfs_bmapi_reserve_delalloc(
3876 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3927 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
3877 xfs_extlen_t alen; 3928 xfs_extlen_t alen;
3878 xfs_extlen_t indlen; 3929 xfs_extlen_t indlen;
3879 char rt = XFS_IS_REALTIME_INODE(ip);
3880 xfs_extlen_t extsz;
3881 int error; 3930 int error;
3882 xfs_fileoff_t aoff = off; 3931 xfs_fileoff_t aoff = off;
3883 3932
@@ -3892,31 +3941,25 @@ xfs_bmapi_reserve_delalloc(
3892 prealloc = alen - len; 3941 prealloc = alen - len;
3893 3942
3894 /* Figure out the extent size, adjust alen */ 3943 /* Figure out the extent size, adjust alen */
3895 if (whichfork == XFS_COW_FORK) 3944 if (whichfork == XFS_COW_FORK) {
3896 extsz = xfs_get_cowextsz_hint(ip);
3897 else
3898 extsz = xfs_get_extsz_hint(ip);
3899 if (extsz) {
3900 struct xfs_bmbt_irec prev; 3945 struct xfs_bmbt_irec prev;
3946 xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
3901 3947
3902 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) 3948 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
3903 prev.br_startoff = NULLFILEOFF; 3949 prev.br_startoff = NULLFILEOFF;
3904 3950
3905 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 3951 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
3906 1, 0, &aoff, &alen); 3952 1, 0, &aoff, &alen);
3907 ASSERT(!error); 3953 ASSERT(!error);
3908 } 3954 }
3909 3955
3910 if (rt)
3911 extsz = alen / mp->m_sb.sb_rextsize;
3912
3913 /* 3956 /*
3914 * Make a transaction-less quota reservation for delayed allocation 3957 * Make a transaction-less quota reservation for delayed allocation
3915 * blocks. This number gets adjusted later. We return if we haven't 3958 * blocks. This number gets adjusted later. We return if we haven't
3916 * allocated blocks already inside this loop. 3959 * allocated blocks already inside this loop.
3917 */ 3960 */
3918 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, 3961 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
3919 rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 3962 XFS_QMOPT_RES_REGBLKS);
3920 if (error) 3963 if (error)
3921 return error; 3964 return error;
3922 3965
@@ -3927,12 +3970,7 @@ xfs_bmapi_reserve_delalloc(
3927 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); 3970 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
3928 ASSERT(indlen > 0); 3971 ASSERT(indlen > 0);
3929 3972
3930 if (rt) { 3973 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
3931 error = xfs_mod_frextents(mp, -((int64_t)extsz));
3932 } else {
3933 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
3934 }
3935
3936 if (error) 3974 if (error)
3937 goto out_unreserve_quota; 3975 goto out_unreserve_quota;
3938 3976
@@ -3963,14 +4001,11 @@ xfs_bmapi_reserve_delalloc(
3963 return 0; 4001 return 0;
3964 4002
3965out_unreserve_blocks: 4003out_unreserve_blocks:
3966 if (rt) 4004 xfs_mod_fdblocks(mp, alen, false);
3967 xfs_mod_frextents(mp, extsz);
3968 else
3969 xfs_mod_fdblocks(mp, alen, false);
3970out_unreserve_quota: 4005out_unreserve_quota:
3971 if (XFS_IS_QUOTA_ON(mp)) 4006 if (XFS_IS_QUOTA_ON(mp))
3972 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? 4007 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
3973 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 4008 XFS_QMOPT_RES_REGBLKS);
3974 return error; 4009 return error;
3975} 4010}
3976 4011
@@ -4304,8 +4339,16 @@ xfs_bmapi_write(
4304 while (bno < end && n < *nmap) { 4339 while (bno < end && n < *nmap) {
4305 bool need_alloc = false, wasdelay = false; 4340 bool need_alloc = false, wasdelay = false;
4306 4341
4307 /* in hole or beyoned EOF? */ 4342 /* in hole or beyond EOF? */
4308 if (eof || bma.got.br_startoff > bno) { 4343 if (eof || bma.got.br_startoff > bno) {
4344 /*
4345 * CoW fork conversions should /never/ hit EOF or
4346 * holes. There should always be something for us
4347 * to work on.
4348 */
4349 ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4350 (flags & XFS_BMAPI_COWFORK)));
4351
4309 if (flags & XFS_BMAPI_DELALLOC) { 4352 if (flags & XFS_BMAPI_DELALLOC) {
4310 /* 4353 /*
4311 * For the COW fork we can reasonably get a 4354 * For the COW fork we can reasonably get a
@@ -4824,6 +4867,7 @@ xfs_bmap_del_extent_cow(
4824 xfs_iext_insert(ip, icur, &new, state); 4867 xfs_iext_insert(ip, icur, &new, state);
4825 break; 4868 break;
4826 } 4869 }
4870 ip->i_delayed_blks -= del->br_blockcount;
4827} 4871}
4828 4872
4829/* 4873/*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index c10aecaaae44..9faf479aba49 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -425,33 +425,29 @@ xfs_bmbt_diff_two_keys(
425 be64_to_cpu(k2->bmbt.br_startoff); 425 be64_to_cpu(k2->bmbt.br_startoff);
426} 426}
427 427
428static bool 428static xfs_failaddr_t
429xfs_bmbt_verify( 429xfs_bmbt_verify(
430 struct xfs_buf *bp) 430 struct xfs_buf *bp)
431{ 431{
432 struct xfs_mount *mp = bp->b_target->bt_mount; 432 struct xfs_mount *mp = bp->b_target->bt_mount;
433 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 433 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
434 xfs_failaddr_t fa;
434 unsigned int level; 435 unsigned int level;
435 436
436 switch (block->bb_magic) { 437 switch (block->bb_magic) {
437 case cpu_to_be32(XFS_BMAP_CRC_MAGIC): 438 case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
438 if (!xfs_sb_version_hascrc(&mp->m_sb))
439 return false;
440 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
441 return false;
442 if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
443 return false;
444 /* 439 /*
445 * XXX: need a better way of verifying the owner here. Right now 440 * XXX: need a better way of verifying the owner here. Right now
446 * just make sure there has been one set. 441 * just make sure there has been one set.
447 */ 442 */
448 if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) 443 fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
449 return false; 444 if (fa)
445 return fa;
450 /* fall through */ 446 /* fall through */
451 case cpu_to_be32(XFS_BMAP_MAGIC): 447 case cpu_to_be32(XFS_BMAP_MAGIC):
452 break; 448 break;
453 default: 449 default:
454 return false; 450 return __this_address;
455 } 451 }
456 452
457 /* 453 /*
@@ -463,46 +459,39 @@ xfs_bmbt_verify(
463 */ 459 */
464 level = be16_to_cpu(block->bb_level); 460 level = be16_to_cpu(block->bb_level);
465 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) 461 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
466 return false; 462 return __this_address;
467 if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) 463
468 return false; 464 return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]);
469
470 /* sibling pointer verification */
471 if (!block->bb_u.l.bb_leftsib ||
472 (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
473 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
474 return false;
475 if (!block->bb_u.l.bb_rightsib ||
476 (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
477 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
478 return false;
479
480 return true;
481} 465}
482 466
483static void 467static void
484xfs_bmbt_read_verify( 468xfs_bmbt_read_verify(
485 struct xfs_buf *bp) 469 struct xfs_buf *bp)
486{ 470{
471 xfs_failaddr_t fa;
472
487 if (!xfs_btree_lblock_verify_crc(bp)) 473 if (!xfs_btree_lblock_verify_crc(bp))
488 xfs_buf_ioerror(bp, -EFSBADCRC); 474 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
489 else if (!xfs_bmbt_verify(bp)) 475 else {
490 xfs_buf_ioerror(bp, -EFSCORRUPTED); 476 fa = xfs_bmbt_verify(bp);
477 if (fa)
478 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
479 }
491 480
492 if (bp->b_error) { 481 if (bp->b_error)
493 trace_xfs_btree_corrupt(bp, _RET_IP_); 482 trace_xfs_btree_corrupt(bp, _RET_IP_);
494 xfs_verifier_error(bp);
495 }
496} 483}
497 484
498static void 485static void
499xfs_bmbt_write_verify( 486xfs_bmbt_write_verify(
500 struct xfs_buf *bp) 487 struct xfs_buf *bp)
501{ 488{
502 if (!xfs_bmbt_verify(bp)) { 489 xfs_failaddr_t fa;
490
491 fa = xfs_bmbt_verify(bp);
492 if (fa) {
503 trace_xfs_btree_corrupt(bp, _RET_IP_); 493 trace_xfs_btree_corrupt(bp, _RET_IP_);
504 xfs_buf_ioerror(bp, -EFSCORRUPTED); 494 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
505 xfs_verifier_error(bp);
506 return; 495 return;
507 } 496 }
508 xfs_btree_lblock_calc_crc(bp); 497 xfs_btree_lblock_calc_crc(bp);
@@ -512,6 +501,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
512 .name = "xfs_bmbt", 501 .name = "xfs_bmbt",
513 .verify_read = xfs_bmbt_read_verify, 502 .verify_read = xfs_bmbt_read_verify,
514 .verify_write = xfs_bmbt_write_verify, 503 .verify_write = xfs_bmbt_write_verify,
504 .verify_struct = xfs_bmbt_verify,
515}; 505};
516 506
517 507
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5f33adf8eecb..79ee4a1951d1 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc(
273 struct xfs_buf *bp) 273 struct xfs_buf *bp)
274{ 274{
275 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 275 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
276 struct xfs_buf_log_item *bip = bp->b_fspriv; 276 struct xfs_buf_log_item *bip = bp->b_log_item;
277 277
278 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 278 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
279 return; 279 return;
@@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc(
311 struct xfs_buf *bp) 311 struct xfs_buf *bp)
312{ 312{
313 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 313 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
314 struct xfs_buf_log_item *bip = bp->b_fspriv; 314 struct xfs_buf_log_item *bip = bp->b_log_item;
315 315
316 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 316 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
317 return; 317 return;
@@ -329,7 +329,7 @@ xfs_btree_sblock_verify_crc(
329 329
330 if (xfs_sb_version_hascrc(&mp->m_sb)) { 330 if (xfs_sb_version_hascrc(&mp->m_sb)) {
331 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 331 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
332 return false; 332 return __this_address;
333 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 333 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
334 } 334 }
335 335
@@ -853,7 +853,7 @@ xfs_btree_read_bufl(
853 xfs_daddr_t d; /* real disk block address */ 853 xfs_daddr_t d; /* real disk block address */
854 int error; 854 int error;
855 855
856 if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) 856 if (!xfs_verify_fsbno(mp, fsbno))
857 return -EFSCORRUPTED; 857 return -EFSCORRUPTED;
858 d = XFS_FSB_TO_DADDR(mp, fsbno); 858 d = XFS_FSB_TO_DADDR(mp, fsbno);
859 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 859 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
@@ -4529,6 +4529,51 @@ xfs_btree_change_owner(
4529 &bbcoi); 4529 &bbcoi);
4530} 4530}
4531 4531
4532/* Verify the v5 fields of a long-format btree block. */
4533xfs_failaddr_t
4534xfs_btree_lblock_v5hdr_verify(
4535 struct xfs_buf *bp,
4536 uint64_t owner)
4537{
4538 struct xfs_mount *mp = bp->b_target->bt_mount;
4539 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4540
4541 if (!xfs_sb_version_hascrc(&mp->m_sb))
4542 return __this_address;
4543 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
4544 return __this_address;
4545 if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
4546 return __this_address;
4547 if (owner != XFS_RMAP_OWN_UNKNOWN &&
4548 be64_to_cpu(block->bb_u.l.bb_owner) != owner)
4549 return __this_address;
4550 return NULL;
4551}
4552
4553/* Verify a long-format btree block. */
4554xfs_failaddr_t
4555xfs_btree_lblock_verify(
4556 struct xfs_buf *bp,
4557 unsigned int max_recs)
4558{
4559 struct xfs_mount *mp = bp->b_target->bt_mount;
4560 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4561
4562 /* numrecs verification */
4563 if (be16_to_cpu(block->bb_numrecs) > max_recs)
4564 return __this_address;
4565
4566 /* sibling pointer verification */
4567 if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
4568 !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
4569 return __this_address;
4570 if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
4571 !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
4572 return __this_address;
4573
4574 return NULL;
4575}
4576
4532/** 4577/**
4533 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4578 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4534 * btree block 4579 * btree block
@@ -4537,7 +4582,7 @@ xfs_btree_change_owner(
4537 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4582 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
4538 * @pag_max_level: pointer to the per-ag max level field 4583 * @pag_max_level: pointer to the per-ag max level field
4539 */ 4584 */
4540bool 4585xfs_failaddr_t
4541xfs_btree_sblock_v5hdr_verify( 4586xfs_btree_sblock_v5hdr_verify(
4542 struct xfs_buf *bp) 4587 struct xfs_buf *bp)
4543{ 4588{
@@ -4546,14 +4591,14 @@ xfs_btree_sblock_v5hdr_verify(
4546 struct xfs_perag *pag = bp->b_pag; 4591 struct xfs_perag *pag = bp->b_pag;
4547 4592
4548 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4593 if (!xfs_sb_version_hascrc(&mp->m_sb))
4549 return false; 4594 return __this_address;
4550 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4595 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4551 return false; 4596 return __this_address;
4552 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4597 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
4553 return false; 4598 return __this_address;
4554 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4599 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4555 return false; 4600 return __this_address;
4556 return true; 4601 return NULL;
4557} 4602}
4558 4603
4559/** 4604/**
@@ -4562,29 +4607,29 @@ xfs_btree_sblock_v5hdr_verify(
4562 * @bp: buffer containing the btree block 4607 * @bp: buffer containing the btree block
4563 * @max_recs: maximum records allowed in this btree node 4608 * @max_recs: maximum records allowed in this btree node
4564 */ 4609 */
4565bool 4610xfs_failaddr_t
4566xfs_btree_sblock_verify( 4611xfs_btree_sblock_verify(
4567 struct xfs_buf *bp, 4612 struct xfs_buf *bp,
4568 unsigned int max_recs) 4613 unsigned int max_recs)
4569{ 4614{
4570 struct xfs_mount *mp = bp->b_target->bt_mount; 4615 struct xfs_mount *mp = bp->b_target->bt_mount;
4571 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4616 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4617 xfs_agblock_t agno;
4572 4618
4573 /* numrecs verification */ 4619 /* numrecs verification */
4574 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4620 if (be16_to_cpu(block->bb_numrecs) > max_recs)
4575 return false; 4621 return __this_address;
4576 4622
4577 /* sibling pointer verification */ 4623 /* sibling pointer verification */
4578 if (!block->bb_u.s.bb_leftsib || 4624 agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
4579 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && 4625 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
4580 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) 4626 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
4581 return false; 4627 return __this_address;
4582 if (!block->bb_u.s.bb_rightsib || 4628 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
4583 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && 4629 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
4584 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) 4630 return __this_address;
4585 return false;
4586 4631
4587 return true; 4632 return NULL;
4588} 4633}
4589 4634
4590/* 4635/*
@@ -4953,3 +4998,33 @@ xfs_btree_diff_two_ptrs(
4953 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); 4998 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
4954 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); 4999 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
4955} 5000}
5001
5002/* If there's an extent, we're done. */
5003STATIC int
5004xfs_btree_has_record_helper(
5005 struct xfs_btree_cur *cur,
5006 union xfs_btree_rec *rec,
5007 void *priv)
5008{
5009 return XFS_BTREE_QUERY_RANGE_ABORT;
5010}
5011
5012/* Is there a record covering a given range of keys? */
5013int
5014xfs_btree_has_record(
5015 struct xfs_btree_cur *cur,
5016 union xfs_btree_irec *low,
5017 union xfs_btree_irec *high,
5018 bool *exists)
5019{
5020 int error;
5021
5022 error = xfs_btree_query_range(cur, low, high,
5023 &xfs_btree_has_record_helper, NULL);
5024 if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
5025 *exists = true;
5026 return 0;
5027 }
5028 *exists = false;
5029 return error;
5030}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b57501c6f71d..50440b5618e8 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -473,10 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
473#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) 473#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
474#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) 474#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
475 475
476#define XFS_FSB_SANITY_CHECK(mp,fsb) \
477 (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
478 XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
479
480/* 476/*
481 * Trace hooks. Currently not implemented as they need to be ported 477 * Trace hooks. Currently not implemented as they need to be ported
482 * over to the generic tracing functionality, which is some effort. 478 * over to the generic tracing functionality, which is some effort.
@@ -496,8 +492,14 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
496#define XFS_BTREE_TRACE_ARGR(c, r) 492#define XFS_BTREE_TRACE_ARGR(c, r)
497#define XFS_BTREE_TRACE_CURSOR(c, t) 493#define XFS_BTREE_TRACE_CURSOR(c, t)
498 494
499bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); 495xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
500bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); 496xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp,
497 unsigned int max_recs);
498xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp,
499 uint64_t owner);
500xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
501 unsigned int max_recs);
502
501uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, 503uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
502 unsigned long len); 504 unsigned long len);
503xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, 505xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
@@ -545,5 +547,7 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur,
545 struct xfs_btree_block *block, union xfs_btree_key *key); 547 struct xfs_btree_block *block, union xfs_btree_key *key);
546union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, 548union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
547 union xfs_btree_key *key); 549 union xfs_btree_key *key);
550int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
551 union xfs_btree_irec *high, bool *exists);
548 552
549#endif /* __XFS_BTREE_H__ */ 553#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 651611530d2f..ea187b4a7991 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -128,7 +128,7 @@ xfs_da_state_free(xfs_da_state_t *state)
128 kmem_zone_free(xfs_da_state_zone, state); 128 kmem_zone_free(xfs_da_state_zone, state);
129} 129}
130 130
131static bool 131static xfs_failaddr_t
132xfs_da3_node_verify( 132xfs_da3_node_verify(
133 struct xfs_buf *bp) 133 struct xfs_buf *bp)
134{ 134{
@@ -145,24 +145,24 @@ xfs_da3_node_verify(
145 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 145 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
146 146
147 if (ichdr.magic != XFS_DA3_NODE_MAGIC) 147 if (ichdr.magic != XFS_DA3_NODE_MAGIC)
148 return false; 148 return __this_address;
149 149
150 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) 150 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
151 return false; 151 return __this_address;
152 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 152 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
153 return false; 153 return __this_address;
154 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) 154 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
155 return false; 155 return __this_address;
156 } else { 156 } else {
157 if (ichdr.magic != XFS_DA_NODE_MAGIC) 157 if (ichdr.magic != XFS_DA_NODE_MAGIC)
158 return false; 158 return __this_address;
159 } 159 }
160 if (ichdr.level == 0) 160 if (ichdr.level == 0)
161 return false; 161 return __this_address;
162 if (ichdr.level > XFS_DA_NODE_MAXDEPTH) 162 if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
163 return false; 163 return __this_address;
164 if (ichdr.count == 0) 164 if (ichdr.count == 0)
165 return false; 165 return __this_address;
166 166
167 /* 167 /*
168 * we don't know if the node is for and attribute or directory tree, 168 * we don't know if the node is for and attribute or directory tree,
@@ -170,11 +170,11 @@ xfs_da3_node_verify(
170 */ 170 */
171 if (ichdr.count > mp->m_dir_geo->node_ents && 171 if (ichdr.count > mp->m_dir_geo->node_ents &&
172 ichdr.count > mp->m_attr_geo->node_ents) 172 ichdr.count > mp->m_attr_geo->node_ents)
173 return false; 173 return __this_address;
174 174
175 /* XXX: hash order check? */ 175 /* XXX: hash order check? */
176 176
177 return true; 177 return NULL;
178} 178}
179 179
180static void 180static void
@@ -182,12 +182,13 @@ xfs_da3_node_write_verify(
182 struct xfs_buf *bp) 182 struct xfs_buf *bp)
183{ 183{
184 struct xfs_mount *mp = bp->b_target->bt_mount; 184 struct xfs_mount *mp = bp->b_target->bt_mount;
185 struct xfs_buf_log_item *bip = bp->b_fspriv; 185 struct xfs_buf_log_item *bip = bp->b_log_item;
186 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 186 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
187 xfs_failaddr_t fa;
187 188
188 if (!xfs_da3_node_verify(bp)) { 189 fa = xfs_da3_node_verify(bp);
189 xfs_buf_ioerror(bp, -EFSCORRUPTED); 190 if (fa) {
190 xfs_verifier_error(bp); 191 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
191 return; 192 return;
192 } 193 }
193 194
@@ -211,19 +212,20 @@ xfs_da3_node_read_verify(
211 struct xfs_buf *bp) 212 struct xfs_buf *bp)
212{ 213{
213 struct xfs_da_blkinfo *info = bp->b_addr; 214 struct xfs_da_blkinfo *info = bp->b_addr;
215 xfs_failaddr_t fa;
214 216
215 switch (be16_to_cpu(info->magic)) { 217 switch (be16_to_cpu(info->magic)) {
216 case XFS_DA3_NODE_MAGIC: 218 case XFS_DA3_NODE_MAGIC:
217 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { 219 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
218 xfs_buf_ioerror(bp, -EFSBADCRC); 220 xfs_verifier_error(bp, -EFSBADCRC,
221 __this_address);
219 break; 222 break;
220 } 223 }
221 /* fall through */ 224 /* fall through */
222 case XFS_DA_NODE_MAGIC: 225 case XFS_DA_NODE_MAGIC:
223 if (!xfs_da3_node_verify(bp)) { 226 fa = xfs_da3_node_verify(bp);
224 xfs_buf_ioerror(bp, -EFSCORRUPTED); 227 if (fa)
225 break; 228 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
226 }
227 return; 229 return;
228 case XFS_ATTR_LEAF_MAGIC: 230 case XFS_ATTR_LEAF_MAGIC:
229 case XFS_ATTR3_LEAF_MAGIC: 231 case XFS_ATTR3_LEAF_MAGIC:
@@ -236,18 +238,40 @@ xfs_da3_node_read_verify(
236 bp->b_ops->verify_read(bp); 238 bp->b_ops->verify_read(bp);
237 return; 239 return;
238 default: 240 default:
239 xfs_buf_ioerror(bp, -EFSCORRUPTED); 241 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
240 break; 242 break;
241 } 243 }
244}
245
246/* Verify the structure of a da3 block. */
247static xfs_failaddr_t
248xfs_da3_node_verify_struct(
249 struct xfs_buf *bp)
250{
251 struct xfs_da_blkinfo *info = bp->b_addr;
242 252
243 /* corrupt block */ 253 switch (be16_to_cpu(info->magic)) {
244 xfs_verifier_error(bp); 254 case XFS_DA3_NODE_MAGIC:
255 case XFS_DA_NODE_MAGIC:
256 return xfs_da3_node_verify(bp);
257 case XFS_ATTR_LEAF_MAGIC:
258 case XFS_ATTR3_LEAF_MAGIC:
259 bp->b_ops = &xfs_attr3_leaf_buf_ops;
260 return bp->b_ops->verify_struct(bp);
261 case XFS_DIR2_LEAFN_MAGIC:
262 case XFS_DIR3_LEAFN_MAGIC:
263 bp->b_ops = &xfs_dir3_leafn_buf_ops;
264 return bp->b_ops->verify_struct(bp);
265 default:
266 return __this_address;
267 }
245} 268}
246 269
247const struct xfs_buf_ops xfs_da3_node_buf_ops = { 270const struct xfs_buf_ops xfs_da3_node_buf_ops = {
248 .name = "xfs_da3_node", 271 .name = "xfs_da3_node",
249 .verify_read = xfs_da3_node_read_verify, 272 .verify_read = xfs_da3_node_read_verify,
250 .verify_write = xfs_da3_node_write_verify, 273 .verify_write = xfs_da3_node_write_verify,
274 .verify_struct = xfs_da3_node_verify_struct,
251}; 275};
252 276
253int 277int
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 3771edcb301d..7e77299b7789 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -875,4 +875,10 @@ struct xfs_attr3_rmt_hdr {
875 ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ 875 ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
876 sizeof(struct xfs_attr3_rmt_hdr) : 0)) 876 sizeof(struct xfs_attr3_rmt_hdr) : 0))
877 877
878/* Number of bytes in a directory block. */
879static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
880{
881 return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog);
882}
883
878#endif /* __XFS_DA_FORMAT_H__ */ 884#endif /* __XFS_DA_FORMAT_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index e10778c102ea..92f94e190f04 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -119,8 +119,7 @@ xfs_da_mount(
119 119
120 120
121 ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 121 ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
122 ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= 122 ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE);
123 XFS_MAX_BLOCKSIZE);
124 123
125 mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); 124 mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
126 mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); 125 mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
@@ -140,7 +139,7 @@ xfs_da_mount(
140 dageo = mp->m_dir_geo; 139 dageo = mp->m_dir_geo;
141 dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; 140 dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
142 dageo->fsblog = mp->m_sb.sb_blocklog; 141 dageo->fsblog = mp->m_sb.sb_blocklog;
143 dageo->blksize = 1 << dageo->blklog; 142 dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb);
144 dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; 143 dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
145 144
146 /* 145 /*
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 1a8f2cf977ca..388d67c5c903 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -340,5 +340,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
340#define XFS_READDIR_BUFSIZE (32768) 340#define XFS_READDIR_BUFSIZE (32768)
341 341
342unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); 342unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype);
343void *xfs_dir3_data_endp(struct xfs_da_geometry *geo,
344 struct xfs_dir2_data_hdr *hdr);
343 345
344#endif /* __XFS_DIR2_H__ */ 346#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 43c902f7a68d..2da86a394bcf 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -58,7 +58,7 @@ xfs_dir_startup(void)
58 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); 58 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
59} 59}
60 60
61static bool 61static xfs_failaddr_t
62xfs_dir3_block_verify( 62xfs_dir3_block_verify(
63 struct xfs_buf *bp) 63 struct xfs_buf *bp)
64{ 64{
@@ -67,20 +67,18 @@ xfs_dir3_block_verify(
67 67
68 if (xfs_sb_version_hascrc(&mp->m_sb)) { 68 if (xfs_sb_version_hascrc(&mp->m_sb)) {
69 if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) 69 if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
70 return false; 70 return __this_address;
71 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 71 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
72 return false; 72 return __this_address;
73 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 73 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
74 return false; 74 return __this_address;
75 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 75 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
76 return false; 76 return __this_address;
77 } else { 77 } else {
78 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) 78 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
79 return false; 79 return __this_address;
80 } 80 }
81 if (__xfs_dir3_data_check(NULL, bp)) 81 return __xfs_dir3_data_check(NULL, bp);
82 return false;
83 return true;
84} 82}
85 83
86static void 84static void
@@ -88,15 +86,16 @@ xfs_dir3_block_read_verify(
88 struct xfs_buf *bp) 86 struct xfs_buf *bp)
89{ 87{
90 struct xfs_mount *mp = bp->b_target->bt_mount; 88 struct xfs_mount *mp = bp->b_target->bt_mount;
89 xfs_failaddr_t fa;
91 90
92 if (xfs_sb_version_hascrc(&mp->m_sb) && 91 if (xfs_sb_version_hascrc(&mp->m_sb) &&
93 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 92 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
94 xfs_buf_ioerror(bp, -EFSBADCRC); 93 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
95 else if (!xfs_dir3_block_verify(bp)) 94 else {
96 xfs_buf_ioerror(bp, -EFSCORRUPTED); 95 fa = xfs_dir3_block_verify(bp);
97 96 if (fa)
98 if (bp->b_error) 97 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
99 xfs_verifier_error(bp); 98 }
100} 99}
101 100
102static void 101static void
@@ -104,12 +103,13 @@ xfs_dir3_block_write_verify(
104 struct xfs_buf *bp) 103 struct xfs_buf *bp)
105{ 104{
106 struct xfs_mount *mp = bp->b_target->bt_mount; 105 struct xfs_mount *mp = bp->b_target->bt_mount;
107 struct xfs_buf_log_item *bip = bp->b_fspriv; 106 struct xfs_buf_log_item *bip = bp->b_log_item;
108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 107 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
108 xfs_failaddr_t fa;
109 109
110 if (!xfs_dir3_block_verify(bp)) { 110 fa = xfs_dir3_block_verify(bp);
111 xfs_buf_ioerror(bp, -EFSCORRUPTED); 111 if (fa) {
112 xfs_verifier_error(bp); 112 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
113 return; 113 return;
114 } 114 }
115 115
@@ -126,6 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
126 .name = "xfs_dir3_block", 126 .name = "xfs_dir3_block",
127 .verify_read = xfs_dir3_block_read_verify, 127 .verify_read = xfs_dir3_block_read_verify,
128 .verify_write = xfs_dir3_block_write_verify, 128 .verify_write = xfs_dir3_block_write_verify,
129 .verify_struct = xfs_dir3_block_verify,
129}; 130};
130 131
131int 132int
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 8727a43115ef..920279485275 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -36,9 +36,9 @@
36/* 36/*
37 * Check the consistency of the data block. 37 * Check the consistency of the data block.
38 * The input can also be a block-format directory. 38 * The input can also be a block-format directory.
39 * Return 0 is the buffer is good, otherwise an error. 39 * Return NULL if the buffer is good, otherwise the address of the error.
40 */ 40 */
41int 41xfs_failaddr_t
42__xfs_dir3_data_check( 42__xfs_dir3_data_check(
43 struct xfs_inode *dp, /* incore inode pointer */ 43 struct xfs_inode *dp, /* incore inode pointer */
44 struct xfs_buf *bp) /* data block's buffer */ 44 struct xfs_buf *bp) /* data block's buffer */
@@ -73,6 +73,14 @@ __xfs_dir3_data_check(
73 */ 73 */
74 ops = xfs_dir_get_ops(mp, dp); 74 ops = xfs_dir_get_ops(mp, dp);
75 75
76 /*
77 * If this isn't a directory, or we don't get handed the dir ops,
78 * something is seriously wrong. Bail out.
79 */
80 if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) ||
81 ops != xfs_dir_get_ops(mp, NULL))
82 return __this_address;
83
76 hdr = bp->b_addr; 84 hdr = bp->b_addr;
77 p = (char *)ops->data_entry_p(hdr); 85 p = (char *)ops->data_entry_p(hdr);
78 86
@@ -81,7 +89,6 @@ __xfs_dir3_data_check(
81 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): 89 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
82 btp = xfs_dir2_block_tail_p(geo, hdr); 90 btp = xfs_dir2_block_tail_p(geo, hdr);
83 lep = xfs_dir2_block_leaf_p(btp); 91 lep = xfs_dir2_block_leaf_p(btp);
84 endp = (char *)lep;
85 92
86 /* 93 /*
87 * The number of leaf entries is limited by the size of the 94 * The number of leaf entries is limited by the size of the
@@ -90,17 +97,19 @@ __xfs_dir3_data_check(
90 * so just ensure that the count falls somewhere inside the 97 * so just ensure that the count falls somewhere inside the
91 * block right now. 98 * block right now.
92 */ 99 */
93 XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < 100 if (be32_to_cpu(btp->count) >=
94 ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); 101 ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry))
102 return __this_address;
95 break; 103 break;
96 case cpu_to_be32(XFS_DIR3_DATA_MAGIC): 104 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
97 case cpu_to_be32(XFS_DIR2_DATA_MAGIC): 105 case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
98 endp = (char *)hdr + geo->blksize;
99 break; 106 break;
100 default: 107 default:
101 XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); 108 return __this_address;
102 return -EFSCORRUPTED;
103 } 109 }
110 endp = xfs_dir3_data_endp(geo, hdr);
111 if (!endp)
112 return __this_address;
104 113
105 /* 114 /*
106 * Account for zero bestfree entries. 115 * Account for zero bestfree entries.
@@ -108,22 +117,25 @@ __xfs_dir3_data_check(
108 bf = ops->data_bestfree_p(hdr); 117 bf = ops->data_bestfree_p(hdr);
109 count = lastfree = freeseen = 0; 118 count = lastfree = freeseen = 0;
110 if (!bf[0].length) { 119 if (!bf[0].length) {
111 XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); 120 if (bf[0].offset)
121 return __this_address;
112 freeseen |= 1 << 0; 122 freeseen |= 1 << 0;
113 } 123 }
114 if (!bf[1].length) { 124 if (!bf[1].length) {
115 XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); 125 if (bf[1].offset)
126 return __this_address;
116 freeseen |= 1 << 1; 127 freeseen |= 1 << 1;
117 } 128 }
118 if (!bf[2].length) { 129 if (!bf[2].length) {
119 XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); 130 if (bf[2].offset)
131 return __this_address;
120 freeseen |= 1 << 2; 132 freeseen |= 1 << 2;
121 } 133 }
122 134
123 XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= 135 if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length))
124 be16_to_cpu(bf[1].length)); 136 return __this_address;
125 XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= 137 if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length))
126 be16_to_cpu(bf[2].length)); 138 return __this_address;
127 /* 139 /*
128 * Loop over the data/unused entries. 140 * Loop over the data/unused entries.
129 */ 141 */
@@ -135,22 +147,23 @@ __xfs_dir3_data_check(
135 * doesn't need to be there. 147 * doesn't need to be there.
136 */ 148 */
137 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 149 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
138 XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); 150 if (lastfree != 0)
139 XFS_WANT_CORRUPTED_RETURN(mp, endp >= 151 return __this_address;
140 p + be16_to_cpu(dup->length)); 152 if (endp < p + be16_to_cpu(dup->length))
141 XFS_WANT_CORRUPTED_RETURN(mp, 153 return __this_address;
142 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == 154 if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
143 (char *)dup - (char *)hdr); 155 (char *)dup - (char *)hdr)
156 return __this_address;
144 dfp = xfs_dir2_data_freefind(hdr, bf, dup); 157 dfp = xfs_dir2_data_freefind(hdr, bf, dup);
145 if (dfp) { 158 if (dfp) {
146 i = (int)(dfp - bf); 159 i = (int)(dfp - bf);
147 XFS_WANT_CORRUPTED_RETURN(mp, 160 if ((freeseen & (1 << i)) != 0)
148 (freeseen & (1 << i)) == 0); 161 return __this_address;
149 freeseen |= 1 << i; 162 freeseen |= 1 << i;
150 } else { 163 } else {
151 XFS_WANT_CORRUPTED_RETURN(mp, 164 if (be16_to_cpu(dup->length) >
152 be16_to_cpu(dup->length) <= 165 be16_to_cpu(bf[2].length))
153 be16_to_cpu(bf[2].length)); 166 return __this_address;
154 } 167 }
155 p += be16_to_cpu(dup->length); 168 p += be16_to_cpu(dup->length);
156 lastfree = 1; 169 lastfree = 1;
@@ -163,16 +176,17 @@ __xfs_dir3_data_check(
163 * The linear search is crude but this is DEBUG code. 176 * The linear search is crude but this is DEBUG code.
164 */ 177 */
165 dep = (xfs_dir2_data_entry_t *)p; 178 dep = (xfs_dir2_data_entry_t *)p;
166 XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); 179 if (dep->namelen == 0)
167 XFS_WANT_CORRUPTED_RETURN(mp, 180 return __this_address;
168 !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); 181 if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)))
169 XFS_WANT_CORRUPTED_RETURN(mp, endp >= 182 return __this_address;
170 p + ops->data_entsize(dep->namelen)); 183 if (endp < p + ops->data_entsize(dep->namelen))
171 XFS_WANT_CORRUPTED_RETURN(mp, 184 return __this_address;
172 be16_to_cpu(*ops->data_entry_tag_p(dep)) == 185 if (be16_to_cpu(*ops->data_entry_tag_p(dep)) !=
173 (char *)dep - (char *)hdr); 186 (char *)dep - (char *)hdr)
174 XFS_WANT_CORRUPTED_RETURN(mp, 187 return __this_address;
175 ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); 188 if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX)
189 return __this_address;
176 count++; 190 count++;
177 lastfree = 0; 191 lastfree = 0;
178 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 192 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
@@ -188,34 +202,52 @@ __xfs_dir3_data_check(
188 be32_to_cpu(lep[i].hashval) == hash) 202 be32_to_cpu(lep[i].hashval) == hash)
189 break; 203 break;
190 } 204 }
191 XFS_WANT_CORRUPTED_RETURN(mp, 205 if (i >= be32_to_cpu(btp->count))
192 i < be32_to_cpu(btp->count)); 206 return __this_address;
193 } 207 }
194 p += ops->data_entsize(dep->namelen); 208 p += ops->data_entsize(dep->namelen);
195 } 209 }
196 /* 210 /*
197 * Need to have seen all the entries and all the bestfree slots. 211 * Need to have seen all the entries and all the bestfree slots.
198 */ 212 */
199 XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); 213 if (freeseen != 7)
214 return __this_address;
200 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 215 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
201 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 216 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
202 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 217 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
203 if (lep[i].address == 218 if (lep[i].address ==
204 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 219 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
205 stale++; 220 stale++;
206 if (i > 0) 221 if (i > 0 && be32_to_cpu(lep[i].hashval) <
207 XFS_WANT_CORRUPTED_RETURN(mp, 222 be32_to_cpu(lep[i - 1].hashval))
208 be32_to_cpu(lep[i].hashval) >= 223 return __this_address;
209 be32_to_cpu(lep[i - 1].hashval));
210 } 224 }
211 XFS_WANT_CORRUPTED_RETURN(mp, count == 225 if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale))
212 be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); 226 return __this_address;
213 XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); 227 if (stale != be32_to_cpu(btp->stale))
228 return __this_address;
214 } 229 }
215 return 0; 230 return NULL;
231}
232
233#ifdef DEBUG
234void
235xfs_dir3_data_check(
236 struct xfs_inode *dp,
237 struct xfs_buf *bp)
238{
239 xfs_failaddr_t fa;
240
241 fa = __xfs_dir3_data_check(dp, bp);
242 if (!fa)
243 return;
244 xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
245 bp->b_addr, __FILE__, __LINE__, fa);
246 ASSERT(0);
216} 247}
248#endif
217 249
218static bool 250static xfs_failaddr_t
219xfs_dir3_data_verify( 251xfs_dir3_data_verify(
220 struct xfs_buf *bp) 252 struct xfs_buf *bp)
221{ 253{
@@ -224,20 +256,18 @@ xfs_dir3_data_verify(
224 256
225 if (xfs_sb_version_hascrc(&mp->m_sb)) { 257 if (xfs_sb_version_hascrc(&mp->m_sb)) {
226 if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) 258 if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
227 return false; 259 return __this_address;
228 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 260 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
229 return false; 261 return __this_address;
230 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 262 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
231 return false; 263 return __this_address;
232 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 264 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
233 return false; 265 return __this_address;
234 } else { 266 } else {
235 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) 267 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
236 return false; 268 return __this_address;
237 } 269 }
238 if (__xfs_dir3_data_check(NULL, bp)) 270 return __xfs_dir3_data_check(NULL, bp);
239 return false;
240 return true;
241} 271}
242 272
243/* 273/*
@@ -263,8 +293,7 @@ xfs_dir3_data_reada_verify(
263 bp->b_ops->verify_read(bp); 293 bp->b_ops->verify_read(bp);
264 return; 294 return;
265 default: 295 default:
266 xfs_buf_ioerror(bp, -EFSCORRUPTED); 296 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
267 xfs_verifier_error(bp);
268 break; 297 break;
269 } 298 }
270} 299}
@@ -274,15 +303,16 @@ xfs_dir3_data_read_verify(
274 struct xfs_buf *bp) 303 struct xfs_buf *bp)
275{ 304{
276 struct xfs_mount *mp = bp->b_target->bt_mount; 305 struct xfs_mount *mp = bp->b_target->bt_mount;
306 xfs_failaddr_t fa;
277 307
278 if (xfs_sb_version_hascrc(&mp->m_sb) && 308 if (xfs_sb_version_hascrc(&mp->m_sb) &&
279 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 309 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
280 xfs_buf_ioerror(bp, -EFSBADCRC); 310 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
281 else if (!xfs_dir3_data_verify(bp)) 311 else {
282 xfs_buf_ioerror(bp, -EFSCORRUPTED); 312 fa = xfs_dir3_data_verify(bp);
283 313 if (fa)
284 if (bp->b_error) 314 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
285 xfs_verifier_error(bp); 315 }
286} 316}
287 317
288static void 318static void
@@ -290,12 +320,13 @@ xfs_dir3_data_write_verify(
290 struct xfs_buf *bp) 320 struct xfs_buf *bp)
291{ 321{
292 struct xfs_mount *mp = bp->b_target->bt_mount; 322 struct xfs_mount *mp = bp->b_target->bt_mount;
293 struct xfs_buf_log_item *bip = bp->b_fspriv; 323 struct xfs_buf_log_item *bip = bp->b_log_item;
294 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 324 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
325 xfs_failaddr_t fa;
295 326
296 if (!xfs_dir3_data_verify(bp)) { 327 fa = xfs_dir3_data_verify(bp);
297 xfs_buf_ioerror(bp, -EFSCORRUPTED); 328 if (fa) {
298 xfs_verifier_error(bp); 329 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
299 return; 330 return;
300 } 331 }
301 332
@@ -312,6 +343,7 @@ const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
312 .name = "xfs_dir3_data", 343 .name = "xfs_dir3_data",
313 .verify_read = xfs_dir3_data_read_verify, 344 .verify_read = xfs_dir3_data_read_verify,
314 .verify_write = xfs_dir3_data_write_verify, 345 .verify_write = xfs_dir3_data_write_verify,
346 .verify_struct = xfs_dir3_data_verify,
315}; 347};
316 348
317static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { 349static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
@@ -515,7 +547,6 @@ xfs_dir2_data_freescan_int(
515 struct xfs_dir2_data_hdr *hdr, 547 struct xfs_dir2_data_hdr *hdr,
516 int *loghead) 548 int *loghead)
517{ 549{
518 xfs_dir2_block_tail_t *btp; /* block tail */
519 xfs_dir2_data_entry_t *dep; /* active data entry */ 550 xfs_dir2_data_entry_t *dep; /* active data entry */
520 xfs_dir2_data_unused_t *dup; /* unused data entry */ 551 xfs_dir2_data_unused_t *dup; /* unused data entry */
521 struct xfs_dir2_data_free *bf; 552 struct xfs_dir2_data_free *bf;
@@ -537,12 +568,7 @@ xfs_dir2_data_freescan_int(
537 * Set up pointers. 568 * Set up pointers.
538 */ 569 */
539 p = (char *)ops->data_entry_p(hdr); 570 p = (char *)ops->data_entry_p(hdr);
540 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 571 endp = xfs_dir3_data_endp(geo, hdr);
541 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
542 btp = xfs_dir2_block_tail_p(geo, hdr);
543 endp = (char *)xfs_dir2_block_leaf_p(btp);
544 } else
545 endp = (char *)hdr + geo->blksize;
546 /* 572 /*
547 * Loop over the block's entries. 573 * Loop over the block's entries.
548 */ 574 */
@@ -755,17 +781,9 @@ xfs_dir2_data_make_free(
755 /* 781 /*
756 * Figure out where the end of the data area is. 782 * Figure out where the end of the data area is.
757 */ 783 */
758 if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || 784 endptr = xfs_dir3_data_endp(args->geo, hdr);
759 hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) 785 ASSERT(endptr != NULL);
760 endptr = (char *)hdr + args->geo->blksize;
761 else {
762 xfs_dir2_block_tail_t *btp; /* block tail */
763 786
764 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
765 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
766 btp = xfs_dir2_block_tail_p(args->geo, hdr);
767 endptr = (char *)xfs_dir2_block_leaf_p(btp);
768 }
769 /* 787 /*
770 * If this isn't the start of the block, then back up to 788 * If this isn't the start of the block, then back up to
771 * the previous entry and see if it's free. 789 * the previous entry and see if it's free.
@@ -1067,3 +1085,21 @@ xfs_dir2_data_use_free(
1067 } 1085 }
1068 *needscanp = needscan; 1086 *needscanp = needscan;
1069} 1087}
1088
1089/* Find the end of the entry data in a data/block format dir block. */
1090void *
1091xfs_dir3_data_endp(
1092 struct xfs_da_geometry *geo,
1093 struct xfs_dir2_data_hdr *hdr)
1094{
1095 switch (hdr->magic) {
1096 case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
1097 case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
1098 return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr));
1099 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
1100 case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
1101 return (char *)hdr + geo->blksize;
1102 default:
1103 return NULL;
1104 }
1105}
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 27297a689d9c..d7e630f41f9c 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -50,13 +50,7 @@ static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
50 * Pop an assert if something is wrong. 50 * Pop an assert if something is wrong.
51 */ 51 */
52#ifdef DEBUG 52#ifdef DEBUG
53#define xfs_dir3_leaf_check(dp, bp) \ 53static xfs_failaddr_t
54do { \
55 if (!xfs_dir3_leaf1_check((dp), (bp))) \
56 ASSERT(0); \
57} while (0);
58
59STATIC bool
60xfs_dir3_leaf1_check( 54xfs_dir3_leaf1_check(
61 struct xfs_inode *dp, 55 struct xfs_inode *dp,
62 struct xfs_buf *bp) 56 struct xfs_buf *bp)
@@ -69,17 +63,32 @@ xfs_dir3_leaf1_check(
69 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { 63 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
70 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; 64 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
71 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 65 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
72 return false; 66 return __this_address;
73 } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) 67 } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
74 return false; 68 return __this_address;
75 69
76 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); 70 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
77} 71}
72
73static inline void
74xfs_dir3_leaf_check(
75 struct xfs_inode *dp,
76 struct xfs_buf *bp)
77{
78 xfs_failaddr_t fa;
79
80 fa = xfs_dir3_leaf1_check(dp, bp);
81 if (!fa)
82 return;
83 xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
84 bp->b_addr, __FILE__, __LINE__, fa);
85 ASSERT(0);
86}
78#else 87#else
79#define xfs_dir3_leaf_check(dp, bp) 88#define xfs_dir3_leaf_check(dp, bp)
80#endif 89#endif
81 90
82bool 91xfs_failaddr_t
83xfs_dir3_leaf_check_int( 92xfs_dir3_leaf_check_int(
84 struct xfs_mount *mp, 93 struct xfs_mount *mp,
85 struct xfs_inode *dp, 94 struct xfs_inode *dp,
@@ -114,27 +123,27 @@ xfs_dir3_leaf_check_int(
114 * We can deduce a value for that from di_size. 123 * We can deduce a value for that from di_size.
115 */ 124 */
116 if (hdr->count > ops->leaf_max_ents(geo)) 125 if (hdr->count > ops->leaf_max_ents(geo))
117 return false; 126 return __this_address;
118 127
119 /* Leaves and bests don't overlap in leaf format. */ 128 /* Leaves and bests don't overlap in leaf format. */
120 if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || 129 if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
121 hdr->magic == XFS_DIR3_LEAF1_MAGIC) && 130 hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
122 (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) 131 (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
123 return false; 132 return __this_address;
124 133
125 /* Check hash value order, count stale entries. */ 134 /* Check hash value order, count stale entries. */
126 for (i = stale = 0; i < hdr->count; i++) { 135 for (i = stale = 0; i < hdr->count; i++) {
127 if (i + 1 < hdr->count) { 136 if (i + 1 < hdr->count) {
128 if (be32_to_cpu(ents[i].hashval) > 137 if (be32_to_cpu(ents[i].hashval) >
129 be32_to_cpu(ents[i + 1].hashval)) 138 be32_to_cpu(ents[i + 1].hashval))
130 return false; 139 return __this_address;
131 } 140 }
132 if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 141 if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
133 stale++; 142 stale++;
134 } 143 }
135 if (hdr->stale != stale) 144 if (hdr->stale != stale)
136 return false; 145 return __this_address;
137 return true; 146 return NULL;
138} 147}
139 148
140/* 149/*
@@ -142,7 +151,7 @@ xfs_dir3_leaf_check_int(
142 * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due 151 * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
143 * to incorrect magic numbers. 152 * to incorrect magic numbers.
144 */ 153 */
145static bool 154static xfs_failaddr_t
146xfs_dir3_leaf_verify( 155xfs_dir3_leaf_verify(
147 struct xfs_buf *bp, 156 struct xfs_buf *bp,
148 uint16_t magic) 157 uint16_t magic)
@@ -160,16 +169,16 @@ xfs_dir3_leaf_verify(
160 : XFS_DIR3_LEAFN_MAGIC; 169 : XFS_DIR3_LEAFN_MAGIC;
161 170
162 if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) 171 if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
163 return false; 172 return __this_address;
164 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) 173 if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
165 return false; 174 return __this_address;
166 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 175 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
167 return false; 176 return __this_address;
168 if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) 177 if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
169 return false; 178 return __this_address;
170 } else { 179 } else {
171 if (leaf->hdr.info.magic != cpu_to_be16(magic)) 180 if (leaf->hdr.info.magic != cpu_to_be16(magic))
172 return false; 181 return __this_address;
173 } 182 }
174 183
175 return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); 184 return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
@@ -181,15 +190,16 @@ __read_verify(
181 uint16_t magic) 190 uint16_t magic)
182{ 191{
183 struct xfs_mount *mp = bp->b_target->bt_mount; 192 struct xfs_mount *mp = bp->b_target->bt_mount;
193 xfs_failaddr_t fa;
184 194
185 if (xfs_sb_version_hascrc(&mp->m_sb) && 195 if (xfs_sb_version_hascrc(&mp->m_sb) &&
186 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) 196 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
187 xfs_buf_ioerror(bp, -EFSBADCRC); 197 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
188 else if (!xfs_dir3_leaf_verify(bp, magic)) 198 else {
189 xfs_buf_ioerror(bp, -EFSCORRUPTED); 199 fa = xfs_dir3_leaf_verify(bp, magic);
190 200 if (fa)
191 if (bp->b_error) 201 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
192 xfs_verifier_error(bp); 202 }
193} 203}
194 204
195static void 205static void
@@ -198,12 +208,13 @@ __write_verify(
198 uint16_t magic) 208 uint16_t magic)
199{ 209{
200 struct xfs_mount *mp = bp->b_target->bt_mount; 210 struct xfs_mount *mp = bp->b_target->bt_mount;
201 struct xfs_buf_log_item *bip = bp->b_fspriv; 211 struct xfs_buf_log_item *bip = bp->b_log_item;
202 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 212 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
213 xfs_failaddr_t fa;
203 214
204 if (!xfs_dir3_leaf_verify(bp, magic)) { 215 fa = xfs_dir3_leaf_verify(bp, magic);
205 xfs_buf_ioerror(bp, -EFSCORRUPTED); 216 if (fa) {
206 xfs_verifier_error(bp); 217 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
207 return; 218 return;
208 } 219 }
209 220
@@ -216,6 +227,13 @@ __write_verify(
216 xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); 227 xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
217} 228}
218 229
230static xfs_failaddr_t
231xfs_dir3_leaf1_verify(
232 struct xfs_buf *bp)
233{
234 return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC);
235}
236
219static void 237static void
220xfs_dir3_leaf1_read_verify( 238xfs_dir3_leaf1_read_verify(
221 struct xfs_buf *bp) 239 struct xfs_buf *bp)
@@ -230,6 +248,13 @@ xfs_dir3_leaf1_write_verify(
230 __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); 248 __write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
231} 249}
232 250
251static xfs_failaddr_t
252xfs_dir3_leafn_verify(
253 struct xfs_buf *bp)
254{
255 return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC);
256}
257
233static void 258static void
234xfs_dir3_leafn_read_verify( 259xfs_dir3_leafn_read_verify(
235 struct xfs_buf *bp) 260 struct xfs_buf *bp)
@@ -248,12 +273,14 @@ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
248 .name = "xfs_dir3_leaf1", 273 .name = "xfs_dir3_leaf1",
249 .verify_read = xfs_dir3_leaf1_read_verify, 274 .verify_read = xfs_dir3_leaf1_read_verify,
250 .verify_write = xfs_dir3_leaf1_write_verify, 275 .verify_write = xfs_dir3_leaf1_write_verify,
276 .verify_struct = xfs_dir3_leaf1_verify,
251}; 277};
252 278
253const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { 279const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
254 .name = "xfs_dir3_leafn", 280 .name = "xfs_dir3_leafn",
255 .verify_read = xfs_dir3_leafn_read_verify, 281 .verify_read = xfs_dir3_leafn_read_verify,
256 .verify_write = xfs_dir3_leafn_write_verify, 282 .verify_write = xfs_dir3_leafn_write_verify,
283 .verify_struct = xfs_dir3_leafn_verify,
257}; 284};
258 285
259int 286int
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 682e2bf370c7..239d97a64296 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -53,13 +53,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
53 * Check internal consistency of a leafn block. 53 * Check internal consistency of a leafn block.
54 */ 54 */
55#ifdef DEBUG 55#ifdef DEBUG
56#define xfs_dir3_leaf_check(dp, bp) \ 56static xfs_failaddr_t
57do { \
58 if (!xfs_dir3_leafn_check((dp), (bp))) \
59 ASSERT(0); \
60} while (0);
61
62static bool
63xfs_dir3_leafn_check( 57xfs_dir3_leafn_check(
64 struct xfs_inode *dp, 58 struct xfs_inode *dp,
65 struct xfs_buf *bp) 59 struct xfs_buf *bp)
@@ -72,17 +66,32 @@ xfs_dir3_leafn_check(
72 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { 66 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
73 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; 67 struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
74 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 68 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
75 return false; 69 return __this_address;
76 } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) 70 } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
77 return false; 71 return __this_address;
78 72
79 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); 73 return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
80} 74}
75
76static inline void
77xfs_dir3_leaf_check(
78 struct xfs_inode *dp,
79 struct xfs_buf *bp)
80{
81 xfs_failaddr_t fa;
82
83 fa = xfs_dir3_leafn_check(dp, bp);
84 if (!fa)
85 return;
86 xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
87 bp->b_addr, __FILE__, __LINE__, fa);
88 ASSERT(0);
89}
81#else 90#else
82#define xfs_dir3_leaf_check(dp, bp) 91#define xfs_dir3_leaf_check(dp, bp)
83#endif 92#endif
84 93
85static bool 94static xfs_failaddr_t
86xfs_dir3_free_verify( 95xfs_dir3_free_verify(
87 struct xfs_buf *bp) 96 struct xfs_buf *bp)
88{ 97{
@@ -93,21 +102,21 @@ xfs_dir3_free_verify(
93 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 102 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
94 103
95 if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) 104 if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
96 return false; 105 return __this_address;
97 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) 106 if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
98 return false; 107 return __this_address;
99 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 108 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
100 return false; 109 return __this_address;
101 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) 110 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
102 return false; 111 return __this_address;
103 } else { 112 } else {
104 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) 113 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
105 return false; 114 return __this_address;
106 } 115 }
107 116
108 /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ 117 /* XXX: should bounds check the xfs_dir3_icfree_hdr here */
109 118
110 return true; 119 return NULL;
111} 120}
112 121
113static void 122static void
@@ -115,15 +124,16 @@ xfs_dir3_free_read_verify(
115 struct xfs_buf *bp) 124 struct xfs_buf *bp)
116{ 125{
117 struct xfs_mount *mp = bp->b_target->bt_mount; 126 struct xfs_mount *mp = bp->b_target->bt_mount;
127 xfs_failaddr_t fa;
118 128
119 if (xfs_sb_version_hascrc(&mp->m_sb) && 129 if (xfs_sb_version_hascrc(&mp->m_sb) &&
120 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) 130 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
121 xfs_buf_ioerror(bp, -EFSBADCRC); 131 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
122 else if (!xfs_dir3_free_verify(bp)) 132 else {
123 xfs_buf_ioerror(bp, -EFSCORRUPTED); 133 fa = xfs_dir3_free_verify(bp);
124 134 if (fa)
125 if (bp->b_error) 135 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
126 xfs_verifier_error(bp); 136 }
127} 137}
128 138
129static void 139static void
@@ -131,12 +141,13 @@ xfs_dir3_free_write_verify(
131 struct xfs_buf *bp) 141 struct xfs_buf *bp)
132{ 142{
133 struct xfs_mount *mp = bp->b_target->bt_mount; 143 struct xfs_mount *mp = bp->b_target->bt_mount;
134 struct xfs_buf_log_item *bip = bp->b_fspriv; 144 struct xfs_buf_log_item *bip = bp->b_log_item;
135 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 145 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
146 xfs_failaddr_t fa;
136 147
137 if (!xfs_dir3_free_verify(bp)) { 148 fa = xfs_dir3_free_verify(bp);
138 xfs_buf_ioerror(bp, -EFSCORRUPTED); 149 if (fa) {
139 xfs_verifier_error(bp); 150 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
140 return; 151 return;
141 } 152 }
142 153
@@ -153,10 +164,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
153 .name = "xfs_dir3_free", 164 .name = "xfs_dir3_free",
154 .verify_read = xfs_dir3_free_read_verify, 165 .verify_read = xfs_dir3_free_read_verify,
155 .verify_write = xfs_dir3_free_write_verify, 166 .verify_write = xfs_dir3_free_write_verify,
167 .verify_struct = xfs_dir3_free_verify,
156}; 168};
157 169
158/* Everything ok in the free block header? */ 170/* Everything ok in the free block header? */
159static bool 171static xfs_failaddr_t
160xfs_dir3_free_header_check( 172xfs_dir3_free_header_check(
161 struct xfs_inode *dp, 173 struct xfs_inode *dp,
162 xfs_dablk_t fbno, 174 xfs_dablk_t fbno,
@@ -174,22 +186,22 @@ xfs_dir3_free_header_check(
174 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; 186 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
175 187
176 if (be32_to_cpu(hdr3->firstdb) != firstdb) 188 if (be32_to_cpu(hdr3->firstdb) != firstdb)
177 return false; 189 return __this_address;
178 if (be32_to_cpu(hdr3->nvalid) > maxbests) 190 if (be32_to_cpu(hdr3->nvalid) > maxbests)
179 return false; 191 return __this_address;
180 if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) 192 if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
181 return false; 193 return __this_address;
182 } else { 194 } else {
183 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 195 struct xfs_dir2_free_hdr *hdr = bp->b_addr;
184 196
185 if (be32_to_cpu(hdr->firstdb) != firstdb) 197 if (be32_to_cpu(hdr->firstdb) != firstdb)
186 return false; 198 return __this_address;
187 if (be32_to_cpu(hdr->nvalid) > maxbests) 199 if (be32_to_cpu(hdr->nvalid) > maxbests)
188 return false; 200 return __this_address;
189 if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) 201 if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
190 return false; 202 return __this_address;
191 } 203 }
192 return true; 204 return NULL;
193} 205}
194 206
195static int 207static int
@@ -200,6 +212,7 @@ __xfs_dir3_free_read(
200 xfs_daddr_t mappedbno, 212 xfs_daddr_t mappedbno,
201 struct xfs_buf **bpp) 213 struct xfs_buf **bpp)
202{ 214{
215 xfs_failaddr_t fa;
203 int err; 216 int err;
204 217
205 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, 218 err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
@@ -208,9 +221,9 @@ __xfs_dir3_free_read(
208 return err; 221 return err;
209 222
210 /* Check things that we can't do in the verifier. */ 223 /* Check things that we can't do in the verifier. */
211 if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { 224 fa = xfs_dir3_free_header_check(dp, fbno, *bpp);
212 xfs_buf_ioerror(*bpp, -EFSCORRUPTED); 225 if (fa) {
213 xfs_verifier_error(*bpp); 226 xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
214 xfs_trans_brelse(tp, *bpp); 227 xfs_trans_brelse(tp, *bpp);
215 return -EFSCORRUPTED; 228 return -EFSCORRUPTED;
216 } 229 }
@@ -1906,7 +1919,7 @@ xfs_dir2_node_addname_int(
1906 (unsigned long long)ifbno, lastfbno); 1919 (unsigned long long)ifbno, lastfbno);
1907 if (fblk) { 1920 if (fblk) {
1908 xfs_alert(mp, 1921 xfs_alert(mp,
1909 " fblk 0x%p blkno %llu index %d magic 0x%x", 1922 " fblk "PTR_FMT" blkno %llu index %d magic 0x%x",
1910 fblk, 1923 fblk,
1911 (unsigned long long)fblk->blkno, 1924 (unsigned long long)fblk->blkno,
1912 fblk->index, 1925 fblk->index,
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 4badd26c47e6..753aeeeffc18 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -39,12 +39,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
39 39
40/* xfs_dir2_data.c */ 40/* xfs_dir2_data.c */
41#ifdef DEBUG 41#ifdef DEBUG
42#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); 42extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
43#else 43#else
44#define xfs_dir3_data_check(dp,bp) 44#define xfs_dir3_data_check(dp,bp)
45#endif 45#endif
46 46
47extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); 47extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp,
48 struct xfs_buf *bp);
48extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, 49extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
49 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); 50 xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
50extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, 51extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
@@ -89,8 +90,9 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
89 int lowstale, int highstale, int *lfloglow, int *lfloghigh); 90 int lowstale, int highstale, int *lfloglow, int *lfloghigh);
90extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); 91extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
91 92
92extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, 93extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp,
93 struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); 94 struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr,
95 struct xfs_dir2_leaf *leaf);
94 96
95/* xfs_dir2_node.c */ 97/* xfs_dir2_node.c */
96extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, 98extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
@@ -127,7 +129,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
127extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); 129extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
128extern int xfs_dir2_sf_removename(struct xfs_da_args *args); 130extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
129extern int xfs_dir2_sf_replace(struct xfs_da_args *args); 131extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
130extern int xfs_dir2_sf_verify(struct xfs_inode *ip); 132extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
131 133
132/* xfs_dir2_readdir.c */ 134/* xfs_dir2_readdir.c */
133extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, 135extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index be8b9755f66a..0c75a7f00883 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -156,7 +156,6 @@ xfs_dir2_block_to_sf(
156 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ 156 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
157{ 157{
158 xfs_dir2_data_hdr_t *hdr; /* block header */ 158 xfs_dir2_data_hdr_t *hdr; /* block header */
159 xfs_dir2_block_tail_t *btp; /* block tail pointer */
160 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 159 xfs_dir2_data_entry_t *dep; /* data entry pointer */
161 xfs_inode_t *dp; /* incore directory inode */ 160 xfs_inode_t *dp; /* incore directory inode */
162 xfs_dir2_data_unused_t *dup; /* unused data pointer */ 161 xfs_dir2_data_unused_t *dup; /* unused data pointer */
@@ -192,9 +191,8 @@ xfs_dir2_block_to_sf(
192 /* 191 /*
193 * Set up to loop over the block's entries. 192 * Set up to loop over the block's entries.
194 */ 193 */
195 btp = xfs_dir2_block_tail_p(args->geo, hdr);
196 ptr = (char *)dp->d_ops->data_entry_p(hdr); 194 ptr = (char *)dp->d_ops->data_entry_p(hdr);
197 endptr = (char *)xfs_dir2_block_leaf_p(btp); 195 endptr = xfs_dir3_data_endp(args->geo, hdr);
198 sfep = xfs_dir2_sf_firstentry(sfp); 196 sfep = xfs_dir2_sf_firstentry(sfp);
199 /* 197 /*
200 * Loop over the active and unused entries. 198 * Loop over the active and unused entries.
@@ -630,7 +628,7 @@ xfs_dir2_sf_check(
630#endif /* DEBUG */ 628#endif /* DEBUG */
631 629
632/* Verify the consistency of an inline directory. */ 630/* Verify the consistency of an inline directory. */
633int 631xfs_failaddr_t
634xfs_dir2_sf_verify( 632xfs_dir2_sf_verify(
635 struct xfs_inode *ip) 633 struct xfs_inode *ip)
636{ 634{
@@ -665,7 +663,7 @@ xfs_dir2_sf_verify(
665 */ 663 */
666 if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || 664 if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
667 size < xfs_dir2_sf_hdr_size(sfp->i8count)) 665 size < xfs_dir2_sf_hdr_size(sfp->i8count))
668 return -EFSCORRUPTED; 666 return __this_address;
669 667
670 endp = (char *)sfp + size; 668 endp = (char *)sfp + size;
671 669
@@ -674,7 +672,7 @@ xfs_dir2_sf_verify(
674 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 672 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
675 error = xfs_dir_ino_validate(mp, ino); 673 error = xfs_dir_ino_validate(mp, ino);
676 if (error) 674 if (error)
677 return error; 675 return __this_address;
678 offset = dops->data_first_offset; 676 offset = dops->data_first_offset;
679 677
680 /* Check all reported entries */ 678 /* Check all reported entries */
@@ -686,11 +684,11 @@ xfs_dir2_sf_verify(
686 * within the data buffer. 684 * within the data buffer.
687 */ 685 */
688 if (((char *)sfep + sizeof(*sfep)) >= endp) 686 if (((char *)sfep + sizeof(*sfep)) >= endp)
689 return -EFSCORRUPTED; 687 return __this_address;
690 688
691 /* Don't allow names with known bad length. */ 689 /* Don't allow names with known bad length. */
692 if (sfep->namelen == 0) 690 if (sfep->namelen == 0)
693 return -EFSCORRUPTED; 691 return __this_address;
694 692
695 /* 693 /*
696 * Check that the variable-length part of the structure is 694 * Check that the variable-length part of the structure is
@@ -699,23 +697,23 @@ xfs_dir2_sf_verify(
699 */ 697 */
700 next_sfep = dops->sf_nextentry(sfp, sfep); 698 next_sfep = dops->sf_nextentry(sfp, sfep);
701 if (endp < (char *)next_sfep) 699 if (endp < (char *)next_sfep)
702 return -EFSCORRUPTED; 700 return __this_address;
703 701
704 /* Check that the offsets always increase. */ 702 /* Check that the offsets always increase. */
705 if (xfs_dir2_sf_get_offset(sfep) < offset) 703 if (xfs_dir2_sf_get_offset(sfep) < offset)
706 return -EFSCORRUPTED; 704 return __this_address;
707 705
708 /* Check the inode number. */ 706 /* Check the inode number. */
709 ino = dops->sf_get_ino(sfp, sfep); 707 ino = dops->sf_get_ino(sfp, sfep);
710 i8count += ino > XFS_DIR2_MAX_SHORT_INUM; 708 i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
711 error = xfs_dir_ino_validate(mp, ino); 709 error = xfs_dir_ino_validate(mp, ino);
712 if (error) 710 if (error)
713 return error; 711 return __this_address;
714 712
715 /* Check the file type. */ 713 /* Check the file type. */
716 filetype = dops->sf_get_ftype(sfep); 714 filetype = dops->sf_get_ftype(sfep);
717 if (filetype >= XFS_DIR3_FT_MAX) 715 if (filetype >= XFS_DIR3_FT_MAX)
718 return -EFSCORRUPTED; 716 return __this_address;
719 717
720 offset = xfs_dir2_sf_get_offset(sfep) + 718 offset = xfs_dir2_sf_get_offset(sfep) +
721 dops->data_entsize(sfep->namelen); 719 dops->data_entsize(sfep->namelen);
@@ -723,16 +721,16 @@ xfs_dir2_sf_verify(
723 sfep = next_sfep; 721 sfep = next_sfep;
724 } 722 }
725 if (i8count != sfp->i8count) 723 if (i8count != sfp->i8count)
726 return -EFSCORRUPTED; 724 return __this_address;
727 if ((void *)sfep != (void *)endp) 725 if ((void *)sfep != (void *)endp)
728 return -EFSCORRUPTED; 726 return __this_address;
729 727
730 /* Make sure this whole thing ought to be in local format. */ 728 /* Make sure this whole thing ought to be in local format. */
731 if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 729 if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
732 (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) 730 (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
733 return -EFSCORRUPTED; 731 return __this_address;
734 732
735 return 0; 733 return NULL;
736} 734}
737 735
738/* 736/*
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 747085b4ef44..8b7a6c3cb599 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -42,18 +42,14 @@ xfs_calc_dquots_per_chunk(
42/* 42/*
43 * Do some primitive error checking on ondisk dquot data structures. 43 * Do some primitive error checking on ondisk dquot data structures.
44 */ 44 */
45int 45xfs_failaddr_t
46xfs_dqcheck( 46xfs_dquot_verify(
47 struct xfs_mount *mp, 47 struct xfs_mount *mp,
48 xfs_disk_dquot_t *ddq, 48 xfs_disk_dquot_t *ddq,
49 xfs_dqid_t id, 49 xfs_dqid_t id,
50 uint type, /* used only when IO_dorepair is true */ 50 uint type, /* used only when IO_dorepair is true */
51 uint flags, 51 uint flags)
52 const char *str)
53{ 52{
54 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
55 int errs = 0;
56
57 /* 53 /*
58 * We can encounter an uninitialized dquot buffer for 2 reasons: 54 * We can encounter an uninitialized dquot buffer for 2 reasons:
59 * 1. If we crash while deleting the quotainode(s), and those blks got 55 * 1. If we crash while deleting the quotainode(s), and those blks got
@@ -69,87 +65,57 @@ xfs_dqcheck(
69 * This is all fine; things are still consistent, and we haven't lost 65 * This is all fine; things are still consistent, and we haven't lost
70 * any quota information. Just don't complain about bad dquot blks. 66 * any quota information. Just don't complain about bad dquot blks.
71 */ 67 */
72 if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { 68 if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC))
73 if (flags & XFS_QMOPT_DOWARN) 69 return __this_address;
74 xfs_alert(mp, 70 if (ddq->d_version != XFS_DQUOT_VERSION)
75 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 71 return __this_address;
76 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
77 errs++;
78 }
79 if (ddq->d_version != XFS_DQUOT_VERSION) {
80 if (flags & XFS_QMOPT_DOWARN)
81 xfs_alert(mp,
82 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
83 str, id, ddq->d_version, XFS_DQUOT_VERSION);
84 errs++;
85 }
86 72
87 if (ddq->d_flags != XFS_DQ_USER && 73 if (ddq->d_flags != XFS_DQ_USER &&
88 ddq->d_flags != XFS_DQ_PROJ && 74 ddq->d_flags != XFS_DQ_PROJ &&
89 ddq->d_flags != XFS_DQ_GROUP) { 75 ddq->d_flags != XFS_DQ_GROUP)
90 if (flags & XFS_QMOPT_DOWARN) 76 return __this_address;
91 xfs_alert(mp,
92 "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
93 str, id, ddq->d_flags);
94 errs++;
95 }
96 77
97 if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 78 if (id != -1 && id != be32_to_cpu(ddq->d_id))
98 if (flags & XFS_QMOPT_DOWARN) 79 return __this_address;
99 xfs_alert(mp,
100 "%s : ondisk-dquot 0x%p, ID mismatch: "
101 "0x%x expected, found id 0x%x",
102 str, ddq, id, be32_to_cpu(ddq->d_id));
103 errs++;
104 }
105 80
106 if (!errs && ddq->d_id) { 81 if (!ddq->d_id)
107 if (ddq->d_blk_softlimit && 82 return NULL;
108 be64_to_cpu(ddq->d_bcount) > 83
109 be64_to_cpu(ddq->d_blk_softlimit)) { 84 if (ddq->d_blk_softlimit &&
110 if (!ddq->d_btimer) { 85 be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) &&
111 if (flags & XFS_QMOPT_DOWARN) 86 !ddq->d_btimer)
112 xfs_alert(mp, 87 return __this_address;
113 "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", 88
114 str, (int)be32_to_cpu(ddq->d_id), ddq); 89 if (ddq->d_ino_softlimit &&
115 errs++; 90 be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) &&
116 } 91 !ddq->d_itimer)
117 } 92 return __this_address;
118 if (ddq->d_ino_softlimit &&
119 be64_to_cpu(ddq->d_icount) >
120 be64_to_cpu(ddq->d_ino_softlimit)) {
121 if (!ddq->d_itimer) {
122 if (flags & XFS_QMOPT_DOWARN)
123 xfs_alert(mp,
124 "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
125 str, (int)be32_to_cpu(ddq->d_id), ddq);
126 errs++;
127 }
128 }
129 if (ddq->d_rtb_softlimit &&
130 be64_to_cpu(ddq->d_rtbcount) >
131 be64_to_cpu(ddq->d_rtb_softlimit)) {
132 if (!ddq->d_rtbtimer) {
133 if (flags & XFS_QMOPT_DOWARN)
134 xfs_alert(mp,
135 "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
136 str, (int)be32_to_cpu(ddq->d_id), ddq);
137 errs++;
138 }
139 }
140 }
141 93
142 if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) 94 if (ddq->d_rtb_softlimit &&
143 return errs; 95 be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) &&
96 !ddq->d_rtbtimer)
97 return __this_address;
98
99 return NULL;
100}
101
102/*
103 * Do some primitive error checking on ondisk dquot data structures.
104 */
105int
106xfs_dquot_repair(
107 struct xfs_mount *mp,
108 struct xfs_disk_dquot *ddq,
109 xfs_dqid_t id,
110 uint type)
111{
112 struct xfs_dqblk *d = (struct xfs_dqblk *)ddq;
144 113
145 if (flags & XFS_QMOPT_DOWARN)
146 xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
147 114
148 /* 115 /*
149 * Typically, a repair is only requested by quotacheck. 116 * Typically, a repair is only requested by quotacheck.
150 */ 117 */
151 ASSERT(id != -1); 118 ASSERT(id != -1);
152 ASSERT(flags & XFS_QMOPT_DQREPAIR);
153 memset(d, 0, sizeof(xfs_dqblk_t)); 119 memset(d, 0, sizeof(xfs_dqblk_t));
154 120
155 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); 121 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -163,7 +129,7 @@ xfs_dqcheck(
163 XFS_DQUOT_CRC_OFF); 129 XFS_DQUOT_CRC_OFF);
164 } 130 }
165 131
166 return errs; 132 return 0;
167} 133}
168 134
169STATIC bool 135STATIC bool
@@ -198,13 +164,13 @@ xfs_dquot_buf_verify_crc(
198 return true; 164 return true;
199} 165}
200 166
201STATIC bool 167STATIC xfs_failaddr_t
202xfs_dquot_buf_verify( 168xfs_dquot_buf_verify(
203 struct xfs_mount *mp, 169 struct xfs_mount *mp,
204 struct xfs_buf *bp, 170 struct xfs_buf *bp)
205 int warn)
206{ 171{
207 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; 172 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
173 xfs_failaddr_t fa;
208 xfs_dqid_t id = 0; 174 xfs_dqid_t id = 0;
209 int ndquots; 175 int ndquots;
210 int i; 176 int i;
@@ -228,33 +194,43 @@ xfs_dquot_buf_verify(
228 */ 194 */
229 for (i = 0; i < ndquots; i++) { 195 for (i = 0; i < ndquots; i++) {
230 struct xfs_disk_dquot *ddq; 196 struct xfs_disk_dquot *ddq;
231 int error;
232 197
233 ddq = &d[i].dd_diskdq; 198 ddq = &d[i].dd_diskdq;
234 199
235 if (i == 0) 200 if (i == 0)
236 id = be32_to_cpu(ddq->d_id); 201 id = be32_to_cpu(ddq->d_id);
237 202
238 error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); 203 fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0);
239 if (error) 204 if (fa)
240 return false; 205 return fa;
241 } 206 }
242 return true; 207
208 return NULL;
209}
210
211static xfs_failaddr_t
212xfs_dquot_buf_verify_struct(
213 struct xfs_buf *bp)
214{
215 struct xfs_mount *mp = bp->b_target->bt_mount;
216
217 return xfs_dquot_buf_verify(mp, bp);
243} 218}
244 219
245static void 220static void
246xfs_dquot_buf_read_verify( 221xfs_dquot_buf_read_verify(
247 struct xfs_buf *bp) 222 struct xfs_buf *bp)
248{ 223{
249 struct xfs_mount *mp = bp->b_target->bt_mount; 224 struct xfs_mount *mp = bp->b_target->bt_mount;
225 xfs_failaddr_t fa;
250 226
251 if (!xfs_dquot_buf_verify_crc(mp, bp)) 227 if (!xfs_dquot_buf_verify_crc(mp, bp))
252 xfs_buf_ioerror(bp, -EFSBADCRC); 228 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
253 else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) 229 else {
254 xfs_buf_ioerror(bp, -EFSCORRUPTED); 230 fa = xfs_dquot_buf_verify(mp, bp);
255 231 if (fa)
256 if (bp->b_error) 232 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
257 xfs_verifier_error(bp); 233 }
258} 234}
259 235
260/* 236/*
@@ -270,7 +246,7 @@ xfs_dquot_buf_readahead_verify(
270 struct xfs_mount *mp = bp->b_target->bt_mount; 246 struct xfs_mount *mp = bp->b_target->bt_mount;
271 247
272 if (!xfs_dquot_buf_verify_crc(mp, bp) || 248 if (!xfs_dquot_buf_verify_crc(mp, bp) ||
273 !xfs_dquot_buf_verify(mp, bp, 0)) { 249 xfs_dquot_buf_verify(mp, bp) != NULL) {
274 xfs_buf_ioerror(bp, -EIO); 250 xfs_buf_ioerror(bp, -EIO);
275 bp->b_flags &= ~XBF_DONE; 251 bp->b_flags &= ~XBF_DONE;
276 } 252 }
@@ -283,21 +259,21 @@ xfs_dquot_buf_readahead_verify(
283 */ 259 */
284static void 260static void
285xfs_dquot_buf_write_verify( 261xfs_dquot_buf_write_verify(
286 struct xfs_buf *bp) 262 struct xfs_buf *bp)
287{ 263{
288 struct xfs_mount *mp = bp->b_target->bt_mount; 264 struct xfs_mount *mp = bp->b_target->bt_mount;
265 xfs_failaddr_t fa;
289 266
290 if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { 267 fa = xfs_dquot_buf_verify(mp, bp);
291 xfs_buf_ioerror(bp, -EFSCORRUPTED); 268 if (fa)
292 xfs_verifier_error(bp); 269 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
293 return;
294 }
295} 270}
296 271
297const struct xfs_buf_ops xfs_dquot_buf_ops = { 272const struct xfs_buf_ops xfs_dquot_buf_ops = {
298 .name = "xfs_dquot", 273 .name = "xfs_dquot",
299 .verify_read = xfs_dquot_buf_read_verify, 274 .verify_read = xfs_dquot_buf_read_verify,
300 .verify_write = xfs_dquot_buf_write_verify, 275 .verify_write = xfs_dquot_buf_write_verify,
276 .verify_struct = xfs_dquot_buf_verify_struct,
301}; 277};
302 278
303const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { 279const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b90924104596..faf1a4edd618 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -233,6 +233,13 @@ typedef struct xfs_fsop_resblks {
233#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) 233#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL)
234#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) 234#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL)
235 235
236/*
237 * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than
238 * 16MB or larger than 1TB.
239 */
240#define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */
241#define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */
242
236/* keep the maximum size under 2^31 by a small amount */ 243/* keep the maximum size under 2^31 by a small amount */
237#define XFS_MAX_LOG_BYTES \ 244#define XFS_MAX_LOG_BYTES \
238 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) 245 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 3b57ef0f2f76..0e2cf5f0be1f 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2491,7 +2491,7 @@ xfs_check_agi_unlinked(
2491#define xfs_check_agi_unlinked(agi) 2491#define xfs_check_agi_unlinked(agi)
2492#endif 2492#endif
2493 2493
2494static bool 2494static xfs_failaddr_t
2495xfs_agi_verify( 2495xfs_agi_verify(
2496 struct xfs_buf *bp) 2496 struct xfs_buf *bp)
2497{ 2497{
@@ -2500,28 +2500,28 @@ xfs_agi_verify(
2500 2500
2501 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2501 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2502 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) 2502 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
2503 return false; 2503 return __this_address;
2504 if (!xfs_log_check_lsn(mp, 2504 if (!xfs_log_check_lsn(mp,
2505 be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) 2505 be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
2506 return false; 2506 return __this_address;
2507 } 2507 }
2508 2508
2509 /* 2509 /*
2510 * Validate the magic number of the agi block. 2510 * Validate the magic number of the agi block.
2511 */ 2511 */
2512 if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) 2512 if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
2513 return false; 2513 return __this_address;
2514 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) 2514 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
2515 return false; 2515 return __this_address;
2516 2516
2517 if (be32_to_cpu(agi->agi_level) < 1 || 2517 if (be32_to_cpu(agi->agi_level) < 1 ||
2518 be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) 2518 be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
2519 return false; 2519 return __this_address;
2520 2520
2521 if (xfs_sb_version_hasfinobt(&mp->m_sb) && 2521 if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
2522 (be32_to_cpu(agi->agi_free_level) < 1 || 2522 (be32_to_cpu(agi->agi_free_level) < 1 ||
2523 be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) 2523 be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS))
2524 return false; 2524 return __this_address;
2525 2525
2526 /* 2526 /*
2527 * during growfs operations, the perag is not fully initialised, 2527 * during growfs operations, the perag is not fully initialised,
@@ -2530,10 +2530,10 @@ xfs_agi_verify(
2530 * so we can detect and avoid this problem. 2530 * so we can detect and avoid this problem.
2531 */ 2531 */
2532 if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) 2532 if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
2533 return false; 2533 return __this_address;
2534 2534
2535 xfs_check_agi_unlinked(agi); 2535 xfs_check_agi_unlinked(agi);
2536 return true; 2536 return NULL;
2537} 2537}
2538 2538
2539static void 2539static void
@@ -2541,28 +2541,29 @@ xfs_agi_read_verify(
2541 struct xfs_buf *bp) 2541 struct xfs_buf *bp)
2542{ 2542{
2543 struct xfs_mount *mp = bp->b_target->bt_mount; 2543 struct xfs_mount *mp = bp->b_target->bt_mount;
2544 xfs_failaddr_t fa;
2544 2545
2545 if (xfs_sb_version_hascrc(&mp->m_sb) && 2546 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2546 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) 2547 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
2547 xfs_buf_ioerror(bp, -EFSBADCRC); 2548 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2548 else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, 2549 else {
2549 XFS_ERRTAG_IALLOC_READ_AGI)) 2550 fa = xfs_agi_verify(bp);
2550 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2551 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
2551 2552 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2552 if (bp->b_error) 2553 }
2553 xfs_verifier_error(bp);
2554} 2554}
2555 2555
2556static void 2556static void
2557xfs_agi_write_verify( 2557xfs_agi_write_verify(
2558 struct xfs_buf *bp) 2558 struct xfs_buf *bp)
2559{ 2559{
2560 struct xfs_mount *mp = bp->b_target->bt_mount; 2560 struct xfs_mount *mp = bp->b_target->bt_mount;
2561 struct xfs_buf_log_item *bip = bp->b_fspriv; 2561 struct xfs_buf_log_item *bip = bp->b_log_item;
2562 xfs_failaddr_t fa;
2562 2563
2563 if (!xfs_agi_verify(bp)) { 2564 fa = xfs_agi_verify(bp);
2564 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2565 if (fa) {
2565 xfs_verifier_error(bp); 2566 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2566 return; 2567 return;
2567 } 2568 }
2568 2569
@@ -2578,6 +2579,7 @@ const struct xfs_buf_ops xfs_agi_buf_ops = {
2578 .name = "xfs_agi", 2579 .name = "xfs_agi",
2579 .verify_read = xfs_agi_read_verify, 2580 .verify_read = xfs_agi_read_verify,
2580 .verify_write = xfs_agi_write_verify, 2581 .verify_write = xfs_agi_write_verify,
2582 .verify_struct = xfs_agi_verify,
2581}; 2583};
2582 2584
2583/* 2585/*
@@ -2751,3 +2753,102 @@ xfs_verify_dir_ino(
2751 return false; 2753 return false;
2752 return xfs_verify_ino(mp, ino); 2754 return xfs_verify_ino(mp, ino);
2753} 2755}
2756
2757/* Is there an inode record covering a given range of inode numbers? */
2758int
2759xfs_ialloc_has_inode_record(
2760 struct xfs_btree_cur *cur,
2761 xfs_agino_t low,
2762 xfs_agino_t high,
2763 bool *exists)
2764{
2765 struct xfs_inobt_rec_incore irec;
2766 xfs_agino_t agino;
2767 uint16_t holemask;
2768 int has_record;
2769 int i;
2770 int error;
2771
2772 *exists = false;
2773 error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
2774 while (error == 0 && has_record) {
2775 error = xfs_inobt_get_rec(cur, &irec, &has_record);
2776 if (error || irec.ir_startino > high)
2777 break;
2778
2779 agino = irec.ir_startino;
2780 holemask = irec.ir_holemask;
2781 for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
2782 i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
2783 if (holemask & 1)
2784 continue;
2785 if (agino + XFS_INODES_PER_HOLEMASK_BIT > low &&
2786 agino <= high) {
2787 *exists = true;
2788 return 0;
2789 }
2790 }
2791
2792 error = xfs_btree_increment(cur, 0, &has_record);
2793 }
2794 return error;
2795}
2796
2797/* Is there an inode record covering a given extent? */
2798int
2799xfs_ialloc_has_inodes_at_extent(
2800 struct xfs_btree_cur *cur,
2801 xfs_agblock_t bno,
2802 xfs_extlen_t len,
2803 bool *exists)
2804{
2805 xfs_agino_t low;
2806 xfs_agino_t high;
2807
2808 low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0);
2809 high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1;
2810
2811 return xfs_ialloc_has_inode_record(cur, low, high, exists);
2812}
2813
2814struct xfs_ialloc_count_inodes {
2815 xfs_agino_t count;
2816 xfs_agino_t freecount;
2817};
2818
2819/* Record inode counts across all inobt records. */
2820STATIC int
2821xfs_ialloc_count_inodes_rec(
2822 struct xfs_btree_cur *cur,
2823 union xfs_btree_rec *rec,
2824 void *priv)
2825{
2826 struct xfs_inobt_rec_incore irec;
2827 struct xfs_ialloc_count_inodes *ci = priv;
2828
2829 xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
2830 ci->count += irec.ir_count;
2831 ci->freecount += irec.ir_freecount;
2832
2833 return 0;
2834}
2835
2836/* Count allocated and free inodes under an inobt. */
2837int
2838xfs_ialloc_count_inodes(
2839 struct xfs_btree_cur *cur,
2840 xfs_agino_t *count,
2841 xfs_agino_t *freecount)
2842{
2843 struct xfs_ialloc_count_inodes ci = {0};
2844 int error;
2845
2846 ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
2847 error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
2848 if (error)
2849 return error;
2850
2851 *count = ci.count;
2852 *freecount = ci.freecount;
2853 return 0;
2854}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 66a8de0b1caa..c5402bb4ce0c 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -170,6 +170,12 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
170union xfs_btree_rec; 170union xfs_btree_rec;
171void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, 171void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
172 struct xfs_inobt_rec_incore *irec); 172 struct xfs_inobt_rec_incore *irec);
173int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
174 xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
175int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low,
176 xfs_agino_t high, bool *exists);
177int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
178 xfs_agino_t *freecount);
173 179
174int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 180int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
175void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, 181void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 317caba9faa6..af197a5f3a82 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -141,21 +141,42 @@ xfs_finobt_alloc_block(
141 union xfs_btree_ptr *new, 141 union xfs_btree_ptr *new,
142 int *stat) 142 int *stat)
143{ 143{
144 if (cur->bc_mp->m_inotbt_nores)
145 return xfs_inobt_alloc_block(cur, start, new, stat);
144 return __xfs_inobt_alloc_block(cur, start, new, stat, 146 return __xfs_inobt_alloc_block(cur, start, new, stat,
145 XFS_AG_RESV_METADATA); 147 XFS_AG_RESV_METADATA);
146} 148}
147 149
148STATIC int 150STATIC int
149xfs_inobt_free_block( 151__xfs_inobt_free_block(
150 struct xfs_btree_cur *cur, 152 struct xfs_btree_cur *cur,
151 struct xfs_buf *bp) 153 struct xfs_buf *bp,
154 enum xfs_ag_resv_type resv)
152{ 155{
153 struct xfs_owner_info oinfo; 156 struct xfs_owner_info oinfo;
154 157
155 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 158 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
156 return xfs_free_extent(cur->bc_tp, 159 return xfs_free_extent(cur->bc_tp,
157 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 160 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
158 &oinfo, XFS_AG_RESV_NONE); 161 &oinfo, resv);
162}
163
164STATIC int
165xfs_inobt_free_block(
166 struct xfs_btree_cur *cur,
167 struct xfs_buf *bp)
168{
169 return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE);
170}
171
172STATIC int
173xfs_finobt_free_block(
174 struct xfs_btree_cur *cur,
175 struct xfs_buf *bp)
176{
177 if (cur->bc_mp->m_inotbt_nores)
178 return xfs_inobt_free_block(cur, bp);
179 return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA);
159} 180}
160 181
161STATIC int 182STATIC int
@@ -250,12 +271,13 @@ xfs_inobt_diff_two_keys(
250 be32_to_cpu(k2->inobt.ir_startino); 271 be32_to_cpu(k2->inobt.ir_startino);
251} 272}
252 273
253static int 274static xfs_failaddr_t
254xfs_inobt_verify( 275xfs_inobt_verify(
255 struct xfs_buf *bp) 276 struct xfs_buf *bp)
256{ 277{
257 struct xfs_mount *mp = bp->b_target->bt_mount; 278 struct xfs_mount *mp = bp->b_target->bt_mount;
258 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 279 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
280 xfs_failaddr_t fa;
259 unsigned int level; 281 unsigned int level;
260 282
261 /* 283 /*
@@ -271,20 +293,21 @@ xfs_inobt_verify(
271 switch (block->bb_magic) { 293 switch (block->bb_magic) {
272 case cpu_to_be32(XFS_IBT_CRC_MAGIC): 294 case cpu_to_be32(XFS_IBT_CRC_MAGIC):
273 case cpu_to_be32(XFS_FIBT_CRC_MAGIC): 295 case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
274 if (!xfs_btree_sblock_v5hdr_verify(bp)) 296 fa = xfs_btree_sblock_v5hdr_verify(bp);
275 return false; 297 if (fa)
298 return fa;
276 /* fall through */ 299 /* fall through */
277 case cpu_to_be32(XFS_IBT_MAGIC): 300 case cpu_to_be32(XFS_IBT_MAGIC):
278 case cpu_to_be32(XFS_FIBT_MAGIC): 301 case cpu_to_be32(XFS_FIBT_MAGIC):
279 break; 302 break;
280 default: 303 default:
281 return 0; 304 return NULL;
282 } 305 }
283 306
284 /* level verification */ 307 /* level verification */
285 level = be16_to_cpu(block->bb_level); 308 level = be16_to_cpu(block->bb_level);
286 if (level >= mp->m_in_maxlevels) 309 if (level >= mp->m_in_maxlevels)
287 return false; 310 return __this_address;
288 311
289 return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); 312 return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
290} 313}
@@ -293,25 +316,30 @@ static void
293xfs_inobt_read_verify( 316xfs_inobt_read_verify(
294 struct xfs_buf *bp) 317 struct xfs_buf *bp)
295{ 318{
319 xfs_failaddr_t fa;
320
296 if (!xfs_btree_sblock_verify_crc(bp)) 321 if (!xfs_btree_sblock_verify_crc(bp))
297 xfs_buf_ioerror(bp, -EFSBADCRC); 322 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
298 else if (!xfs_inobt_verify(bp)) 323 else {
299 xfs_buf_ioerror(bp, -EFSCORRUPTED); 324 fa = xfs_inobt_verify(bp);
325 if (fa)
326 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
327 }
300 328
301 if (bp->b_error) { 329 if (bp->b_error)
302 trace_xfs_btree_corrupt(bp, _RET_IP_); 330 trace_xfs_btree_corrupt(bp, _RET_IP_);
303 xfs_verifier_error(bp);
304 }
305} 331}
306 332
307static void 333static void
308xfs_inobt_write_verify( 334xfs_inobt_write_verify(
309 struct xfs_buf *bp) 335 struct xfs_buf *bp)
310{ 336{
311 if (!xfs_inobt_verify(bp)) { 337 xfs_failaddr_t fa;
338
339 fa = xfs_inobt_verify(bp);
340 if (fa) {
312 trace_xfs_btree_corrupt(bp, _RET_IP_); 341 trace_xfs_btree_corrupt(bp, _RET_IP_);
313 xfs_buf_ioerror(bp, -EFSCORRUPTED); 342 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
314 xfs_verifier_error(bp);
315 return; 343 return;
316 } 344 }
317 xfs_btree_sblock_calc_crc(bp); 345 xfs_btree_sblock_calc_crc(bp);
@@ -322,6 +350,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
322 .name = "xfs_inobt", 350 .name = "xfs_inobt",
323 .verify_read = xfs_inobt_read_verify, 351 .verify_read = xfs_inobt_read_verify,
324 .verify_write = xfs_inobt_write_verify, 352 .verify_write = xfs_inobt_write_verify,
353 .verify_struct = xfs_inobt_verify,
325}; 354};
326 355
327STATIC int 356STATIC int
@@ -372,7 +401,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
372 .dup_cursor = xfs_inobt_dup_cursor, 401 .dup_cursor = xfs_inobt_dup_cursor,
373 .set_root = xfs_finobt_set_root, 402 .set_root = xfs_finobt_set_root,
374 .alloc_block = xfs_finobt_alloc_block, 403 .alloc_block = xfs_finobt_alloc_block,
375 .free_block = xfs_inobt_free_block, 404 .free_block = xfs_finobt_free_block,
376 .get_minrecs = xfs_inobt_get_minrecs, 405 .get_minrecs = xfs_inobt_get_minrecs,
377 .get_maxrecs = xfs_inobt_get_maxrecs, 406 .get_maxrecs = xfs_inobt_get_maxrecs,
378 .init_key_from_rec = xfs_inobt_init_key_from_rec, 407 .init_key_from_rec = xfs_inobt_init_key_from_rec,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index b9c0bf80669c..4fe17b368316 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -115,8 +115,7 @@ xfs_inode_buf_verify(
115 return; 115 return;
116 } 116 }
117 117
118 xfs_buf_ioerror(bp, -EFSCORRUPTED); 118 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
119 xfs_verifier_error(bp);
120#ifdef DEBUG 119#ifdef DEBUG
121 xfs_alert(mp, 120 xfs_alert(mp,
122 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 121 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -384,7 +383,7 @@ xfs_log_dinode_to_disk(
384 } 383 }
385} 384}
386 385
387bool 386xfs_failaddr_t
388xfs_dinode_verify( 387xfs_dinode_verify(
389 struct xfs_mount *mp, 388 struct xfs_mount *mp,
390 xfs_ino_t ino, 389 xfs_ino_t ino,
@@ -393,53 +392,122 @@ xfs_dinode_verify(
393 uint16_t mode; 392 uint16_t mode;
394 uint16_t flags; 393 uint16_t flags;
395 uint64_t flags2; 394 uint64_t flags2;
395 uint64_t di_size;
396 396
397 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 397 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
398 return false; 398 return __this_address;
399
400 /* Verify v3 integrity information first */
401 if (dip->di_version >= 3) {
402 if (!xfs_sb_version_hascrc(&mp->m_sb))
403 return __this_address;
404 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
405 XFS_DINODE_CRC_OFF))
406 return __this_address;
407 if (be64_to_cpu(dip->di_ino) != ino)
408 return __this_address;
409 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
410 return __this_address;
411 }
399 412
400 /* don't allow invalid i_size */ 413 /* don't allow invalid i_size */
401 if (be64_to_cpu(dip->di_size) & (1ULL << 63)) 414 di_size = be64_to_cpu(dip->di_size);
402 return false; 415 if (di_size & (1ULL << 63))
416 return __this_address;
403 417
404 mode = be16_to_cpu(dip->di_mode); 418 mode = be16_to_cpu(dip->di_mode);
405 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 419 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
406 return false; 420 return __this_address;
407 421
408 /* No zero-length symlinks/dirs. */ 422 /* No zero-length symlinks/dirs. */
409 if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) 423 if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
410 return false; 424 return __this_address;
425
426 /* Fork checks carried over from xfs_iformat_fork */
427 if (mode &&
428 be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
429 be64_to_cpu(dip->di_nblocks))
430 return __this_address;
431
432 if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
433 return __this_address;
434
435 flags = be16_to_cpu(dip->di_flags);
436
437 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
438 return __this_address;
439
440 /* Do we have appropriate data fork formats for the mode? */
441 switch (mode & S_IFMT) {
442 case S_IFIFO:
443 case S_IFCHR:
444 case S_IFBLK:
445 case S_IFSOCK:
446 if (dip->di_format != XFS_DINODE_FMT_DEV)
447 return __this_address;
448 break;
449 case S_IFREG:
450 case S_IFLNK:
451 case S_IFDIR:
452 switch (dip->di_format) {
453 case XFS_DINODE_FMT_LOCAL:
454 /*
455 * no local regular files yet
456 */
457 if (S_ISREG(mode))
458 return __this_address;
459 if (di_size > XFS_DFORK_DSIZE(dip, mp))
460 return __this_address;
461 /* fall through */
462 case XFS_DINODE_FMT_EXTENTS:
463 case XFS_DINODE_FMT_BTREE:
464 break;
465 default:
466 return __this_address;
467 }
468 break;
469 case 0:
470 /* Uninitialized inode ok. */
471 break;
472 default:
473 return __this_address;
474 }
475
476 if (XFS_DFORK_Q(dip)) {
477 switch (dip->di_aformat) {
478 case XFS_DINODE_FMT_LOCAL:
479 case XFS_DINODE_FMT_EXTENTS:
480 case XFS_DINODE_FMT_BTREE:
481 break;
482 default:
483 return __this_address;
484 }
485 }
411 486
412 /* only version 3 or greater inodes are extensively verified here */ 487 /* only version 3 or greater inodes are extensively verified here */
413 if (dip->di_version < 3) 488 if (dip->di_version < 3)
414 return true; 489 return NULL;
415
416 if (!xfs_sb_version_hascrc(&mp->m_sb))
417 return false;
418 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
419 XFS_DINODE_CRC_OFF))
420 return false;
421 if (be64_to_cpu(dip->di_ino) != ino)
422 return false;
423 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
424 return false;
425 490
426 flags = be16_to_cpu(dip->di_flags);
427 flags2 = be64_to_cpu(dip->di_flags2); 491 flags2 = be64_to_cpu(dip->di_flags2);
428 492
429 /* don't allow reflink/cowextsize if we don't have reflink */ 493 /* don't allow reflink/cowextsize if we don't have reflink */
430 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 494 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
431 !xfs_sb_version_hasreflink(&mp->m_sb)) 495 !xfs_sb_version_hasreflink(&mp->m_sb))
432 return false; 496 return __this_address;
497
498 /* only regular files get reflink */
499 if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
500 return __this_address;
433 501
434 /* don't let reflink and realtime mix */ 502 /* don't let reflink and realtime mix */
435 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 503 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
436 return false; 504 return __this_address;
437 505
438 /* don't let reflink and dax mix */ 506 /* don't let reflink and dax mix */
439 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) 507 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
440 return false; 508 return __this_address;
441 509
442 return true; 510 return NULL;
443} 511}
444 512
445void 513void
@@ -479,6 +547,7 @@ xfs_iread(
479{ 547{
480 xfs_buf_t *bp; 548 xfs_buf_t *bp;
481 xfs_dinode_t *dip; 549 xfs_dinode_t *dip;
550 xfs_failaddr_t fa;
482 int error; 551 int error;
483 552
484 /* 553 /*
@@ -510,11 +579,10 @@ xfs_iread(
510 return error; 579 return error;
511 580
512 /* even unallocated inodes are verified */ 581 /* even unallocated inodes are verified */
513 if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { 582 fa = xfs_dinode_verify(mp, ip->i_ino, dip);
514 xfs_alert(mp, "%s: validation failed for inode %lld", 583 if (fa) {
515 __func__, ip->i_ino); 584 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
516 585 sizeof(*dip), fa);
517 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
518 error = -EFSCORRUPTED; 586 error = -EFSCORRUPTED;
519 goto out_brelse; 587 goto out_brelse;
520 } 588 }
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index a9c97a356c30..8a5e1da52d74 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -82,7 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
82#define xfs_inobp_check(mp, bp) 82#define xfs_inobp_check(mp, bp)
83#endif /* DEBUG */ 83#endif /* DEBUG */
84 84
85bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, 85xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
86 struct xfs_dinode *dip); 86 struct xfs_dinode *dip);
87 87
88#endif /* __XFS_INODE_BUF_H__ */ 88#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index c79a1616b79d..866d2861c625 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -35,6 +35,8 @@
35#include "xfs_da_format.h" 35#include "xfs_da_format.h"
36#include "xfs_da_btree.h" 36#include "xfs_da_btree.h"
37#include "xfs_dir2_priv.h" 37#include "xfs_dir2_priv.h"
38#include "xfs_attr_leaf.h"
39#include "xfs_shared.h"
38 40
39kmem_zone_t *xfs_ifork_zone; 41kmem_zone_t *xfs_ifork_zone;
40 42
@@ -62,69 +64,11 @@ xfs_iformat_fork(
62 int error = 0; 64 int error = 0;
63 xfs_fsize_t di_size; 65 xfs_fsize_t di_size;
64 66
65 if (unlikely(be32_to_cpu(dip->di_nextents) +
66 be16_to_cpu(dip->di_anextents) >
67 be64_to_cpu(dip->di_nblocks))) {
68 xfs_warn(ip->i_mount,
69 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
70 (unsigned long long)ip->i_ino,
71 (int)(be32_to_cpu(dip->di_nextents) +
72 be16_to_cpu(dip->di_anextents)),
73 (unsigned long long)
74 be64_to_cpu(dip->di_nblocks));
75 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
76 ip->i_mount, dip);
77 return -EFSCORRUPTED;
78 }
79
80 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
81 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
82 (unsigned long long)ip->i_ino,
83 dip->di_forkoff);
84 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
85 ip->i_mount, dip);
86 return -EFSCORRUPTED;
87 }
88
89 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
90 !ip->i_mount->m_rtdev_targp)) {
91 xfs_warn(ip->i_mount,
92 "corrupt dinode %Lu, has realtime flag set.",
93 ip->i_ino);
94 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
95 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
96 return -EFSCORRUPTED;
97 }
98
99 if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) {
100 xfs_warn(ip->i_mount,
101 "corrupt dinode %llu, wrong file type for reflink.",
102 ip->i_ino);
103 XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
104 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
105 return -EFSCORRUPTED;
106 }
107
108 if (unlikely(xfs_is_reflink_inode(ip) &&
109 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) {
110 xfs_warn(ip->i_mount,
111 "corrupt dinode %llu, has reflink+realtime flag set.",
112 ip->i_ino);
113 XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
114 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
115 return -EFSCORRUPTED;
116 }
117
118 switch (inode->i_mode & S_IFMT) { 67 switch (inode->i_mode & S_IFMT) {
119 case S_IFIFO: 68 case S_IFIFO:
120 case S_IFCHR: 69 case S_IFCHR:
121 case S_IFBLK: 70 case S_IFBLK:
122 case S_IFSOCK: 71 case S_IFSOCK:
123 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
124 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
125 ip->i_mount, dip);
126 return -EFSCORRUPTED;
127 }
128 ip->i_d.di_size = 0; 72 ip->i_d.di_size = 0;
129 inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); 73 inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
130 break; 74 break;
@@ -134,32 +78,7 @@ xfs_iformat_fork(
134 case S_IFDIR: 78 case S_IFDIR:
135 switch (dip->di_format) { 79 switch (dip->di_format) {
136 case XFS_DINODE_FMT_LOCAL: 80 case XFS_DINODE_FMT_LOCAL:
137 /*
138 * no local regular files yet
139 */
140 if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
141 xfs_warn(ip->i_mount,
142 "corrupt inode %Lu (local format for regular file).",
143 (unsigned long long) ip->i_ino);
144 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
145 XFS_ERRLEVEL_LOW,
146 ip->i_mount, dip);
147 return -EFSCORRUPTED;
148 }
149
150 di_size = be64_to_cpu(dip->di_size); 81 di_size = be64_to_cpu(dip->di_size);
151 if (unlikely(di_size < 0 ||
152 di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
153 xfs_warn(ip->i_mount,
154 "corrupt inode %Lu (bad size %Ld for local inode).",
155 (unsigned long long) ip->i_ino,
156 (long long) di_size);
157 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
158 XFS_ERRLEVEL_LOW,
159 ip->i_mount, dip);
160 return -EFSCORRUPTED;
161 }
162
163 size = (int)di_size; 82 size = (int)di_size;
164 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 83 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
165 break; 84 break;
@@ -170,28 +89,16 @@ xfs_iformat_fork(
170 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 89 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
171 break; 90 break;
172 default: 91 default:
173 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
174 ip->i_mount);
175 return -EFSCORRUPTED; 92 return -EFSCORRUPTED;
176 } 93 }
177 break; 94 break;
178 95
179 default: 96 default:
180 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
181 return -EFSCORRUPTED; 97 return -EFSCORRUPTED;
182 } 98 }
183 if (error) 99 if (error)
184 return error; 100 return error;
185 101
186 /* Check inline dir contents. */
187 if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) {
188 error = xfs_dir2_sf_verify(ip);
189 if (error) {
190 xfs_idestroy_fork(ip, XFS_DATA_FORK);
191 return error;
192 }
193 }
194
195 if (xfs_is_reflink_inode(ip)) { 102 if (xfs_is_reflink_inode(ip)) {
196 ASSERT(ip->i_cowfp == NULL); 103 ASSERT(ip->i_cowfp == NULL);
197 xfs_ifork_init_cow(ip); 104 xfs_ifork_init_cow(ip);
@@ -208,18 +115,6 @@ xfs_iformat_fork(
208 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 115 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
209 size = be16_to_cpu(atp->hdr.totsize); 116 size = be16_to_cpu(atp->hdr.totsize);
210 117
211 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
212 xfs_warn(ip->i_mount,
213 "corrupt inode %Lu (bad attr fork size %Ld).",
214 (unsigned long long) ip->i_ino,
215 (long long) size);
216 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
217 XFS_ERRLEVEL_LOW,
218 ip->i_mount, dip);
219 error = -EFSCORRUPTED;
220 break;
221 }
222
223 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 118 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
224 break; 119 break;
225 case XFS_DINODE_FMT_EXTENTS: 120 case XFS_DINODE_FMT_EXTENTS:
@@ -403,6 +298,7 @@ xfs_iformat_btree(
403 */ 298 */
404 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= 299 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
405 XFS_IFORK_MAXEXT(ip, whichfork) || 300 XFS_IFORK_MAXEXT(ip, whichfork) ||
301 nrecs == 0 ||
406 XFS_BMDR_SPACE_CALC(nrecs) > 302 XFS_BMDR_SPACE_CALC(nrecs) >
407 XFS_DFORK_SIZE(dip, mp, whichfork) || 303 XFS_DFORK_SIZE(dip, mp, whichfork) ||
408 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || 304 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
@@ -827,3 +723,45 @@ xfs_ifork_init_cow(
827 ip->i_cformat = XFS_DINODE_FMT_EXTENTS; 723 ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
828 ip->i_cnextents = 0; 724 ip->i_cnextents = 0;
829} 725}
726
727/* Default fork content verifiers. */
728struct xfs_ifork_ops xfs_default_ifork_ops = {
729 .verify_attr = xfs_attr_shortform_verify,
730 .verify_dir = xfs_dir2_sf_verify,
731 .verify_symlink = xfs_symlink_shortform_verify,
732};
733
734/* Verify the inline contents of the data fork of an inode. */
735xfs_failaddr_t
736xfs_ifork_verify_data(
737 struct xfs_inode *ip,
738 struct xfs_ifork_ops *ops)
739{
740 /* Non-local data fork, we're done. */
741 if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
742 return NULL;
743
744 /* Check the inline data fork if there is one. */
745 switch (VFS_I(ip)->i_mode & S_IFMT) {
746 case S_IFDIR:
747 return ops->verify_dir(ip);
748 case S_IFLNK:
749 return ops->verify_symlink(ip);
750 default:
751 return NULL;
752 }
753}
754
755/* Verify the inline contents of the attr fork of an inode. */
756xfs_failaddr_t
757xfs_ifork_verify_attr(
758 struct xfs_inode *ip,
759 struct xfs_ifork_ops *ops)
760{
761 /* There has to be an attr fork allocated if aformat is local. */
762 if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
763 return NULL;
764 if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK))
765 return __this_address;
766 return ops->verify_attr(ip);
767}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index b9f0098e33b8..dd8aba0dd119 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -186,4 +186,18 @@ extern struct kmem_zone *xfs_ifork_zone;
186 186
187extern void xfs_ifork_init_cow(struct xfs_inode *ip); 187extern void xfs_ifork_init_cow(struct xfs_inode *ip);
188 188
189typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *);
190
191struct xfs_ifork_ops {
192 xfs_ifork_verifier_t verify_symlink;
193 xfs_ifork_verifier_t verify_dir;
194 xfs_ifork_verifier_t verify_attr;
195};
196extern struct xfs_ifork_ops xfs_default_ifork_ops;
197
198xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip,
199 struct xfs_ifork_ops *ops);
200xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip,
201 struct xfs_ifork_ops *ops);
202
189#endif /* __XFS_INODE_FORK_H__ */ 203#endif /* __XFS_INODE_FORK_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index c10597973333..cc4cbe290939 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -55,7 +55,7 @@ xfs_log_calc_max_attrsetm_res(
55 * the maximum one in terms of the pre-calculated values which were done 55 * the maximum one in terms of the pre-calculated values which were done
56 * at mount time. 56 * at mount time.
57 */ 57 */
58STATIC void 58void
59xfs_log_get_max_trans_res( 59xfs_log_get_max_trans_res(
60 struct xfs_mount *mp, 60 struct xfs_mount *mp,
61 struct xfs_trans_res *max_resp) 61 struct xfs_trans_res *max_resp)
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index d69c772271cb..bb1b13a9b5f4 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -112,8 +112,6 @@ typedef uint16_t xfs_qwarncnt_t;
112#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 112#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
113#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 113#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
114#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 114#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
115#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
116#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
117#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 115#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
118#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ 116#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
119#define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ 117#define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */
@@ -153,8 +151,11 @@ typedef uint16_t xfs_qwarncnt_t;
153 (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) 151 (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
154#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) 152#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
155 153
156extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, 154extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp,
157 xfs_dqid_t id, uint type, uint flags, const char *str); 155 struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type,
156 uint flags);
158extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); 157extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
158extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq,
159 xfs_dqid_t id, uint type);
159 160
160#endif /* __XFS_QUOTA_H__ */ 161#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index c40d26763075..bee68c23d612 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1696,3 +1696,22 @@ out_cursor:
1696 xfs_trans_brelse(tp, agbp); 1696 xfs_trans_brelse(tp, agbp);
1697 goto out_trans; 1697 goto out_trans;
1698} 1698}
1699
1700/* Is there a record covering a given extent? */
1701int
1702xfs_refcount_has_record(
1703 struct xfs_btree_cur *cur,
1704 xfs_agblock_t bno,
1705 xfs_extlen_t len,
1706 bool *exists)
1707{
1708 union xfs_btree_irec low;
1709 union xfs_btree_irec high;
1710
1711 memset(&low, 0, sizeof(low));
1712 low.rc.rc_startblock = bno;
1713 memset(&high, 0xFF, sizeof(high));
1714 high.rc.rc_startblock = bno + len - 1;
1715
1716 return xfs_btree_has_record(cur, &low, &high, exists);
1717}
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index eafb9d1f3b37..2a731ac68fe4 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -83,4 +83,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
83 return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; 83 return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
84} 84}
85 85
86extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
87 xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
88
86#endif /* __XFS_REFCOUNT_H__ */ 89#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 3c59dd3d58d7..8479769e470d 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -223,29 +223,31 @@ xfs_refcountbt_diff_two_keys(
223 be32_to_cpu(k2->refc.rc_startblock); 223 be32_to_cpu(k2->refc.rc_startblock);
224} 224}
225 225
226STATIC bool 226STATIC xfs_failaddr_t
227xfs_refcountbt_verify( 227xfs_refcountbt_verify(
228 struct xfs_buf *bp) 228 struct xfs_buf *bp)
229{ 229{
230 struct xfs_mount *mp = bp->b_target->bt_mount; 230 struct xfs_mount *mp = bp->b_target->bt_mount;
231 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 231 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
232 struct xfs_perag *pag = bp->b_pag; 232 struct xfs_perag *pag = bp->b_pag;
233 xfs_failaddr_t fa;
233 unsigned int level; 234 unsigned int level;
234 235
235 if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) 236 if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC))
236 return false; 237 return __this_address;
237 238
238 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 239 if (!xfs_sb_version_hasreflink(&mp->m_sb))
239 return false; 240 return __this_address;
240 if (!xfs_btree_sblock_v5hdr_verify(bp)) 241 fa = xfs_btree_sblock_v5hdr_verify(bp);
241 return false; 242 if (fa)
243 return fa;
242 244
243 level = be16_to_cpu(block->bb_level); 245 level = be16_to_cpu(block->bb_level);
244 if (pag && pag->pagf_init) { 246 if (pag && pag->pagf_init) {
245 if (level >= pag->pagf_refcount_level) 247 if (level >= pag->pagf_refcount_level)
246 return false; 248 return __this_address;
247 } else if (level >= mp->m_refc_maxlevels) 249 } else if (level >= mp->m_refc_maxlevels)
248 return false; 250 return __this_address;
249 251
250 return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); 252 return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]);
251} 253}
@@ -254,25 +256,30 @@ STATIC void
254xfs_refcountbt_read_verify( 256xfs_refcountbt_read_verify(
255 struct xfs_buf *bp) 257 struct xfs_buf *bp)
256{ 258{
259 xfs_failaddr_t fa;
260
257 if (!xfs_btree_sblock_verify_crc(bp)) 261 if (!xfs_btree_sblock_verify_crc(bp))
258 xfs_buf_ioerror(bp, -EFSBADCRC); 262 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
259 else if (!xfs_refcountbt_verify(bp)) 263 else {
260 xfs_buf_ioerror(bp, -EFSCORRUPTED); 264 fa = xfs_refcountbt_verify(bp);
265 if (fa)
266 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
267 }
261 268
262 if (bp->b_error) { 269 if (bp->b_error)
263 trace_xfs_btree_corrupt(bp, _RET_IP_); 270 trace_xfs_btree_corrupt(bp, _RET_IP_);
264 xfs_verifier_error(bp);
265 }
266} 271}
267 272
268STATIC void 273STATIC void
269xfs_refcountbt_write_verify( 274xfs_refcountbt_write_verify(
270 struct xfs_buf *bp) 275 struct xfs_buf *bp)
271{ 276{
272 if (!xfs_refcountbt_verify(bp)) { 277 xfs_failaddr_t fa;
278
279 fa = xfs_refcountbt_verify(bp);
280 if (fa) {
273 trace_xfs_btree_corrupt(bp, _RET_IP_); 281 trace_xfs_btree_corrupt(bp, _RET_IP_);
274 xfs_buf_ioerror(bp, -EFSCORRUPTED); 282 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
275 xfs_verifier_error(bp);
276 return; 283 return;
277 } 284 }
278 xfs_btree_sblock_calc_crc(bp); 285 xfs_btree_sblock_calc_crc(bp);
@@ -283,6 +290,7 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
283 .name = "xfs_refcountbt", 290 .name = "xfs_refcountbt",
284 .verify_read = xfs_refcountbt_read_verify, 291 .verify_read = xfs_refcountbt_read_verify,
285 .verify_write = xfs_refcountbt_write_verify, 292 .verify_write = xfs_refcountbt_write_verify,
293 .verify_struct = xfs_refcountbt_verify,
286}; 294};
287 295
288STATIC int 296STATIC int
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 50db920ceeeb..79822cf6ebe3 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2387,3 +2387,70 @@ xfs_rmap_compare(
2387 else 2387 else
2388 return 0; 2388 return 0;
2389} 2389}
2390
2391/* Is there a record covering a given extent? */
2392int
2393xfs_rmap_has_record(
2394 struct xfs_btree_cur *cur,
2395 xfs_agblock_t bno,
2396 xfs_extlen_t len,
2397 bool *exists)
2398{
2399 union xfs_btree_irec low;
2400 union xfs_btree_irec high;
2401
2402 memset(&low, 0, sizeof(low));
2403 low.r.rm_startblock = bno;
2404 memset(&high, 0xFF, sizeof(high));
2405 high.r.rm_startblock = bno + len - 1;
2406
2407 return xfs_btree_has_record(cur, &low, &high, exists);
2408}
2409
2410/*
2411 * Is there a record for this owner completely covering a given physical
2412 * extent? If so, *has_rmap will be set to true. If there is no record
2413 * or the record only covers part of the range, we set *has_rmap to false.
2414 * This function doesn't perform range lookups or offset checks, so it is
2415 * not suitable for checking data fork blocks.
2416 */
2417int
2418xfs_rmap_record_exists(
2419 struct xfs_btree_cur *cur,
2420 xfs_agblock_t bno,
2421 xfs_extlen_t len,
2422 struct xfs_owner_info *oinfo,
2423 bool *has_rmap)
2424{
2425 uint64_t owner;
2426 uint64_t offset;
2427 unsigned int flags;
2428 int has_record;
2429 struct xfs_rmap_irec irec;
2430 int error;
2431
2432 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
2433 ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
2434 (flags & XFS_RMAP_BMBT_BLOCK));
2435
2436 error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
2437 &has_record);
2438 if (error)
2439 return error;
2440 if (!has_record) {
2441 *has_rmap = false;
2442 return 0;
2443 }
2444
2445 error = xfs_rmap_get_rec(cur, &irec, &has_record);
2446 if (error)
2447 return error;
2448 if (!has_record) {
2449 *has_rmap = false;
2450 return 0;
2451 }
2452
2453 *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
2454 irec.rm_startblock + irec.rm_blockcount >= bno + len);
2455 return 0;
2456}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 0fcd5b1ba729..380e53be98d5 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -233,5 +233,10 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a,
233union xfs_btree_rec; 233union xfs_btree_rec;
234int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, 234int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
235 struct xfs_rmap_irec *irec); 235 struct xfs_rmap_irec *irec);
236int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
237 xfs_extlen_t len, bool *exists);
238int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno,
239 xfs_extlen_t len, struct xfs_owner_info *oinfo,
240 bool *has_rmap);
236 241
237#endif /* __XFS_RMAP_H__ */ 242#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 9d9c9192584c..e829c3e489ea 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -303,13 +303,14 @@ xfs_rmapbt_diff_two_keys(
303 return 0; 303 return 0;
304} 304}
305 305
306static bool 306static xfs_failaddr_t
307xfs_rmapbt_verify( 307xfs_rmapbt_verify(
308 struct xfs_buf *bp) 308 struct xfs_buf *bp)
309{ 309{
310 struct xfs_mount *mp = bp->b_target->bt_mount; 310 struct xfs_mount *mp = bp->b_target->bt_mount;
311 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 311 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
312 struct xfs_perag *pag = bp->b_pag; 312 struct xfs_perag *pag = bp->b_pag;
313 xfs_failaddr_t fa;
313 unsigned int level; 314 unsigned int level;
314 315
315 /* 316 /*
@@ -325,19 +326,20 @@ xfs_rmapbt_verify(
325 * in this case. 326 * in this case.
326 */ 327 */
327 if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) 328 if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
328 return false; 329 return __this_address;
329 330
330 if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 331 if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
331 return false; 332 return __this_address;
332 if (!xfs_btree_sblock_v5hdr_verify(bp)) 333 fa = xfs_btree_sblock_v5hdr_verify(bp);
333 return false; 334 if (fa)
335 return fa;
334 336
335 level = be16_to_cpu(block->bb_level); 337 level = be16_to_cpu(block->bb_level);
336 if (pag && pag->pagf_init) { 338 if (pag && pag->pagf_init) {
337 if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) 339 if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
338 return false; 340 return __this_address;
339 } else if (level >= mp->m_rmap_maxlevels) 341 } else if (level >= mp->m_rmap_maxlevels)
340 return false; 342 return __this_address;
341 343
342 return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); 344 return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
343} 345}
@@ -346,25 +348,30 @@ static void
346xfs_rmapbt_read_verify( 348xfs_rmapbt_read_verify(
347 struct xfs_buf *bp) 349 struct xfs_buf *bp)
348{ 350{
351 xfs_failaddr_t fa;
352
349 if (!xfs_btree_sblock_verify_crc(bp)) 353 if (!xfs_btree_sblock_verify_crc(bp))
350 xfs_buf_ioerror(bp, -EFSBADCRC); 354 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
351 else if (!xfs_rmapbt_verify(bp)) 355 else {
352 xfs_buf_ioerror(bp, -EFSCORRUPTED); 356 fa = xfs_rmapbt_verify(bp);
357 if (fa)
358 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
359 }
353 360
354 if (bp->b_error) { 361 if (bp->b_error)
355 trace_xfs_btree_corrupt(bp, _RET_IP_); 362 trace_xfs_btree_corrupt(bp, _RET_IP_);
356 xfs_verifier_error(bp);
357 }
358} 363}
359 364
360static void 365static void
361xfs_rmapbt_write_verify( 366xfs_rmapbt_write_verify(
362 struct xfs_buf *bp) 367 struct xfs_buf *bp)
363{ 368{
364 if (!xfs_rmapbt_verify(bp)) { 369 xfs_failaddr_t fa;
370
371 fa = xfs_rmapbt_verify(bp);
372 if (fa) {
365 trace_xfs_btree_corrupt(bp, _RET_IP_); 373 trace_xfs_btree_corrupt(bp, _RET_IP_);
366 xfs_buf_ioerror(bp, -EFSCORRUPTED); 374 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
367 xfs_verifier_error(bp);
368 return; 375 return;
369 } 376 }
370 xfs_btree_sblock_calc_crc(bp); 377 xfs_btree_sblock_calc_crc(bp);
@@ -375,6 +382,7 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
375 .name = "xfs_rmapbt", 382 .name = "xfs_rmapbt",
376 .verify_read = xfs_rmapbt_read_verify, 383 .verify_read = xfs_rmapbt_read_verify,
377 .verify_write = xfs_rmapbt_write_verify, 384 .verify_write = xfs_rmapbt_write_verify,
385 .verify_struct = xfs_rmapbt_verify,
378}; 386};
379 387
380STATIC int 388STATIC int
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 3fb29a5ea915..106be2d0bb88 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1097,3 +1097,24 @@ xfs_verify_rtbno(
1097{ 1097{
1098 return rtbno < mp->m_sb.sb_rblocks; 1098 return rtbno < mp->m_sb.sb_rblocks;
1099} 1099}
1100
1101/* Is the given extent all free? */
1102int
1103xfs_rtalloc_extent_is_free(
1104 struct xfs_mount *mp,
1105 struct xfs_trans *tp,
1106 xfs_rtblock_t start,
1107 xfs_extlen_t len,
1108 bool *is_free)
1109{
1110 xfs_rtblock_t end;
1111 int matches;
1112 int error;
1113
1114 error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches);
1115 if (error)
1116 return error;
1117
1118 *is_free = matches;
1119 return 0;
1120}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 9b5aae2bcc0b..46af6aa60a8e 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -40,6 +40,8 @@
40#include "xfs_rmap_btree.h" 40#include "xfs_rmap_btree.h"
41#include "xfs_bmap.h" 41#include "xfs_bmap.h"
42#include "xfs_refcount_btree.h" 42#include "xfs_refcount_btree.h"
43#include "xfs_da_format.h"
44#include "xfs_da_btree.h"
43 45
44/* 46/*
45 * Physical superblock buffer manipulations. Shared with libxfs in userspace. 47 * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -116,6 +118,9 @@ xfs_mount_validate_sb(
116 bool check_inprogress, 118 bool check_inprogress,
117 bool check_version) 119 bool check_version)
118{ 120{
121 u32 agcount = 0;
122 u32 rem;
123
119 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 124 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
120 xfs_warn(mp, "bad magic number"); 125 xfs_warn(mp, "bad magic number");
121 return -EWRONGFS; 126 return -EWRONGFS;
@@ -226,6 +231,13 @@ xfs_mount_validate_sb(
226 return -EINVAL; 231 return -EINVAL;
227 } 232 }
228 233
234 /* Compute agcount for this number of dblocks and agblocks */
235 if (sbp->sb_agblocks) {
236 agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem);
237 if (rem)
238 agcount++;
239 }
240
229 /* 241 /*
230 * More sanity checking. Most of these were stolen directly from 242 * More sanity checking. Most of these were stolen directly from
231 * xfs_repair. 243 * xfs_repair.
@@ -250,6 +262,10 @@ xfs_mount_validate_sb(
250 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 262 sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
251 sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || 263 sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE ||
252 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || 264 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
265 XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES ||
266 XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES ||
267 sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 ||
268 agcount == 0 || agcount != sbp->sb_agcount ||
253 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 269 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
254 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 270 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
255 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 271 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
@@ -640,11 +656,10 @@ xfs_sb_read_verify(
640 error = xfs_sb_verify(bp, true); 656 error = xfs_sb_verify(bp, true);
641 657
642out_error: 658out_error:
643 if (error) { 659 if (error == -EFSCORRUPTED || error == -EFSBADCRC)
660 xfs_verifier_error(bp, error, __this_address);
661 else if (error)
644 xfs_buf_ioerror(bp, error); 662 xfs_buf_ioerror(bp, error);
645 if (error == -EFSCORRUPTED || error == -EFSBADCRC)
646 xfs_verifier_error(bp);
647 }
648} 663}
649 664
650/* 665/*
@@ -673,13 +688,12 @@ xfs_sb_write_verify(
673 struct xfs_buf *bp) 688 struct xfs_buf *bp)
674{ 689{
675 struct xfs_mount *mp = bp->b_target->bt_mount; 690 struct xfs_mount *mp = bp->b_target->bt_mount;
676 struct xfs_buf_log_item *bip = bp->b_fspriv; 691 struct xfs_buf_log_item *bip = bp->b_log_item;
677 int error; 692 int error;
678 693
679 error = xfs_sb_verify(bp, false); 694 error = xfs_sb_verify(bp, false);
680 if (error) { 695 if (error) {
681 xfs_buf_ioerror(bp, error); 696 xfs_verifier_error(bp, error, __this_address);
682 xfs_verifier_error(bp);
683 return; 697 return;
684 } 698 }
685 699
@@ -876,3 +890,88 @@ xfs_sync_sb(
876 xfs_trans_set_sync(tp); 890 xfs_trans_set_sync(tp);
877 return xfs_trans_commit(tp); 891 return xfs_trans_commit(tp);
878} 892}
893
894int
895xfs_fs_geometry(
896 struct xfs_sb *sbp,
897 struct xfs_fsop_geom *geo,
898 int struct_version)
899{
900 memset(geo, 0, sizeof(struct xfs_fsop_geom));
901
902 geo->blocksize = sbp->sb_blocksize;
903 geo->rtextsize = sbp->sb_rextsize;
904 geo->agblocks = sbp->sb_agblocks;
905 geo->agcount = sbp->sb_agcount;
906 geo->logblocks = sbp->sb_logblocks;
907 geo->sectsize = sbp->sb_sectsize;
908 geo->inodesize = sbp->sb_inodesize;
909 geo->imaxpct = sbp->sb_imax_pct;
910 geo->datablocks = sbp->sb_dblocks;
911 geo->rtblocks = sbp->sb_rblocks;
912 geo->rtextents = sbp->sb_rextents;
913 geo->logstart = sbp->sb_logstart;
914 BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid));
915 memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid));
916
917 if (struct_version < 2)
918 return 0;
919
920 geo->sunit = sbp->sb_unit;
921 geo->swidth = sbp->sb_width;
922
923 if (struct_version < 3)
924 return 0;
925
926 geo->version = XFS_FSOP_GEOM_VERSION;
927 geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
928 XFS_FSOP_GEOM_FLAGS_DIRV2;
929 if (xfs_sb_version_hasattr(sbp))
930 geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
931 if (xfs_sb_version_hasquota(sbp))
932 geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA;
933 if (xfs_sb_version_hasalign(sbp))
934 geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN;
935 if (xfs_sb_version_hasdalign(sbp))
936 geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN;
937 if (xfs_sb_version_hasextflgbit(sbp))
938 geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG;
939 if (xfs_sb_version_hassector(sbp))
940 geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
941 if (xfs_sb_version_hasasciici(sbp))
942 geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI;
943 if (xfs_sb_version_haslazysbcount(sbp))
944 geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB;
945 if (xfs_sb_version_hasattr2(sbp))
946 geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2;
947 if (xfs_sb_version_hasprojid32bit(sbp))
948 geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32;
949 if (xfs_sb_version_hascrc(sbp))
950 geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB;
951 if (xfs_sb_version_hasftype(sbp))
952 geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE;
953 if (xfs_sb_version_hasfinobt(sbp))
954 geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT;
955 if (xfs_sb_version_hassparseinodes(sbp))
956 geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES;
957 if (xfs_sb_version_hasrmapbt(sbp))
958 geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT;
959 if (xfs_sb_version_hasreflink(sbp))
960 geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK;
961 if (xfs_sb_version_hassector(sbp))
962 geo->logsectsize = sbp->sb_logsectsize;
963 else
964 geo->logsectsize = BBSIZE;
965 geo->rtsectsize = sbp->sb_blocksize;
966 geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
967
968 if (struct_version < 4)
969 return 0;
970
971 if (xfs_sb_version_haslogv2(sbp))
972 geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2;
973
974 geo->logsunit = sbp->sb_logsunit;
975
976 return 0;
977}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 961e6475a309..63dcd2a1a657 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -34,4 +34,8 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
34extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); 34extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
35extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); 35extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
36 36
37#define XFS_FS_GEOM_MAX_STRUCT_VER (4)
38extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo,
39 int struct_version);
40
37#endif /* __XFS_SB_H__ */ 41#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index c6f4eb46fe26..d0b84da0cb1e 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -76,6 +76,9 @@ struct xfs_log_item_desc {
76int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); 76int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
77int xfs_log_calc_minimum_size(struct xfs_mount *); 77int xfs_log_calc_minimum_size(struct xfs_mount *);
78 78
79struct xfs_trans_res;
80void xfs_log_get_max_trans_res(struct xfs_mount *mp,
81 struct xfs_trans_res *max_resp);
79 82
80/* 83/*
81 * Values for t_flags. 84 * Values for t_flags.
@@ -143,5 +146,6 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
143 uint32_t size, struct xfs_buf *bp); 146 uint32_t size, struct xfs_buf *bp);
144void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, 147void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
145 struct xfs_inode *ip, struct xfs_ifork *ifp); 148 struct xfs_inode *ip, struct xfs_ifork *ifp);
149xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
146 150
147#endif /* __XFS_SHARED_H__ */ 151#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index c484877129a0..5ef5f354587e 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -98,7 +98,7 @@ xfs_symlink_hdr_ok(
98 return true; 98 return true;
99} 99}
100 100
101static bool 101static xfs_failaddr_t
102xfs_symlink_verify( 102xfs_symlink_verify(
103 struct xfs_buf *bp) 103 struct xfs_buf *bp)
104{ 104{
@@ -106,22 +106,22 @@ xfs_symlink_verify(
106 struct xfs_dsymlink_hdr *dsl = bp->b_addr; 106 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
107 107
108 if (!xfs_sb_version_hascrc(&mp->m_sb)) 108 if (!xfs_sb_version_hascrc(&mp->m_sb))
109 return false; 109 return __this_address;
110 if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) 110 if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
111 return false; 111 return __this_address;
112 if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) 112 if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
113 return false; 113 return __this_address;
114 if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) 114 if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
115 return false; 115 return __this_address;
116 if (be32_to_cpu(dsl->sl_offset) + 116 if (be32_to_cpu(dsl->sl_offset) +
117 be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN) 117 be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
118 return false; 118 return __this_address;
119 if (dsl->sl_owner == 0) 119 if (dsl->sl_owner == 0)
120 return false; 120 return __this_address;
121 if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) 121 if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
122 return false; 122 return __this_address;
123 123
124 return true; 124 return NULL;
125} 125}
126 126
127static void 127static void
@@ -129,18 +129,19 @@ xfs_symlink_read_verify(
129 struct xfs_buf *bp) 129 struct xfs_buf *bp)
130{ 130{
131 struct xfs_mount *mp = bp->b_target->bt_mount; 131 struct xfs_mount *mp = bp->b_target->bt_mount;
132 xfs_failaddr_t fa;
132 133
133 /* no verification of non-crc buffers */ 134 /* no verification of non-crc buffers */
134 if (!xfs_sb_version_hascrc(&mp->m_sb)) 135 if (!xfs_sb_version_hascrc(&mp->m_sb))
135 return; 136 return;
136 137
137 if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) 138 if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
138 xfs_buf_ioerror(bp, -EFSBADCRC); 139 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
139 else if (!xfs_symlink_verify(bp)) 140 else {
140 xfs_buf_ioerror(bp, -EFSCORRUPTED); 141 fa = xfs_symlink_verify(bp);
141 142 if (fa)
142 if (bp->b_error) 143 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
143 xfs_verifier_error(bp); 144 }
144} 145}
145 146
146static void 147static void
@@ -148,15 +149,16 @@ xfs_symlink_write_verify(
148 struct xfs_buf *bp) 149 struct xfs_buf *bp)
149{ 150{
150 struct xfs_mount *mp = bp->b_target->bt_mount; 151 struct xfs_mount *mp = bp->b_target->bt_mount;
151 struct xfs_buf_log_item *bip = bp->b_fspriv; 152 struct xfs_buf_log_item *bip = bp->b_log_item;
153 xfs_failaddr_t fa;
152 154
153 /* no verification of non-crc buffers */ 155 /* no verification of non-crc buffers */
154 if (!xfs_sb_version_hascrc(&mp->m_sb)) 156 if (!xfs_sb_version_hascrc(&mp->m_sb))
155 return; 157 return;
156 158
157 if (!xfs_symlink_verify(bp)) { 159 fa = xfs_symlink_verify(bp);
158 xfs_buf_ioerror(bp, -EFSCORRUPTED); 160 if (fa) {
159 xfs_verifier_error(bp); 161 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
160 return; 162 return;
161 } 163 }
162 164
@@ -171,6 +173,7 @@ const struct xfs_buf_ops xfs_symlink_buf_ops = {
171 .name = "xfs_symlink", 173 .name = "xfs_symlink",
172 .verify_read = xfs_symlink_read_verify, 174 .verify_read = xfs_symlink_read_verify,
173 .verify_write = xfs_symlink_write_verify, 175 .verify_write = xfs_symlink_write_verify,
176 .verify_struct = xfs_symlink_verify,
174}; 177};
175 178
176void 179void
@@ -207,3 +210,37 @@ xfs_symlink_local_to_remote(
207 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + 210 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
208 ifp->if_bytes - 1); 211 ifp->if_bytes - 1);
209} 212}
213
214/* Verify the consistency of an inline symlink. */
215xfs_failaddr_t
216xfs_symlink_shortform_verify(
217 struct xfs_inode *ip)
218{
219 char *sfp;
220 char *endp;
221 struct xfs_ifork *ifp;
222 int size;
223
224 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
225 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
226 sfp = (char *)ifp->if_u1.if_data;
227 size = ifp->if_bytes;
228 endp = sfp + size;
229
230 /* Zero length symlinks can exist while we're deleting a remote one. */
231 if (size == 0)
232 return NULL;
233
234 /* No negative sizes or overly long symlink targets. */
235 if (size < 0 || size > XFS_SYMLINK_MAXLEN)
236 return __this_address;
237
238 /* No NULLs in the target either. */
239 if (memchr(sfp, 0, size - 1))
240 return __this_address;
241
242 /* We /did/ null-terminate the buffer, right? */
243 if (*endp != 0)
244 return __this_address;
245 return NULL;
246}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6bd916bd35e2..5f17641f040f 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -34,6 +34,9 @@
34#include "xfs_trans_space.h" 34#include "xfs_trans_space.h"
35#include "xfs_trace.h" 35#include "xfs_trace.h"
36 36
37#define _ALLOC true
38#define _FREE false
39
37/* 40/*
38 * A buffer has a format structure overhead in the log in addition 41 * A buffer has a format structure overhead in the log in addition
39 * to the data, so we need to take this into account when reserving 42 * to the data, so we need to take this into account when reserving
@@ -132,43 +135,77 @@ xfs_calc_inode_res(
132} 135}
133 136
134/* 137/*
135 * The free inode btree is a conditional feature and the log reservation 138 * Inode btree record insertion/removal modifies the inode btree and free space
136 * requirements differ slightly from that of the traditional inode allocation 139 * btrees (since the inobt does not use the agfl). This requires the following
137 * btree. The finobt tracks records for inode chunks with at least one free 140 * reservation:
138 * inode. A record can be removed from the tree for an inode allocation
139 * or free and thus the finobt reservation is unconditional across:
140 * 141 *
141 * - inode allocation 142 * the inode btree: max depth * blocksize
142 * - inode free 143 * the allocation btrees: 2 trees * (max depth - 1) * block size
143 * - inode chunk allocation
144 * 144 *
145 * The 'modify' param indicates to include the record modification scenario. The 145 * The caller must account for SB and AG header modifications, etc.
146 * 'alloc' param indicates to include the reservation for free space btree 146 */
147 * modifications on behalf of finobt modifications. This is required only for 147STATIC uint
148 * transactions that do not already account for free space btree modifications. 148xfs_calc_inobt_res(
149 struct xfs_mount *mp)
150{
151 return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
152 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
153 XFS_FSB_TO_B(mp, 1));
154}
155
156/*
157 * The free inode btree is a conditional feature. The behavior differs slightly
158 * from that of the traditional inode btree in that the finobt tracks records
159 * for inode chunks with at least one free inode. A record can be removed from
160 * the tree during individual inode allocation. Therefore the finobt
161 * reservation is unconditional for both the inode chunk allocation and
162 * individual inode allocation (modify) cases.
149 * 163 *
150 * the free inode btree: max depth * block size 164 * Behavior aside, the reservation for finobt modification is equivalent to the
151 * the allocation btrees: 2 trees * (max depth - 1) * block size 165 * traditional inobt: cover a full finobt shape change plus block allocation.
152 * the free inode btree entry: block size
153 */ 166 */
154STATIC uint 167STATIC uint
155xfs_calc_finobt_res( 168xfs_calc_finobt_res(
156 struct xfs_mount *mp, 169 struct xfs_mount *mp)
157 int alloc,
158 int modify)
159{ 170{
160 uint res;
161
162 if (!xfs_sb_version_hasfinobt(&mp->m_sb)) 171 if (!xfs_sb_version_hasfinobt(&mp->m_sb))
163 return 0; 172 return 0;
164 173
165 res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); 174 return xfs_calc_inobt_res(mp);
166 if (alloc) 175}
167 res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
168 XFS_FSB_TO_B(mp, 1));
169 if (modify)
170 res += (uint)XFS_FSB_TO_B(mp, 1);
171 176
177/*
178 * Calculate the reservation required to allocate or free an inode chunk. This
179 * includes:
180 *
181 * the allocation btrees: 2 trees * (max depth - 1) * block size
182 * the inode chunk: m_ialloc_blks * N
183 *
184 * The size N of the inode chunk reservation depends on whether it is for
185 * allocation or free and which type of create transaction is in use. An inode
186 * chunk free always invalidates the buffers and only requires reservation for
187 * headers (N == 0). An inode chunk allocation requires a chunk sized
188 * reservation on v4 and older superblocks to initialize the chunk. No chunk
189 * reservation is required for allocation on v5 supers, which use ordered
190 * buffers to initialize.
191 */
192STATIC uint
193xfs_calc_inode_chunk_res(
194 struct xfs_mount *mp,
195 bool alloc)
196{
197 uint res, size = 0;
198
199 res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
200 XFS_FSB_TO_B(mp, 1));
201 if (alloc) {
202 /* icreate tx uses ordered buffers */
203 if (xfs_sb_version_hascrc(&mp->m_sb))
204 return res;
205 size = XFS_FSB_TO_B(mp, 1);
206 }
207
208 res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
172 return res; 209 return res;
173} 210}
174 211
@@ -232,8 +269,6 @@ xfs_calc_write_reservation(
232 * the super block to reflect the freed blocks: sector size 269 * the super block to reflect the freed blocks: sector size
233 * worst case split in allocation btrees per extent assuming 4 extents: 270 * worst case split in allocation btrees per extent assuming 4 extents:
234 * 4 exts * 2 trees * (2 * max depth - 1) * block size 271 * 4 exts * 2 trees * (2 * max depth - 1) * block size
235 * the inode btree: max depth * blocksize
236 * the allocation btrees: 2 trees * (max depth - 1) * block size
237 */ 272 */
238STATIC uint 273STATIC uint
239xfs_calc_itruncate_reservation( 274xfs_calc_itruncate_reservation(
@@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation(
245 XFS_FSB_TO_B(mp, 1))), 280 XFS_FSB_TO_B(mp, 1))),
246 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 281 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
247 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), 282 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
248 XFS_FSB_TO_B(mp, 1)) + 283 XFS_FSB_TO_B(mp, 1))));
249 xfs_calc_buf_res(5, 0) +
250 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
251 XFS_FSB_TO_B(mp, 1)) +
252 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
253 mp->m_in_maxlevels, 0)));
254} 284}
255 285
256/* 286/*
@@ -282,13 +312,14 @@ xfs_calc_rename_reservation(
282 * For removing an inode from unlinked list at first, we can modify: 312 * For removing an inode from unlinked list at first, we can modify:
283 * the agi hash list and counters: sector size 313 * the agi hash list and counters: sector size
284 * the on disk inode before ours in the agi hash list: inode cluster size 314 * the on disk inode before ours in the agi hash list: inode cluster size
315 * the on disk inode in the agi hash list: inode cluster size
285 */ 316 */
286STATIC uint 317STATIC uint
287xfs_calc_iunlink_remove_reservation( 318xfs_calc_iunlink_remove_reservation(
288 struct xfs_mount *mp) 319 struct xfs_mount *mp)
289{ 320{
290 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 321 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
291 max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 322 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
292} 323}
293 324
294/* 325/*
@@ -320,13 +351,13 @@ xfs_calc_link_reservation(
320/* 351/*
321 * For adding an inode to unlinked list we can modify: 352 * For adding an inode to unlinked list we can modify:
322 * the agi hash list: sector size 353 * the agi hash list: sector size
323 * the unlinked inode: inode size 354 * the on disk inode: inode cluster size
324 */ 355 */
325STATIC uint 356STATIC uint
326xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) 357xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
327{ 358{
328 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 359 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
329 xfs_calc_inode_res(mp, 1); 360 max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
330} 361}
331 362
332/* 363/*
@@ -379,45 +410,16 @@ xfs_calc_create_resv_modify(
379 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 410 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
380 (uint)XFS_FSB_TO_B(mp, 1) + 411 (uint)XFS_FSB_TO_B(mp, 1) +
381 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + 412 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
382 xfs_calc_finobt_res(mp, 1, 1); 413 xfs_calc_finobt_res(mp);
383}
384
385/*
386 * For create we can allocate some inodes giving:
387 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
388 * the superblock for the nlink flag: sector size
389 * the inode blocks allocated: mp->m_ialloc_blks * blocksize
390 * the inode btree: max depth * blocksize
391 * the allocation btrees: 2 trees * (max depth - 1) * block size
392 */
393STATIC uint
394xfs_calc_create_resv_alloc(
395 struct xfs_mount *mp)
396{
397 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
398 mp->m_sb.sb_sectsize +
399 xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
400 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
401 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
402 XFS_FSB_TO_B(mp, 1));
403}
404
405STATIC uint
406__xfs_calc_create_reservation(
407 struct xfs_mount *mp)
408{
409 return XFS_DQUOT_LOGRES(mp) +
410 MAX(xfs_calc_create_resv_alloc(mp),
411 xfs_calc_create_resv_modify(mp));
412} 414}
413 415
414/* 416/*
415 * For icreate we can allocate some inodes giving: 417 * For icreate we can allocate some inodes giving:
416 * the agi and agf of the ag getting the new inodes: 2 * sectorsize 418 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
417 * the superblock for the nlink flag: sector size 419 * the superblock for the nlink flag: sector size
418 * the inode btree: max depth * blocksize 420 * the inode chunk (allocation, optional init)
419 * the allocation btrees: 2 trees * (max depth - 1) * block size 421 * the inobt (record insertion)
420 * the finobt (record insertion) 422 * the finobt (optional, record insertion)
421 */ 423 */
422STATIC uint 424STATIC uint
423xfs_calc_icreate_resv_alloc( 425xfs_calc_icreate_resv_alloc(
@@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc(
425{ 427{
426 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 428 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
427 mp->m_sb.sb_sectsize + 429 mp->m_sb.sb_sectsize +
428 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 430 xfs_calc_inode_chunk_res(mp, _ALLOC) +
429 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 431 xfs_calc_inobt_res(mp) +
430 XFS_FSB_TO_B(mp, 1)) + 432 xfs_calc_finobt_res(mp);
431 xfs_calc_finobt_res(mp, 0, 0);
432} 433}
433 434
434STATIC uint 435STATIC uint
@@ -440,26 +441,12 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp)
440} 441}
441 442
442STATIC uint 443STATIC uint
443xfs_calc_create_reservation(
444 struct xfs_mount *mp)
445{
446 if (xfs_sb_version_hascrc(&mp->m_sb))
447 return xfs_calc_icreate_reservation(mp);
448 return __xfs_calc_create_reservation(mp);
449
450}
451
452STATIC uint
453xfs_calc_create_tmpfile_reservation( 444xfs_calc_create_tmpfile_reservation(
454 struct xfs_mount *mp) 445 struct xfs_mount *mp)
455{ 446{
456 uint res = XFS_DQUOT_LOGRES(mp); 447 uint res = XFS_DQUOT_LOGRES(mp);
457 448
458 if (xfs_sb_version_hascrc(&mp->m_sb)) 449 res += xfs_calc_icreate_resv_alloc(mp);
459 res += xfs_calc_icreate_resv_alloc(mp);
460 else
461 res += xfs_calc_create_resv_alloc(mp);
462
463 return res + xfs_calc_iunlink_add_reservation(mp); 450 return res + xfs_calc_iunlink_add_reservation(mp);
464} 451}
465 452
@@ -470,7 +457,7 @@ STATIC uint
470xfs_calc_mkdir_reservation( 457xfs_calc_mkdir_reservation(
471 struct xfs_mount *mp) 458 struct xfs_mount *mp)
472{ 459{
473 return xfs_calc_create_reservation(mp); 460 return xfs_calc_icreate_reservation(mp);
474} 461}
475 462
476 463
@@ -483,20 +470,24 @@ STATIC uint
483xfs_calc_symlink_reservation( 470xfs_calc_symlink_reservation(
484 struct xfs_mount *mp) 471 struct xfs_mount *mp)
485{ 472{
486 return xfs_calc_create_reservation(mp) + 473 return xfs_calc_icreate_reservation(mp) +
487 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); 474 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
488} 475}
489 476
490/* 477/*
491 * In freeing an inode we can modify: 478 * In freeing an inode we can modify:
492 * the inode being freed: inode size 479 * the inode being freed: inode size
493 * the super block free inode counter: sector size 480 * the super block free inode counter, AGF and AGFL: sector size
494 * the agi hash list and counters: sector size 481 * the on disk inode (agi unlinked list removal)
495 * the inode btree entry: block size 482 * the inode chunk (invalidated, headers only)
496 * the on disk inode before ours in the agi hash list: inode cluster size 483 * the inode btree
497 * the inode btree: max depth * blocksize
498 * the allocation btrees: 2 trees * (max depth - 1) * block size
499 * the finobt (record insertion, removal or modification) 484 * the finobt (record insertion, removal or modification)
485 *
486 * Note that the inode chunk res. includes an allocfree res. for freeing of the
487 * inode chunk. This is technically extraneous because the inode chunk free is
488 * deferred (it occurs after a transaction roll). Include the extra reservation
489 * anyways since we've had reports of ifree transaction overruns due to too many
490 * agfl fixups during inode chunk frees.
500 */ 491 */
501STATIC uint 492STATIC uint
502xfs_calc_ifree_reservation( 493xfs_calc_ifree_reservation(
@@ -504,15 +495,11 @@ xfs_calc_ifree_reservation(
504{ 495{
505 return XFS_DQUOT_LOGRES(mp) + 496 return XFS_DQUOT_LOGRES(mp) +
506 xfs_calc_inode_res(mp, 1) + 497 xfs_calc_inode_res(mp, 1) +
507 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 498 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
508 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
509 xfs_calc_iunlink_remove_reservation(mp) + 499 xfs_calc_iunlink_remove_reservation(mp) +
510 xfs_calc_buf_res(1, 0) + 500 xfs_calc_inode_chunk_res(mp, _FREE) +
511 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 501 xfs_calc_inobt_res(mp) +
512 mp->m_in_maxlevels, 0) + 502 xfs_calc_finobt_res(mp);
513 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
514 XFS_FSB_TO_B(mp, 1)) +
515 xfs_calc_finobt_res(mp, 0, 1);
516} 503}
517 504
518/* 505/*
@@ -842,7 +829,7 @@ xfs_trans_resv_calc(
842 resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; 829 resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
843 resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; 830 resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
844 831
845 resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); 832 resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
846 resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; 833 resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
847 resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; 834 resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
848 835
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 2a9b4f9e93c6..fd975524f460 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -32,30 +32,17 @@
32#include "xfs_inode.h" 32#include "xfs_inode.h"
33#include "xfs_alloc.h" 33#include "xfs_alloc.h"
34#include "xfs_ialloc.h" 34#include "xfs_ialloc.h"
35#include "xfs_rmap.h"
35#include "scrub/xfs_scrub.h" 36#include "scrub/xfs_scrub.h"
36#include "scrub/scrub.h" 37#include "scrub/scrub.h"
37#include "scrub/common.h" 38#include "scrub/common.h"
38#include "scrub/trace.h" 39#include "scrub/trace.h"
39 40
40/* 41/*
41 * Set up scrub to check all the static metadata in each AG. 42 * Walk all the blocks in the AGFL. The fn function can return any negative
42 * This means the SB, AGF, AGI, and AGFL headers. 43 * error code or XFS_BTREE_QUERY_RANGE_ABORT.
43 */ 44 */
44int 45int
45xfs_scrub_setup_ag_header(
46 struct xfs_scrub_context *sc,
47 struct xfs_inode *ip)
48{
49 struct xfs_mount *mp = sc->mp;
50
51 if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
52 sc->sm->sm_ino || sc->sm->sm_gen)
53 return -EINVAL;
54 return xfs_scrub_setup_fs(sc, ip);
55}
56
57/* Walk all the blocks in the AGFL. */
58int
59xfs_scrub_walk_agfl( 46xfs_scrub_walk_agfl(
60 struct xfs_scrub_context *sc, 47 struct xfs_scrub_context *sc,
61 int (*fn)(struct xfs_scrub_context *, 48 int (*fn)(struct xfs_scrub_context *,
@@ -115,6 +102,36 @@ xfs_scrub_walk_agfl(
115 102
116/* Superblock */ 103/* Superblock */
117 104
105/* Cross-reference with the other btrees. */
106STATIC void
107xfs_scrub_superblock_xref(
108 struct xfs_scrub_context *sc,
109 struct xfs_buf *bp)
110{
111 struct xfs_owner_info oinfo;
112 struct xfs_mount *mp = sc->mp;
113 xfs_agnumber_t agno = sc->sm->sm_agno;
114 xfs_agblock_t agbno;
115 int error;
116
117 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
118 return;
119
120 agbno = XFS_SB_BLOCK(mp);
121
122 error = xfs_scrub_ag_init(sc, agno, &sc->sa);
123 if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
124 return;
125
126 xfs_scrub_xref_is_used_space(sc, agbno, 1);
127 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
128 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
129 xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
130 xfs_scrub_xref_is_not_shared(sc, agbno, 1);
131
132 /* scrub teardown will take care of sc->sa for us */
133}
134
118/* 135/*
119 * Scrub the filesystem superblock. 136 * Scrub the filesystem superblock.
120 * 137 *
@@ -143,6 +160,22 @@ xfs_scrub_superblock(
143 error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, 160 error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
144 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 161 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
145 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); 162 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
163 /*
164 * The superblock verifier can return several different error codes
165 * if it thinks the superblock doesn't look right. For a mount these
166 * would all get bounced back to userspace, but if we're here then the
167 * fs mounted successfully, which means that this secondary superblock
168 * is simply incorrect. Treat all these codes the same way we treat
169 * any corruption.
170 */
171 switch (error) {
172 case -EINVAL: /* also -EWRONGFS */
173 case -ENOSYS:
174 case -EFBIG:
175 error = -EFSCORRUPTED;
176 default:
177 break;
178 }
146 if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error)) 179 if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
147 return error; 180 return error;
148 181
@@ -387,11 +420,175 @@ xfs_scrub_superblock(
387 BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) 420 BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
388 xfs_scrub_block_set_corrupt(sc, bp); 421 xfs_scrub_block_set_corrupt(sc, bp);
389 422
423 xfs_scrub_superblock_xref(sc, bp);
424
390 return error; 425 return error;
391} 426}
392 427
393/* AGF */ 428/* AGF */
394 429
430/* Tally freespace record lengths. */
431STATIC int
432xfs_scrub_agf_record_bno_lengths(
433 struct xfs_btree_cur *cur,
434 struct xfs_alloc_rec_incore *rec,
435 void *priv)
436{
437 xfs_extlen_t *blocks = priv;
438
439 (*blocks) += rec->ar_blockcount;
440 return 0;
441}
442
443/* Check agf_freeblks */
444static inline void
445xfs_scrub_agf_xref_freeblks(
446 struct xfs_scrub_context *sc)
447{
448 struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
449 xfs_extlen_t blocks = 0;
450 int error;
451
452 if (!sc->sa.bno_cur)
453 return;
454
455 error = xfs_alloc_query_all(sc->sa.bno_cur,
456 xfs_scrub_agf_record_bno_lengths, &blocks);
457 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
458 return;
459 if (blocks != be32_to_cpu(agf->agf_freeblks))
460 xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
461}
462
463/* Cross reference the AGF with the cntbt (freespace by length btree) */
464static inline void
465xfs_scrub_agf_xref_cntbt(
466 struct xfs_scrub_context *sc)
467{
468 struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
469 xfs_agblock_t agbno;
470 xfs_extlen_t blocks;
471 int have;
472 int error;
473
474 if (!sc->sa.cnt_cur)
475 return;
476
477 /* Any freespace at all? */
478 error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have);
479 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
480 return;
481 if (!have) {
482 if (agf->agf_freeblks != be32_to_cpu(0))
483 xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
484 return;
485 }
486
487 /* Check agf_longest */
488 error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have);
489 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
490 return;
491 if (!have || blocks != be32_to_cpu(agf->agf_longest))
492 xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
493}
494
495/* Check the btree block counts in the AGF against the btrees. */
496STATIC void
497xfs_scrub_agf_xref_btreeblks(
498 struct xfs_scrub_context *sc)
499{
500 struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
501 struct xfs_mount *mp = sc->mp;
502 xfs_agblock_t blocks;
503 xfs_agblock_t btreeblks;
504 int error;
505
506 /* Check agf_rmap_blocks; set up for agf_btreeblks check */
507 if (sc->sa.rmap_cur) {
508 error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks);
509 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
510 return;
511 btreeblks = blocks - 1;
512 if (blocks != be32_to_cpu(agf->agf_rmap_blocks))
513 xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
514 } else {
515 btreeblks = 0;
516 }
517
518 /*
519 * No rmap cursor; we can't xref if we have the rmapbt feature.
520 * We also can't do it if we're missing the free space btree cursors.
521 */
522 if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) ||
523 !sc->sa.bno_cur || !sc->sa.cnt_cur)
524 return;
525
526 /* Check agf_btreeblks */
527 error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
528 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
529 return;
530 btreeblks += blocks - 1;
531
532 error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
533 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
534 return;
535 btreeblks += blocks - 1;
536
537 if (btreeblks != be32_to_cpu(agf->agf_btreeblks))
538 xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
539}
540
541/* Check agf_refcount_blocks against tree size */
542static inline void
543xfs_scrub_agf_xref_refcblks(
544 struct xfs_scrub_context *sc)
545{
546 struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
547 xfs_agblock_t blocks;
548 int error;
549
550 if (!sc->sa.refc_cur)
551 return;
552
553 error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks);
554 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
555 return;
556 if (blocks != be32_to_cpu(agf->agf_refcount_blocks))
557 xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
558}
559
560/* Cross-reference with the other btrees. */
561STATIC void
562xfs_scrub_agf_xref(
563 struct xfs_scrub_context *sc)
564{
565 struct xfs_owner_info oinfo;
566 struct xfs_mount *mp = sc->mp;
567 xfs_agblock_t agbno;
568 int error;
569
570 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
571 return;
572
573 agbno = XFS_AGF_BLOCK(mp);
574
575 error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
576 if (error)
577 return;
578
579 xfs_scrub_xref_is_used_space(sc, agbno, 1);
580 xfs_scrub_agf_xref_freeblks(sc);
581 xfs_scrub_agf_xref_cntbt(sc);
582 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
583 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
584 xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
585 xfs_scrub_agf_xref_btreeblks(sc);
586 xfs_scrub_xref_is_not_shared(sc, agbno, 1);
587 xfs_scrub_agf_xref_refcblks(sc);
588
589 /* scrub teardown will take care of sc->sa for us */
590}
591
395/* Scrub the AGF. */ 592/* Scrub the AGF. */
396int 593int
397xfs_scrub_agf( 594xfs_scrub_agf(
@@ -414,6 +611,7 @@ xfs_scrub_agf(
414 &sc->sa.agf_bp, &sc->sa.agfl_bp); 611 &sc->sa.agf_bp, &sc->sa.agfl_bp);
415 if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error)) 612 if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
416 goto out; 613 goto out;
614 xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp);
417 615
418 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 616 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
419 617
@@ -470,6 +668,7 @@ xfs_scrub_agf(
470 if (agfl_count != 0 && fl_count != agfl_count) 668 if (agfl_count != 0 && fl_count != agfl_count)
471 xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); 669 xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
472 670
671 xfs_scrub_agf_xref(sc);
473out: 672out:
474 return error; 673 return error;
475} 674}
@@ -477,11 +676,28 @@ out:
477/* AGFL */ 676/* AGFL */
478 677
479struct xfs_scrub_agfl_info { 678struct xfs_scrub_agfl_info {
679 struct xfs_owner_info oinfo;
480 unsigned int sz_entries; 680 unsigned int sz_entries;
481 unsigned int nr_entries; 681 unsigned int nr_entries;
482 xfs_agblock_t *entries; 682 xfs_agblock_t *entries;
483}; 683};
484 684
685/* Cross-reference with the other btrees. */
686STATIC void
687xfs_scrub_agfl_block_xref(
688 struct xfs_scrub_context *sc,
689 xfs_agblock_t agbno,
690 struct xfs_owner_info *oinfo)
691{
692 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
693 return;
694
695 xfs_scrub_xref_is_used_space(sc, agbno, 1);
696 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
697 xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo);
698 xfs_scrub_xref_is_not_shared(sc, agbno, 1);
699}
700
485/* Scrub an AGFL block. */ 701/* Scrub an AGFL block. */
486STATIC int 702STATIC int
487xfs_scrub_agfl_block( 703xfs_scrub_agfl_block(
@@ -499,6 +715,8 @@ xfs_scrub_agfl_block(
499 else 715 else
500 xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp); 716 xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
501 717
718 xfs_scrub_agfl_block_xref(sc, agbno, priv);
719
502 return 0; 720 return 0;
503} 721}
504 722
@@ -513,6 +731,37 @@ xfs_scrub_agblock_cmp(
513 return (int)*a - (int)*b; 731 return (int)*a - (int)*b;
514} 732}
515 733
734/* Cross-reference with the other btrees. */
735STATIC void
736xfs_scrub_agfl_xref(
737 struct xfs_scrub_context *sc)
738{
739 struct xfs_owner_info oinfo;
740 struct xfs_mount *mp = sc->mp;
741 xfs_agblock_t agbno;
742 int error;
743
744 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
745 return;
746
747 agbno = XFS_AGFL_BLOCK(mp);
748
749 error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
750 if (error)
751 return;
752
753 xfs_scrub_xref_is_used_space(sc, agbno, 1);
754 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
755 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
756 xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
757 xfs_scrub_xref_is_not_shared(sc, agbno, 1);
758
759 /*
760 * Scrub teardown will take care of sc->sa for us. Leave sc->sa
761 * active so that the agfl block xref can use it too.
762 */
763}
764
516/* Scrub the AGFL. */ 765/* Scrub the AGFL. */
517int 766int
518xfs_scrub_agfl( 767xfs_scrub_agfl(
@@ -532,6 +781,12 @@ xfs_scrub_agfl(
532 goto out; 781 goto out;
533 if (!sc->sa.agf_bp) 782 if (!sc->sa.agf_bp)
534 return -EFSCORRUPTED; 783 return -EFSCORRUPTED;
784 xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp);
785
786 xfs_scrub_agfl_xref(sc);
787
788 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
789 goto out;
535 790
536 /* Allocate buffer to ensure uniqueness of AGFL entries. */ 791 /* Allocate buffer to ensure uniqueness of AGFL entries. */
537 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); 792 agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -548,6 +803,7 @@ xfs_scrub_agfl(
548 } 803 }
549 804
550 /* Check the blocks in the AGFL. */ 805 /* Check the blocks in the AGFL. */
806 xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
551 error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai); 807 error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
552 if (error) 808 if (error)
553 goto out_free; 809 goto out_free;
@@ -575,6 +831,56 @@ out:
575 831
576/* AGI */ 832/* AGI */
577 833
834/* Check agi_count/agi_freecount */
835static inline void
836xfs_scrub_agi_xref_icounts(
837 struct xfs_scrub_context *sc)
838{
839 struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
840 xfs_agino_t icount;
841 xfs_agino_t freecount;
842 int error;
843
844 if (!sc->sa.ino_cur)
845 return;
846
847 error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount);
848 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur))
849 return;
850 if (be32_to_cpu(agi->agi_count) != icount ||
851 be32_to_cpu(agi->agi_freecount) != freecount)
852 xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp);
853}
854
855/* Cross-reference with the other btrees. */
856STATIC void
857xfs_scrub_agi_xref(
858 struct xfs_scrub_context *sc)
859{
860 struct xfs_owner_info oinfo;
861 struct xfs_mount *mp = sc->mp;
862 xfs_agblock_t agbno;
863 int error;
864
865 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
866 return;
867
868 agbno = XFS_AGI_BLOCK(mp);
869
870 error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
871 if (error)
872 return;
873
874 xfs_scrub_xref_is_used_space(sc, agbno, 1);
875 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
876 xfs_scrub_agi_xref_icounts(sc);
877 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
878 xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
879 xfs_scrub_xref_is_not_shared(sc, agbno, 1);
880
881 /* scrub teardown will take care of sc->sa for us */
882}
883
578/* Scrub the AGI. */ 884/* Scrub the AGI. */
579int 885int
580xfs_scrub_agi( 886xfs_scrub_agi(
@@ -598,6 +904,7 @@ xfs_scrub_agi(
598 &sc->sa.agf_bp, &sc->sa.agfl_bp); 904 &sc->sa.agf_bp, &sc->sa.agfl_bp);
599 if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error)) 905 if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
600 goto out; 906 goto out;
907 xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp);
601 908
602 agi = XFS_BUF_TO_AGI(sc->sa.agi_bp); 909 agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
603 910
@@ -653,6 +960,7 @@ xfs_scrub_agi(
653 if (agi->agi_pad32 != cpu_to_be32(0)) 960 if (agi->agi_pad32 != cpu_to_be32(0))
654 xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp); 961 xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
655 962
963 xfs_scrub_agi_xref(sc);
656out: 964out:
657 return error; 965 return error;
658} 966}
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 059663e13414..517c079d3f68 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -31,6 +31,7 @@
31#include "xfs_sb.h" 31#include "xfs_sb.h"
32#include "xfs_alloc.h" 32#include "xfs_alloc.h"
33#include "xfs_rmap.h" 33#include "xfs_rmap.h"
34#include "xfs_alloc.h"
34#include "scrub/xfs_scrub.h" 35#include "scrub/xfs_scrub.h"
35#include "scrub/scrub.h" 36#include "scrub/scrub.h"
36#include "scrub/common.h" 37#include "scrub/common.h"
@@ -49,6 +50,64 @@ xfs_scrub_setup_ag_allocbt(
49} 50}
50 51
51/* Free space btree scrubber. */ 52/* Free space btree scrubber. */
53/*
54 * Ensure there's a corresponding cntbt/bnobt record matching this
55 * bnobt/cntbt record, respectively.
56 */
57STATIC void
58xfs_scrub_allocbt_xref_other(
59 struct xfs_scrub_context *sc,
60 xfs_agblock_t agbno,
61 xfs_extlen_t len)
62{
63 struct xfs_btree_cur **pcur;
64 xfs_agblock_t fbno;
65 xfs_extlen_t flen;
66 int has_otherrec;
67 int error;
68
69 if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
70 pcur = &sc->sa.cnt_cur;
71 else
72 pcur = &sc->sa.bno_cur;
73 if (!*pcur)
74 return;
75
76 error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec);
77 if (!xfs_scrub_should_check_xref(sc, &error, pcur))
78 return;
79 if (!has_otherrec) {
80 xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
81 return;
82 }
83
84 error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec);
85 if (!xfs_scrub_should_check_xref(sc, &error, pcur))
86 return;
87 if (!has_otherrec) {
88 xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
89 return;
90 }
91
92 if (fbno != agbno || flen != len)
93 xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
94}
95
96/* Cross-reference with the other btrees. */
97STATIC void
98xfs_scrub_allocbt_xref(
99 struct xfs_scrub_context *sc,
100 xfs_agblock_t agbno,
101 xfs_extlen_t len)
102{
103 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
104 return;
105
106 xfs_scrub_allocbt_xref_other(sc, agbno, len);
107 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
108 xfs_scrub_xref_has_no_owner(sc, agbno, len);
109 xfs_scrub_xref_is_not_shared(sc, agbno, len);
110}
52 111
53/* Scrub a bnobt/cntbt record. */ 112/* Scrub a bnobt/cntbt record. */
54STATIC int 113STATIC int
@@ -70,6 +129,8 @@ xfs_scrub_allocbt_rec(
70 !xfs_verify_agbno(mp, agno, bno + len - 1)) 129 !xfs_verify_agbno(mp, agno, bno + len - 1))
71 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 130 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
72 131
132 xfs_scrub_allocbt_xref(bs->sc, bno, len);
133
73 return error; 134 return error;
74} 135}
75 136
@@ -100,3 +161,23 @@ xfs_scrub_cntbt(
100{ 161{
101 return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT); 162 return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
102} 163}
164
165/* xref check that the extent is not free */
166void
167xfs_scrub_xref_is_used_space(
168 struct xfs_scrub_context *sc,
169 xfs_agblock_t agbno,
170 xfs_extlen_t len)
171{
172 bool is_freesp;
173 int error;
174
175 if (!sc->sa.bno_cur)
176 return;
177
178 error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
179 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
180 return;
181 if (is_freesp)
182 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0);
183}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 42fec0bcd9e1..d00282130492 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -37,6 +37,7 @@
37#include "xfs_bmap_util.h" 37#include "xfs_bmap_util.h"
38#include "xfs_bmap_btree.h" 38#include "xfs_bmap_btree.h"
39#include "xfs_rmap.h" 39#include "xfs_rmap.h"
40#include "xfs_refcount.h"
40#include "scrub/xfs_scrub.h" 41#include "scrub/xfs_scrub.h"
41#include "scrub/scrub.h" 42#include "scrub/scrub.h"
42#include "scrub/common.h" 43#include "scrub/common.h"
@@ -99,6 +100,201 @@ struct xfs_scrub_bmap_info {
99 int whichfork; 100 int whichfork;
100}; 101};
101 102
103/* Look for a corresponding rmap for this irec. */
104static inline bool
105xfs_scrub_bmap_get_rmap(
106 struct xfs_scrub_bmap_info *info,
107 struct xfs_bmbt_irec *irec,
108 xfs_agblock_t agbno,
109 uint64_t owner,
110 struct xfs_rmap_irec *rmap)
111{
112 xfs_fileoff_t offset;
113 unsigned int rflags = 0;
114 int has_rmap;
115 int error;
116
117 if (info->whichfork == XFS_ATTR_FORK)
118 rflags |= XFS_RMAP_ATTR_FORK;
119
120 /*
121 * CoW staging extents are owned (on disk) by the refcountbt, so
122 * their rmaps do not have offsets.
123 */
124 if (info->whichfork == XFS_COW_FORK)
125 offset = 0;
126 else
127 offset = irec->br_startoff;
128
129 /*
130 * If the caller thinks this could be a shared bmbt extent (IOWs,
131 * any data fork extent of a reflink inode) then we have to use the
132 * range rmap lookup to make sure we get the correct owner/offset.
133 */
134 if (info->is_shared) {
135 error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
136 owner, offset, rflags, rmap, &has_rmap);
137 if (!xfs_scrub_should_check_xref(info->sc, &error,
138 &info->sc->sa.rmap_cur))
139 return false;
140 goto out;
141 }
142
143 /*
144 * Otherwise, use the (faster) regular lookup.
145 */
146 error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
147 offset, rflags, &has_rmap);
148 if (!xfs_scrub_should_check_xref(info->sc, &error,
149 &info->sc->sa.rmap_cur))
150 return false;
151 if (!has_rmap)
152 goto out;
153
154 error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
155 if (!xfs_scrub_should_check_xref(info->sc, &error,
156 &info->sc->sa.rmap_cur))
157 return false;
158
159out:
160 if (!has_rmap)
161 xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
162 irec->br_startoff);
163 return has_rmap;
164}
165
166/* Make sure that we have rmapbt records for this extent. */
167STATIC void
168xfs_scrub_bmap_xref_rmap(
169 struct xfs_scrub_bmap_info *info,
170 struct xfs_bmbt_irec *irec,
171 xfs_agblock_t agbno)
172{
173 struct xfs_rmap_irec rmap;
174 unsigned long long rmap_end;
175 uint64_t owner;
176
177 if (!info->sc->sa.rmap_cur)
178 return;
179
180 if (info->whichfork == XFS_COW_FORK)
181 owner = XFS_RMAP_OWN_COW;
182 else
183 owner = info->sc->ip->i_ino;
184
185 /* Find the rmap record for this irec. */
186 if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap))
187 return;
188
189 /* Check the rmap. */
190 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
191 if (rmap.rm_startblock > agbno ||
192 agbno + irec->br_blockcount > rmap_end)
193 xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
194 irec->br_startoff);
195
196 /*
197 * Check the logical offsets if applicable. CoW staging extents
198 * don't track logical offsets since the mappings only exist in
199 * memory.
200 */
201 if (info->whichfork != XFS_COW_FORK) {
202 rmap_end = (unsigned long long)rmap.rm_offset +
203 rmap.rm_blockcount;
204 if (rmap.rm_offset > irec->br_startoff ||
205 irec->br_startoff + irec->br_blockcount > rmap_end)
206 xfs_scrub_fblock_xref_set_corrupt(info->sc,
207 info->whichfork, irec->br_startoff);
208 }
209
210 if (rmap.rm_owner != owner)
211 xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
212 irec->br_startoff);
213
214 /*
215 * Check for discrepancies between the unwritten flag in the irec and
216 * the rmap. Note that the (in-memory) CoW fork distinguishes between
217 * unwritten and written extents, but we don't track that in the rmap
218 * records because the blocks are owned (on-disk) by the refcountbt,
219 * which doesn't track unwritten state.
220 */
221 if (owner != XFS_RMAP_OWN_COW &&
222 irec->br_state == XFS_EXT_UNWRITTEN &&
223 !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
224 xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
225 irec->br_startoff);
226
227 if (info->whichfork == XFS_ATTR_FORK &&
228 !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
229 xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
230 irec->br_startoff);
231 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
232 xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
233 irec->br_startoff);
234}
235
236/* Cross-reference a single rtdev extent record. */
237STATIC void
238xfs_scrub_bmap_rt_extent_xref(
239 struct xfs_scrub_bmap_info *info,
240 struct xfs_inode *ip,
241 struct xfs_btree_cur *cur,
242 struct xfs_bmbt_irec *irec)
243{
244 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
245 return;
246
247 xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock,
248 irec->br_blockcount);
249}
250
251/* Cross-reference a single datadev extent record. */
252STATIC void
253xfs_scrub_bmap_extent_xref(
254 struct xfs_scrub_bmap_info *info,
255 struct xfs_inode *ip,
256 struct xfs_btree_cur *cur,
257 struct xfs_bmbt_irec *irec)
258{
259 struct xfs_mount *mp = info->sc->mp;
260 xfs_agnumber_t agno;
261 xfs_agblock_t agbno;
262 xfs_extlen_t len;
263 int error;
264
265 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
266 return;
267
268 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
269 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
270 len = irec->br_blockcount;
271
272 error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa);
273 if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork,
274 irec->br_startoff, &error))
275 return;
276
277 xfs_scrub_xref_is_used_space(info->sc, agbno, len);
278 xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len);
279 xfs_scrub_bmap_xref_rmap(info, irec, agbno);
280 switch (info->whichfork) {
281 case XFS_DATA_FORK:
282 if (xfs_is_reflink_inode(info->sc->ip))
283 break;
284 /* fall through */
285 case XFS_ATTR_FORK:
286 xfs_scrub_xref_is_not_shared(info->sc, agbno,
287 irec->br_blockcount);
288 break;
289 case XFS_COW_FORK:
290 xfs_scrub_xref_is_cow_staging(info->sc, agbno,
291 irec->br_blockcount);
292 break;
293 }
294
295 xfs_scrub_ag_free(info->sc, &info->sc->sa);
296}
297
102/* Scrub a single extent record. */ 298/* Scrub a single extent record. */
103STATIC int 299STATIC int
104xfs_scrub_bmap_extent( 300xfs_scrub_bmap_extent(
@@ -109,6 +305,7 @@ xfs_scrub_bmap_extent(
109{ 305{
110 struct xfs_mount *mp = info->sc->mp; 306 struct xfs_mount *mp = info->sc->mp;
111 struct xfs_buf *bp = NULL; 307 struct xfs_buf *bp = NULL;
308 xfs_filblks_t end;
112 int error = 0; 309 int error = 0;
113 310
114 if (cur) 311 if (cur)
@@ -136,19 +333,23 @@ xfs_scrub_bmap_extent(
136 irec->br_startoff); 333 irec->br_startoff);
137 334
138 /* Make sure the extent points to a valid place. */ 335 /* Make sure the extent points to a valid place. */
336 if (irec->br_blockcount > MAXEXTLEN)
337 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
338 irec->br_startoff);
139 if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) 339 if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
140 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 340 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
141 irec->br_startoff); 341 irec->br_startoff);
342 end = irec->br_startblock + irec->br_blockcount - 1;
142 if (info->is_rt && 343 if (info->is_rt &&
143 (!xfs_verify_rtbno(mp, irec->br_startblock) || 344 (!xfs_verify_rtbno(mp, irec->br_startblock) ||
144 !xfs_verify_rtbno(mp, irec->br_startblock + 345 !xfs_verify_rtbno(mp, end)))
145 irec->br_blockcount - 1)))
146 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 346 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
147 irec->br_startoff); 347 irec->br_startoff);
148 if (!info->is_rt && 348 if (!info->is_rt &&
149 (!xfs_verify_fsbno(mp, irec->br_startblock) || 349 (!xfs_verify_fsbno(mp, irec->br_startblock) ||
150 !xfs_verify_fsbno(mp, irec->br_startblock + 350 !xfs_verify_fsbno(mp, end) ||
151 irec->br_blockcount - 1))) 351 XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
352 XFS_FSB_TO_AGNO(mp, end)))
152 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 353 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
153 irec->br_startoff); 354 irec->br_startoff);
154 355
@@ -158,6 +359,11 @@ xfs_scrub_bmap_extent(
158 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork, 359 xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
159 irec->br_startoff); 360 irec->br_startoff);
160 361
362 if (info->is_rt)
363 xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec);
364 else
365 xfs_scrub_bmap_extent_xref(info, ip, cur, irec);
366
161 info->lastoff = irec->br_startoff + irec->br_blockcount; 367 info->lastoff = irec->br_startoff + irec->br_blockcount;
162 return error; 368 return error;
163} 369}
@@ -235,7 +441,6 @@ xfs_scrub_bmap(
235 struct xfs_ifork *ifp; 441 struct xfs_ifork *ifp;
236 xfs_fileoff_t endoff; 442 xfs_fileoff_t endoff;
237 struct xfs_iext_cursor icur; 443 struct xfs_iext_cursor icur;
238 bool found;
239 int error = 0; 444 int error = 0;
240 445
241 ifp = XFS_IFORK_PTR(ip, whichfork); 446 ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -314,9 +519,7 @@ xfs_scrub_bmap(
314 /* Scrub extent records. */ 519 /* Scrub extent records. */
315 info.lastoff = 0; 520 info.lastoff = 0;
316 ifp = XFS_IFORK_PTR(ip, whichfork); 521 ifp = XFS_IFORK_PTR(ip, whichfork);
317 for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec); 522 for_each_xfs_iext(ifp, &icur, &irec) {
318 found != 0;
319 found = xfs_iext_next_extent(ifp, &icur, &irec)) {
320 if (xfs_scrub_should_terminate(sc, &error)) 523 if (xfs_scrub_should_terminate(sc, &error))
321 break; 524 break;
322 if (isnullstartblock(irec.br_startblock)) 525 if (isnullstartblock(irec.br_startblock))
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index df0766132ace..54218168c8f9 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -42,12 +42,14 @@
42 * Check for btree operation errors. See the section about handling 42 * Check for btree operation errors. See the section about handling
43 * operational errors in common.c. 43 * operational errors in common.c.
44 */ 44 */
45bool 45static bool
46xfs_scrub_btree_process_error( 46__xfs_scrub_btree_process_error(
47 struct xfs_scrub_context *sc, 47 struct xfs_scrub_context *sc,
48 struct xfs_btree_cur *cur, 48 struct xfs_btree_cur *cur,
49 int level, 49 int level,
50 int *error) 50 int *error,
51 __u32 errflag,
52 void *ret_ip)
51{ 53{
52 if (*error == 0) 54 if (*error == 0)
53 return true; 55 return true;
@@ -60,36 +62,80 @@ xfs_scrub_btree_process_error(
60 case -EFSBADCRC: 62 case -EFSBADCRC:
61 case -EFSCORRUPTED: 63 case -EFSCORRUPTED:
62 /* Note the badness but don't abort. */ 64 /* Note the badness but don't abort. */
63 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 65 sc->sm->sm_flags |= errflag;
64 *error = 0; 66 *error = 0;
65 /* fall through */ 67 /* fall through */
66 default: 68 default:
67 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 69 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
68 trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, 70 trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
69 *error, __return_address); 71 *error, ret_ip);
70 else 72 else
71 trace_xfs_scrub_btree_op_error(sc, cur, level, 73 trace_xfs_scrub_btree_op_error(sc, cur, level,
72 *error, __return_address); 74 *error, ret_ip);
73 break; 75 break;
74 } 76 }
75 return false; 77 return false;
76} 78}
77 79
80bool
81xfs_scrub_btree_process_error(
82 struct xfs_scrub_context *sc,
83 struct xfs_btree_cur *cur,
84 int level,
85 int *error)
86{
87 return __xfs_scrub_btree_process_error(sc, cur, level, error,
88 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
89}
90
91bool
92xfs_scrub_btree_xref_process_error(
93 struct xfs_scrub_context *sc,
94 struct xfs_btree_cur *cur,
95 int level,
96 int *error)
97{
98 return __xfs_scrub_btree_process_error(sc, cur, level, error,
99 XFS_SCRUB_OFLAG_XFAIL, __return_address);
100}
101
78/* Record btree block corruption. */ 102/* Record btree block corruption. */
79void 103static void
80xfs_scrub_btree_set_corrupt( 104__xfs_scrub_btree_set_corrupt(
81 struct xfs_scrub_context *sc, 105 struct xfs_scrub_context *sc,
82 struct xfs_btree_cur *cur, 106 struct xfs_btree_cur *cur,
83 int level) 107 int level,
108 __u32 errflag,
109 void *ret_ip)
84{ 110{
85 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 111 sc->sm->sm_flags |= errflag;
86 112
87 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 113 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
88 trace_xfs_scrub_ifork_btree_error(sc, cur, level, 114 trace_xfs_scrub_ifork_btree_error(sc, cur, level,
89 __return_address); 115 ret_ip);
90 else 116 else
91 trace_xfs_scrub_btree_error(sc, cur, level, 117 trace_xfs_scrub_btree_error(sc, cur, level,
92 __return_address); 118 ret_ip);
119}
120
121void
122xfs_scrub_btree_set_corrupt(
123 struct xfs_scrub_context *sc,
124 struct xfs_btree_cur *cur,
125 int level)
126{
127 __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
128 __return_address);
129}
130
131void
132xfs_scrub_btree_xref_set_corrupt(
133 struct xfs_scrub_context *sc,
134 struct xfs_btree_cur *cur,
135 int level)
136{
137 __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
138 __return_address);
93} 139}
94 140
95/* 141/*
@@ -268,6 +314,8 @@ xfs_scrub_btree_block_check_sibling(
268 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); 314 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
269 if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) 315 if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
270 goto out; 316 goto out;
317 if (pbp)
318 xfs_scrub_buffer_recheck(bs->sc, pbp);
271 319
272 if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) 320 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
273 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 321 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
@@ -315,6 +363,97 @@ out:
315 return error; 363 return error;
316} 364}
317 365
366struct check_owner {
367 struct list_head list;
368 xfs_daddr_t daddr;
369 int level;
370};
371
372/*
373 * Make sure this btree block isn't in the free list and that there's
374 * an rmap record for it.
375 */
376STATIC int
377xfs_scrub_btree_check_block_owner(
378 struct xfs_scrub_btree *bs,
379 int level,
380 xfs_daddr_t daddr)
381{
382 xfs_agnumber_t agno;
383 xfs_agblock_t agbno;
384 xfs_btnum_t btnum;
385 bool init_sa;
386 int error = 0;
387
388 if (!bs->cur)
389 return 0;
390
391 btnum = bs->cur->bc_btnum;
392 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
393 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
394
395 init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
396 if (init_sa) {
397 error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa);
398 if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur,
399 level, &error))
400 return error;
401 }
402
403 xfs_scrub_xref_is_used_space(bs->sc, agbno, 1);
404 /*
405 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
406 * have to nullify it (to shut down further block owner checks) if
407 * self-xref encounters problems.
408 */
409 if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
410 bs->cur = NULL;
411
412 xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
413 if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
414 bs->cur = NULL;
415
416 if (init_sa)
417 xfs_scrub_ag_free(bs->sc, &bs->sc->sa);
418
419 return error;
420}
421
422/* Check the owner of a btree block. */
423STATIC int
424xfs_scrub_btree_check_owner(
425 struct xfs_scrub_btree *bs,
426 int level,
427 struct xfs_buf *bp)
428{
429 struct xfs_btree_cur *cur = bs->cur;
430 struct check_owner *co;
431
432 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
433 return 0;
434
435 /*
436 * We want to cross-reference each btree block with the bnobt
437 * and the rmapbt. We cannot cross-reference the bnobt or
438 * rmapbt while scanning the bnobt or rmapbt, respectively,
439 * because we cannot alter the cursor and we'd prefer not to
440 * duplicate cursors. Therefore, save the buffer daddr for
441 * later scanning.
442 */
443 if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
444 co = kmem_alloc(sizeof(struct check_owner),
445 KM_MAYFAIL | KM_NOFS);
446 if (!co)
447 return -ENOMEM;
448 co->level = level;
449 co->daddr = XFS_BUF_ADDR(bp);
450 list_add_tail(&co->list, &bs->to_check);
451 return 0;
452 }
453
454 return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
455}
456
318/* 457/*
319 * Grab and scrub a btree block given a btree pointer. Returns block 458 * Grab and scrub a btree block given a btree pointer. Returns block
320 * and buffer pointers (if applicable) if they're ok to use. 459 * and buffer pointers (if applicable) if they're ok to use.
@@ -349,6 +488,16 @@ xfs_scrub_btree_get_block(
349 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); 488 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
350 return 0; 489 return 0;
351 } 490 }
491 if (*pbp)
492 xfs_scrub_buffer_recheck(bs->sc, *pbp);
493
494 /*
495 * Check the block's owner; this function absorbs error codes
496 * for us.
497 */
498 error = xfs_scrub_btree_check_owner(bs, level, *pbp);
499 if (error)
500 return error;
352 501
353 /* 502 /*
354 * Check the block's siblings; this function absorbs error codes 503 * Check the block's siblings; this function absorbs error codes
@@ -421,6 +570,8 @@ xfs_scrub_btree(
421 struct xfs_btree_block *block; 570 struct xfs_btree_block *block;
422 int level; 571 int level;
423 struct xfs_buf *bp; 572 struct xfs_buf *bp;
573 struct check_owner *co;
574 struct check_owner *n;
424 int i; 575 int i;
425 int error = 0; 576 int error = 0;
426 577
@@ -512,5 +663,14 @@ xfs_scrub_btree(
512 } 663 }
513 664
514out: 665out:
666 /* Process deferred owner checks on btree blocks. */
667 list_for_each_entry_safe(co, n, &bs.to_check, list) {
668 if (!error && bs.cur)
669 error = xfs_scrub_btree_check_block_owner(&bs,
670 co->level, co->daddr);
671 list_del(&co->list);
672 kmem_free(co);
673 }
674
515 return error; 675 return error;
516} 676}
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index 4de825a626d1..e2b868ede70b 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -26,10 +26,19 @@
26bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc, 26bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc,
27 struct xfs_btree_cur *cur, int level, int *error); 27 struct xfs_btree_cur *cur, int level, int *error);
28 28
29/* Check for btree xref operation errors. */
30bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc,
31 struct xfs_btree_cur *cur, int level,
32 int *error);
33
29/* Check for btree corruption. */ 34/* Check for btree corruption. */
30void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc, 35void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc,
31 struct xfs_btree_cur *cur, int level); 36 struct xfs_btree_cur *cur, int level);
32 37
38/* Check for btree xref discrepancies. */
39void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc,
40 struct xfs_btree_cur *cur, int level);
41
33struct xfs_scrub_btree; 42struct xfs_scrub_btree;
34typedef int (*xfs_scrub_btree_rec_fn)( 43typedef int (*xfs_scrub_btree_rec_fn)(
35 struct xfs_scrub_btree *bs, 44 struct xfs_scrub_btree *bs,
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index ac95fe911d96..8033ab9d8f47 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -78,12 +78,14 @@
78 */ 78 */
79 79
80/* Check for operational errors. */ 80/* Check for operational errors. */
81bool 81static bool
82xfs_scrub_process_error( 82__xfs_scrub_process_error(
83 struct xfs_scrub_context *sc, 83 struct xfs_scrub_context *sc,
84 xfs_agnumber_t agno, 84 xfs_agnumber_t agno,
85 xfs_agblock_t bno, 85 xfs_agblock_t bno,
86 int *error) 86 int *error,
87 __u32 errflag,
88 void *ret_ip)
87{ 89{
88 switch (*error) { 90 switch (*error) {
89 case 0: 91 case 0:
@@ -95,24 +97,48 @@ xfs_scrub_process_error(
95 case -EFSBADCRC: 97 case -EFSBADCRC:
96 case -EFSCORRUPTED: 98 case -EFSCORRUPTED:
97 /* Note the badness but don't abort. */ 99 /* Note the badness but don't abort. */
98 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 100 sc->sm->sm_flags |= errflag;
99 *error = 0; 101 *error = 0;
100 /* fall through */ 102 /* fall through */
101 default: 103 default:
102 trace_xfs_scrub_op_error(sc, agno, bno, *error, 104 trace_xfs_scrub_op_error(sc, agno, bno, *error,
103 __return_address); 105 ret_ip);
104 break; 106 break;
105 } 107 }
106 return false; 108 return false;
107} 109}
108 110
109/* Check for operational errors for a file offset. */
110bool 111bool
111xfs_scrub_fblock_process_error( 112xfs_scrub_process_error(
113 struct xfs_scrub_context *sc,
114 xfs_agnumber_t agno,
115 xfs_agblock_t bno,
116 int *error)
117{
118 return __xfs_scrub_process_error(sc, agno, bno, error,
119 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
120}
121
122bool
123xfs_scrub_xref_process_error(
124 struct xfs_scrub_context *sc,
125 xfs_agnumber_t agno,
126 xfs_agblock_t bno,
127 int *error)
128{
129 return __xfs_scrub_process_error(sc, agno, bno, error,
130 XFS_SCRUB_OFLAG_XFAIL, __return_address);
131}
132
133/* Check for operational errors for a file offset. */
134static bool
135__xfs_scrub_fblock_process_error(
112 struct xfs_scrub_context *sc, 136 struct xfs_scrub_context *sc,
113 int whichfork, 137 int whichfork,
114 xfs_fileoff_t offset, 138 xfs_fileoff_t offset,
115 int *error) 139 int *error,
140 __u32 errflag,
141 void *ret_ip)
116{ 142{
117 switch (*error) { 143 switch (*error) {
118 case 0: 144 case 0:
@@ -124,17 +150,39 @@ xfs_scrub_fblock_process_error(
124 case -EFSBADCRC: 150 case -EFSBADCRC:
125 case -EFSCORRUPTED: 151 case -EFSCORRUPTED:
126 /* Note the badness but don't abort. */ 152 /* Note the badness but don't abort. */
127 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 153 sc->sm->sm_flags |= errflag;
128 *error = 0; 154 *error = 0;
129 /* fall through */ 155 /* fall through */
130 default: 156 default:
131 trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, 157 trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
132 __return_address); 158 ret_ip);
133 break; 159 break;
134 } 160 }
135 return false; 161 return false;
136} 162}
137 163
164bool
165xfs_scrub_fblock_process_error(
166 struct xfs_scrub_context *sc,
167 int whichfork,
168 xfs_fileoff_t offset,
169 int *error)
170{
171 return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
172 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
173}
174
175bool
176xfs_scrub_fblock_xref_process_error(
177 struct xfs_scrub_context *sc,
178 int whichfork,
179 xfs_fileoff_t offset,
180 int *error)
181{
182 return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
183 XFS_SCRUB_OFLAG_XFAIL, __return_address);
184}
185
138/* 186/*
139 * Handling scrub corruption/optimization/warning checks. 187 * Handling scrub corruption/optimization/warning checks.
140 * 188 *
@@ -183,6 +231,16 @@ xfs_scrub_block_set_corrupt(
183 trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); 231 trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
184} 232}
185 233
234/* Record a corruption while cross-referencing. */
235void
236xfs_scrub_block_xref_set_corrupt(
237 struct xfs_scrub_context *sc,
238 struct xfs_buf *bp)
239{
240 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
241 trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
242}
243
186/* 244/*
187 * Record a corrupt inode. The trace data will include the block given 245 * Record a corrupt inode. The trace data will include the block given
188 * by bp if bp is given; otherwise it will use the block location of the 246 * by bp if bp is given; otherwise it will use the block location of the
@@ -198,6 +256,17 @@ xfs_scrub_ino_set_corrupt(
198 trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); 256 trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
199} 257}
200 258
259/* Record a corruption while cross-referencing with an inode. */
260void
261xfs_scrub_ino_xref_set_corrupt(
262 struct xfs_scrub_context *sc,
263 xfs_ino_t ino,
264 struct xfs_buf *bp)
265{
266 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
267 trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
268}
269
201/* Record corruption in a block indexed by a file fork. */ 270/* Record corruption in a block indexed by a file fork. */
202void 271void
203xfs_scrub_fblock_set_corrupt( 272xfs_scrub_fblock_set_corrupt(
@@ -209,6 +278,17 @@ xfs_scrub_fblock_set_corrupt(
209 trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); 278 trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
210} 279}
211 280
281/* Record a corruption while cross-referencing a fork block. */
282void
283xfs_scrub_fblock_xref_set_corrupt(
284 struct xfs_scrub_context *sc,
285 int whichfork,
286 xfs_fileoff_t offset)
287{
288 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
289 trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
290}
291
212/* 292/*
213 * Warn about inodes that need administrative review but is not 293 * Warn about inodes that need administrative review but is not
214 * incorrect. 294 * incorrect.
@@ -245,6 +325,59 @@ xfs_scrub_set_incomplete(
245} 325}
246 326
247/* 327/*
328 * rmap scrubbing -- compute the number of blocks with a given owner,
329 * at least according to the reverse mapping data.
330 */
331
332struct xfs_scrub_rmap_ownedby_info {
333 struct xfs_owner_info *oinfo;
334 xfs_filblks_t *blocks;
335};
336
337STATIC int
338xfs_scrub_count_rmap_ownedby_irec(
339 struct xfs_btree_cur *cur,
340 struct xfs_rmap_irec *rec,
341 void *priv)
342{
343 struct xfs_scrub_rmap_ownedby_info *sroi = priv;
344 bool irec_attr;
345 bool oinfo_attr;
346
347 irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
348 oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
349
350 if (rec->rm_owner != sroi->oinfo->oi_owner)
351 return 0;
352
353 if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
354 (*sroi->blocks) += rec->rm_blockcount;
355
356 return 0;
357}
358
359/*
360 * Calculate the number of blocks the rmap thinks are owned by something.
361 * The caller should pass us an rmapbt cursor.
362 */
363int
364xfs_scrub_count_rmap_ownedby_ag(
365 struct xfs_scrub_context *sc,
366 struct xfs_btree_cur *cur,
367 struct xfs_owner_info *oinfo,
368 xfs_filblks_t *blocks)
369{
370 struct xfs_scrub_rmap_ownedby_info sroi;
371
372 sroi.oinfo = oinfo;
373 *blocks = 0;
374 sroi.blocks = blocks;
375
376 return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec,
377 &sroi);
378}
379
380/*
248 * AG scrubbing 381 * AG scrubbing
249 * 382 *
250 * These helpers facilitate locking an allocation group's header 383 * These helpers facilitate locking an allocation group's header
@@ -302,7 +435,7 @@ xfs_scrub_ag_read_headers(
302 error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); 435 error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
303 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) 436 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
304 goto out; 437 goto out;
305 438 error = 0;
306out: 439out:
307 return error; 440 return error;
308} 441}
@@ -472,7 +605,7 @@ xfs_scrub_setup_ag_btree(
472 return error; 605 return error;
473 } 606 }
474 607
475 error = xfs_scrub_setup_ag_header(sc, ip); 608 error = xfs_scrub_setup_fs(sc, ip);
476 if (error) 609 if (error)
477 return error; 610 return error;
478 611
@@ -503,18 +636,11 @@ xfs_scrub_get_inode(
503 struct xfs_scrub_context *sc, 636 struct xfs_scrub_context *sc,
504 struct xfs_inode *ip_in) 637 struct xfs_inode *ip_in)
505{ 638{
639 struct xfs_imap imap;
506 struct xfs_mount *mp = sc->mp; 640 struct xfs_mount *mp = sc->mp;
507 struct xfs_inode *ip = NULL; 641 struct xfs_inode *ip = NULL;
508 int error; 642 int error;
509 643
510 /*
511 * If userspace passed us an AG number or a generation number
512 * without an inode number, they haven't got a clue so bail out
513 * immediately.
514 */
515 if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
516 return -EINVAL;
517
518 /* We want to scan the inode we already had opened. */ 644 /* We want to scan the inode we already had opened. */
519 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { 645 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
520 sc->ip = ip_in; 646 sc->ip = ip_in;
@@ -526,10 +652,33 @@ xfs_scrub_get_inode(
526 return -ENOENT; 652 return -ENOENT;
527 error = xfs_iget(mp, NULL, sc->sm->sm_ino, 653 error = xfs_iget(mp, NULL, sc->sm->sm_ino,
528 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); 654 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
529 if (error == -ENOENT || error == -EINVAL) { 655 switch (error) {
530 /* inode doesn't exist... */ 656 case -ENOENT:
531 return -ENOENT; 657 /* Inode doesn't exist, just bail out. */
532 } else if (error) { 658 return error;
659 case 0:
660 /* Got an inode, continue. */
661 break;
662 case -EINVAL:
663 /*
664 * -EINVAL with IGET_UNTRUSTED could mean one of several
665 * things: userspace gave us an inode number that doesn't
666 * correspond to fs space, or doesn't have an inobt entry;
667 * or it could simply mean that the inode buffer failed the
668 * read verifiers.
669 *
670 * Try just the inode mapping lookup -- if it succeeds, then
671 * the inode buffer verifier failed and something needs fixing.
672 * Otherwise, we really couldn't find it so tell userspace
673 * that it no longer exists.
674 */
675 error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
676 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
677 if (error)
678 return -ENOENT;
679 error = -EFSCORRUPTED;
680 /* fall through */
681 default:
533 trace_xfs_scrub_op_error(sc, 682 trace_xfs_scrub_op_error(sc,
534 XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), 683 XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
535 XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), 684 XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
@@ -572,3 +721,61 @@ out:
572 /* scrub teardown will unlock and release the inode for us */ 721 /* scrub teardown will unlock and release the inode for us */
573 return error; 722 return error;
574} 723}
724
725/*
726 * Predicate that decides if we need to evaluate the cross-reference check.
727 * If there was an error accessing the cross-reference btree, just delete
728 * the cursor and skip the check.
729 */
730bool
731xfs_scrub_should_check_xref(
732 struct xfs_scrub_context *sc,
733 int *error,
734 struct xfs_btree_cur **curpp)
735{
736 if (*error == 0)
737 return true;
738
739 if (curpp) {
740 /* If we've already given up on xref, just bail out. */
741 if (!*curpp)
742 return false;
743
744 /* xref error, delete cursor and bail out. */
745 xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
746 *curpp = NULL;
747 }
748
749 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
750 trace_xfs_scrub_xref_error(sc, *error, __return_address);
751
752 /*
753 * Errors encountered during cross-referencing with another
754 * data structure should not cause this scrubber to abort.
755 */
756 *error = 0;
757 return false;
758}
759
760/* Run the structure verifiers on in-memory buffers to detect bad memory. */
761void
762xfs_scrub_buffer_recheck(
763 struct xfs_scrub_context *sc,
764 struct xfs_buf *bp)
765{
766 xfs_failaddr_t fa;
767
768 if (bp->b_ops == NULL) {
769 xfs_scrub_block_set_corrupt(sc, bp);
770 return;
771 }
772 if (bp->b_ops->verify_struct == NULL) {
773 xfs_scrub_set_incomplete(sc);
774 return;
775 }
776 fa = bp->b_ops->verify_struct(bp);
777 if (!fa)
778 return;
779 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
780 trace_xfs_scrub_block_error(sc, bp->b_bn, fa);
781}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 5c043855570e..ddb65d22c76a 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -56,6 +56,11 @@ bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
56bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork, 56bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
57 xfs_fileoff_t offset, int *error); 57 xfs_fileoff_t offset, int *error);
58 58
59bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc,
60 xfs_agnumber_t agno, xfs_agblock_t bno, int *error);
61bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
62 int whichfork, xfs_fileoff_t offset, int *error);
63
59void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc, 64void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
60 struct xfs_buf *bp); 65 struct xfs_buf *bp);
61void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino, 66void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
@@ -68,6 +73,13 @@ void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
68void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork, 73void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
69 xfs_fileoff_t offset); 74 xfs_fileoff_t offset);
70 75
76void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
77 struct xfs_buf *bp);
78void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
79 struct xfs_buf *bp);
80void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
81 int whichfork, xfs_fileoff_t offset);
82
71void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino, 83void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
72 struct xfs_buf *bp); 84 struct xfs_buf *bp);
73void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork, 85void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
@@ -76,10 +88,12 @@ void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
76void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc); 88void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
77int xfs_scrub_checkpoint_log(struct xfs_mount *mp); 89int xfs_scrub_checkpoint_log(struct xfs_mount *mp);
78 90
91/* Are we set up for a cross-referencing check? */
92bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error,
93 struct xfs_btree_cur **curpp);
94
79/* Setup functions */ 95/* Setup functions */
80int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip); 96int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
81int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
82 struct xfs_inode *ip);
83int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc, 97int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc,
84 struct xfs_inode *ip); 98 struct xfs_inode *ip);
85int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc, 99int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
@@ -134,11 +148,16 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
134 int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno, 148 int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
135 void *), 149 void *),
136 void *priv); 150 void *priv);
151int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
152 struct xfs_btree_cur *cur,
153 struct xfs_owner_info *oinfo,
154 xfs_filblks_t *blocks);
137 155
138int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc, 156int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc,
139 struct xfs_inode *ip, bool force_log); 157 struct xfs_inode *ip, bool force_log);
140int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in); 158int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
141int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc, 159int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc,
142 struct xfs_inode *ip, unsigned int resblks); 160 struct xfs_inode *ip, unsigned int resblks);
161void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
143 162
144#endif /* __XFS_SCRUB_COMMON_H__ */ 163#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index d94edd93cba8..bffdb7dc09bf 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -233,11 +233,28 @@ xfs_scrub_da_btree_write_verify(
233 return; 233 return;
234 } 234 }
235} 235}
236static void *
237xfs_scrub_da_btree_verify(
238 struct xfs_buf *bp)
239{
240 struct xfs_da_blkinfo *info = bp->b_addr;
241
242 switch (be16_to_cpu(info->magic)) {
243 case XFS_DIR2_LEAF1_MAGIC:
244 case XFS_DIR3_LEAF1_MAGIC:
245 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
246 return bp->b_ops->verify_struct(bp);
247 default:
248 bp->b_ops = &xfs_da3_node_buf_ops;
249 return bp->b_ops->verify_struct(bp);
250 }
251}
236 252
237static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = { 253static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
238 .name = "xfs_scrub_da_btree", 254 .name = "xfs_scrub_da_btree",
239 .verify_read = xfs_scrub_da_btree_read_verify, 255 .verify_read = xfs_scrub_da_btree_read_verify,
240 .verify_write = xfs_scrub_da_btree_write_verify, 256 .verify_write = xfs_scrub_da_btree_write_verify,
257 .verify_struct = xfs_scrub_da_btree_verify,
241}; 258};
242 259
243/* Check a block's sibling. */ 260/* Check a block's sibling. */
@@ -276,6 +293,9 @@ xfs_scrub_da_btree_block_check_sibling(
276 xfs_scrub_da_set_corrupt(ds, level); 293 xfs_scrub_da_set_corrupt(ds, level);
277 return error; 294 return error;
278 } 295 }
296 if (ds->state->altpath.blk[level].bp)
297 xfs_scrub_buffer_recheck(ds->sc,
298 ds->state->altpath.blk[level].bp);
279 299
280 /* Compare upper level pointer to sibling pointer. */ 300 /* Compare upper level pointer to sibling pointer. */
281 if (ds->state->altpath.blk[level].blkno != sibling) 301 if (ds->state->altpath.blk[level].blkno != sibling)
@@ -358,6 +378,8 @@ xfs_scrub_da_btree_block(
358 &xfs_scrub_da_btree_buf_ops); 378 &xfs_scrub_da_btree_buf_ops);
359 if (!xfs_scrub_da_process_error(ds, level, &error)) 379 if (!xfs_scrub_da_process_error(ds, level, &error))
360 goto out_nobuf; 380 goto out_nobuf;
381 if (blk->bp)
382 xfs_scrub_buffer_recheck(ds->sc, blk->bp);
361 383
362 /* 384 /*
363 * We didn't find a dir btree root block, which means that 385 * We didn't find a dir btree root block, which means that
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 69e1efdd4019..50b6a26b0299 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -92,7 +92,7 @@ xfs_scrub_dir_check_ftype(
92 * inodes can trigger immediate inactive cleanup of the inode. 92 * inodes can trigger immediate inactive cleanup of the inode.
93 */ 93 */
94 error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); 94 error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
95 if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, 95 if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
96 &error)) 96 &error))
97 goto out; 97 goto out;
98 98
@@ -200,6 +200,7 @@ xfs_scrub_dir_rec(
200 struct xfs_inode *dp = ds->dargs.dp; 200 struct xfs_inode *dp = ds->dargs.dp;
201 struct xfs_dir2_data_entry *dent; 201 struct xfs_dir2_data_entry *dent;
202 struct xfs_buf *bp; 202 struct xfs_buf *bp;
203 char *p, *endp;
203 xfs_ino_t ino; 204 xfs_ino_t ino;
204 xfs_dablk_t rec_bno; 205 xfs_dablk_t rec_bno;
205 xfs_dir2_db_t db; 206 xfs_dir2_db_t db;
@@ -237,9 +238,37 @@ xfs_scrub_dir_rec(
237 xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); 238 xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
238 goto out; 239 goto out;
239 } 240 }
241 xfs_scrub_buffer_recheck(ds->sc, bp);
240 242
241 /* Retrieve the entry, sanity check it, and compare hashes. */
242 dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off); 243 dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
244
245 /* Make sure we got a real directory entry. */
246 p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr);
247 endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
248 if (!endp) {
249 xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
250 goto out_relse;
251 }
252 while (p < endp) {
253 struct xfs_dir2_data_entry *dep;
254 struct xfs_dir2_data_unused *dup;
255
256 dup = (struct xfs_dir2_data_unused *)p;
257 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
258 p += be16_to_cpu(dup->length);
259 continue;
260 }
261 dep = (struct xfs_dir2_data_entry *)p;
262 if (dep == dent)
263 break;
264 p += mp->m_dir_inode_ops->data_entsize(dep->namelen);
265 }
266 if (p >= endp) {
267 xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
268 goto out_relse;
269 }
270
271 /* Retrieve the entry, sanity check it, and compare hashes. */
243 ino = be64_to_cpu(dent->inumber); 272 ino = be64_to_cpu(dent->inumber);
244 hash = be32_to_cpu(ent->hashval); 273 hash = be32_to_cpu(ent->hashval);
245 tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent)); 274 tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
@@ -324,6 +353,7 @@ xfs_scrub_directory_data_bestfree(
324 } 353 }
325 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 354 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
326 goto out; 355 goto out;
356 xfs_scrub_buffer_recheck(sc, bp);
327 357
328 /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ 358 /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
329 359
@@ -361,13 +391,7 @@ xfs_scrub_directory_data_bestfree(
361 391
362 /* Make sure the bestfrees are actually the best free spaces. */ 392 /* Make sure the bestfrees are actually the best free spaces. */
363 ptr = (char *)d_ops->data_entry_p(bp->b_addr); 393 ptr = (char *)d_ops->data_entry_p(bp->b_addr);
364 if (is_block) { 394 endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
365 struct xfs_dir2_block_tail *btp;
366
367 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr);
368 endptr = (char *)xfs_dir2_block_leaf_p(btp);
369 } else
370 endptr = (char *)bp->b_addr + BBTOB(bp->b_length);
371 395
372 /* Iterate the entries, stopping when we hit or go past the end. */ 396 /* Iterate the entries, stopping when we hit or go past the end. */
373 while (ptr < endptr) { 397 while (ptr < endptr) {
@@ -474,6 +498,7 @@ xfs_scrub_directory_leaf1_bestfree(
474 error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp); 498 error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
475 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 499 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
476 goto out; 500 goto out;
501 xfs_scrub_buffer_recheck(sc, bp);
477 502
478 leaf = bp->b_addr; 503 leaf = bp->b_addr;
479 d_ops->leaf_hdr_from_disk(&leafhdr, leaf); 504 d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
@@ -559,6 +584,7 @@ xfs_scrub_directory_free_bestfree(
559 error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp); 584 error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
560 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error)) 585 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
561 goto out; 586 goto out;
587 xfs_scrub_buffer_recheck(sc, bp);
562 588
563 if (xfs_sb_version_hascrc(&sc->mp->m_sb)) { 589 if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
564 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; 590 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 496d6f2fbb9e..63ab3f98430d 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -58,6 +58,56 @@ xfs_scrub_setup_ag_iallocbt(
58 58
59/* Inode btree scrubber. */ 59/* Inode btree scrubber. */
60 60
61/*
62 * If we're checking the finobt, cross-reference with the inobt.
63 * Otherwise we're checking the inobt; if there is an finobt, make sure
64 * we have a record or not depending on freecount.
65 */
66static inline void
67xfs_scrub_iallocbt_chunk_xref_other(
68 struct xfs_scrub_context *sc,
69 struct xfs_inobt_rec_incore *irec,
70 xfs_agino_t agino)
71{
72 struct xfs_btree_cur **pcur;
73 bool has_irec;
74 int error;
75
76 if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
77 pcur = &sc->sa.ino_cur;
78 else
79 pcur = &sc->sa.fino_cur;
80 if (!(*pcur))
81 return;
82 error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec);
83 if (!xfs_scrub_should_check_xref(sc, &error, pcur))
84 return;
85 if (((irec->ir_freecount > 0 && !has_irec) ||
86 (irec->ir_freecount == 0 && has_irec)))
87 xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
88}
89
90/* Cross-reference with the other btrees. */
91STATIC void
92xfs_scrub_iallocbt_chunk_xref(
93 struct xfs_scrub_context *sc,
94 struct xfs_inobt_rec_incore *irec,
95 xfs_agino_t agino,
96 xfs_agblock_t agbno,
97 xfs_extlen_t len)
98{
99 struct xfs_owner_info oinfo;
100
101 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
102 return;
103
104 xfs_scrub_xref_is_used_space(sc, agbno, len);
105 xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino);
106 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
107 xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo);
108 xfs_scrub_xref_is_not_shared(sc, agbno, len);
109}
110
61/* Is this chunk worth checking? */ 111/* Is this chunk worth checking? */
62STATIC bool 112STATIC bool
63xfs_scrub_iallocbt_chunk( 113xfs_scrub_iallocbt_chunk(
@@ -76,6 +126,8 @@ xfs_scrub_iallocbt_chunk(
76 !xfs_verify_agbno(mp, agno, bno + len - 1)) 126 !xfs_verify_agbno(mp, agno, bno + len - 1))
77 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 127 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
78 128
129 xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
130
79 return true; 131 return true;
80} 132}
81 133
@@ -190,8 +242,14 @@ xfs_scrub_iallocbt_check_freemask(
190 } 242 }
191 243
192 /* If any part of this is a hole, skip it. */ 244 /* If any part of this is a hole, skip it. */
193 if (ir_holemask) 245 if (ir_holemask) {
246 xfs_scrub_xref_is_not_owned_by(bs->sc, agbno,
247 blks_per_cluster, &oinfo);
194 continue; 248 continue;
249 }
250
251 xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
252 &oinfo);
195 253
196 /* Grab the inode cluster buffer. */ 254 /* Grab the inode cluster buffer. */
197 imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno, 255 imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
@@ -227,6 +285,7 @@ xfs_scrub_iallocbt_rec(
227 union xfs_btree_rec *rec) 285 union xfs_btree_rec *rec)
228{ 286{
229 struct xfs_mount *mp = bs->cur->bc_mp; 287 struct xfs_mount *mp = bs->cur->bc_mp;
288 xfs_filblks_t *inode_blocks = bs->private;
230 struct xfs_inobt_rec_incore irec; 289 struct xfs_inobt_rec_incore irec;
231 uint64_t holes; 290 uint64_t holes;
232 xfs_agnumber_t agno = bs->cur->bc_private.a.agno; 291 xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
@@ -264,6 +323,9 @@ xfs_scrub_iallocbt_rec(
264 (agbno & (xfs_icluster_size_fsb(mp) - 1))) 323 (agbno & (xfs_icluster_size_fsb(mp) - 1)))
265 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 324 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
266 325
326 *inode_blocks += XFS_B_TO_FSB(mp,
327 irec.ir_count * mp->m_sb.sb_inodesize);
328
267 /* Handle non-sparse inodes */ 329 /* Handle non-sparse inodes */
268 if (!xfs_inobt_issparse(irec.ir_holemask)) { 330 if (!xfs_inobt_issparse(irec.ir_holemask)) {
269 len = XFS_B_TO_FSB(mp, 331 len = XFS_B_TO_FSB(mp,
@@ -308,6 +370,72 @@ out:
308 return error; 370 return error;
309} 371}
310 372
373/*
374 * Make sure the inode btrees are as large as the rmap thinks they are.
375 * Don't bother if we're missing btree cursors, as we're already corrupt.
376 */
377STATIC void
378xfs_scrub_iallocbt_xref_rmap_btreeblks(
379 struct xfs_scrub_context *sc,
380 int which)
381{
382 struct xfs_owner_info oinfo;
383 xfs_filblks_t blocks;
384 xfs_extlen_t inobt_blocks = 0;
385 xfs_extlen_t finobt_blocks = 0;
386 int error;
387
388 if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
389 (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur))
390 return;
391
392 /* Check that we saw as many inobt blocks as the rmap says. */
393 error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks);
394 if (!xfs_scrub_process_error(sc, 0, 0, &error))
395 return;
396
397 if (sc->sa.fino_cur) {
398 error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks);
399 if (!xfs_scrub_process_error(sc, 0, 0, &error))
400 return;
401 }
402
403 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
404 error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
405 &blocks);
406 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
407 return;
408 if (blocks != inobt_blocks + finobt_blocks)
409 xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
410}
411
412/*
413 * Make sure that the inobt records point to the same number of blocks as
414 * the rmap says are owned by inodes.
415 */
416STATIC void
417xfs_scrub_iallocbt_xref_rmap_inodes(
418 struct xfs_scrub_context *sc,
419 int which,
420 xfs_filblks_t inode_blocks)
421{
422 struct xfs_owner_info oinfo;
423 xfs_filblks_t blocks;
424 int error;
425
426 if (!sc->sa.rmap_cur)
427 return;
428
429 /* Check that we saw as many inode blocks as the rmap knows about. */
430 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
431 error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
432 &blocks);
433 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
434 return;
435 if (blocks != inode_blocks)
436 xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
437}
438
311/* Scrub the inode btrees for some AG. */ 439/* Scrub the inode btrees for some AG. */
312STATIC int 440STATIC int
313xfs_scrub_iallocbt( 441xfs_scrub_iallocbt(
@@ -316,10 +444,29 @@ xfs_scrub_iallocbt(
316{ 444{
317 struct xfs_btree_cur *cur; 445 struct xfs_btree_cur *cur;
318 struct xfs_owner_info oinfo; 446 struct xfs_owner_info oinfo;
447 xfs_filblks_t inode_blocks = 0;
448 int error;
319 449
320 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 450 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
321 cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur; 451 cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
322 return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL); 452 error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo,
453 &inode_blocks);
454 if (error)
455 return error;
456
457 xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which);
458
459 /*
460 * If we're scrubbing the inode btree, inode_blocks is the number of
461 * blocks pointed to by all the inode chunk records. Therefore, we
462 * should compare to the number of inode chunk blocks that the rmap
463 * knows about. We can't do this for the finobt since it only points
464 * to inode chunks with free inodes.
465 */
466 if (which == XFS_BTNUM_INO)
467 xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
468
469 return error;
323} 470}
324 471
325int 472int
@@ -335,3 +482,46 @@ xfs_scrub_finobt(
335{ 482{
336 return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO); 483 return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
337} 484}
485
486/* See if an inode btree has (or doesn't have) an inode chunk record. */
487static inline void
488xfs_scrub_xref_inode_check(
489 struct xfs_scrub_context *sc,
490 xfs_agblock_t agbno,
491 xfs_extlen_t len,
492 struct xfs_btree_cur **icur,
493 bool should_have_inodes)
494{
495 bool has_inodes;
496 int error;
497
498 if (!(*icur))
499 return;
500
501 error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes);
502 if (!xfs_scrub_should_check_xref(sc, &error, icur))
503 return;
504 if (has_inodes != should_have_inodes)
505 xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0);
506}
507
508/* xref check that the extent is not covered by inodes */
509void
510xfs_scrub_xref_is_not_inode_chunk(
511 struct xfs_scrub_context *sc,
512 xfs_agblock_t agbno,
513 xfs_extlen_t len)
514{
515 xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
516 xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
517}
518
519/* xref check that the extent is covered by inodes */
520void
521xfs_scrub_xref_is_inode_chunk(
522 struct xfs_scrub_context *sc,
523 xfs_agblock_t agbno,
524 xfs_extlen_t len)
525{
526 xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
527}
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index f120fb20452f..21297bef8df1 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -36,9 +36,13 @@
36#include "xfs_ialloc.h" 36#include "xfs_ialloc.h"
37#include "xfs_da_format.h" 37#include "xfs_da_format.h"
38#include "xfs_reflink.h" 38#include "xfs_reflink.h"
39#include "xfs_rmap.h"
40#include "xfs_bmap.h"
41#include "xfs_bmap_util.h"
39#include "scrub/xfs_scrub.h" 42#include "scrub/xfs_scrub.h"
40#include "scrub/scrub.h" 43#include "scrub/scrub.h"
41#include "scrub/common.h" 44#include "scrub/common.h"
45#include "scrub/btree.h"
42#include "scrub/trace.h" 46#include "scrub/trace.h"
43 47
44/* 48/*
@@ -64,7 +68,7 @@ xfs_scrub_setup_inode(
64 break; 68 break;
65 case -EFSCORRUPTED: 69 case -EFSCORRUPTED:
66 case -EFSBADCRC: 70 case -EFSBADCRC:
67 return 0; 71 return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
68 default: 72 default:
69 return error; 73 return error;
70 } 74 }
@@ -392,6 +396,14 @@ xfs_scrub_dinode(
392 break; 396 break;
393 } 397 }
394 398
399 /* di_[amc]time.nsec */
400 if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
401 xfs_scrub_ino_set_corrupt(sc, ino, bp);
402 if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
403 xfs_scrub_ino_set_corrupt(sc, ino, bp);
404 if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
405 xfs_scrub_ino_set_corrupt(sc, ino, bp);
406
395 /* 407 /*
396 * di_size. xfs_dinode_verify checks for things that screw up 408 * di_size. xfs_dinode_verify checks for things that screw up
397 * the VFS such as the upper bit being set and zero-length 409 * the VFS such as the upper bit being set and zero-length
@@ -495,6 +507,8 @@ xfs_scrub_dinode(
495 } 507 }
496 508
497 if (dip->di_version >= 3) { 509 if (dip->di_version >= 3) {
510 if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
511 xfs_scrub_ino_set_corrupt(sc, ino, bp);
498 xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); 512 xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
499 xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, 513 xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
500 flags2); 514 flags2);
@@ -546,7 +560,7 @@ xfs_scrub_inode_map_raw(
546 */ 560 */
547 bp->b_ops = &xfs_inode_buf_ops; 561 bp->b_ops = &xfs_inode_buf_ops;
548 dip = xfs_buf_offset(bp, imap.im_boffset); 562 dip = xfs_buf_offset(bp, imap.im_boffset);
549 if (!xfs_dinode_verify(mp, ino, dip) || 563 if (xfs_dinode_verify(mp, ino, dip) != NULL ||
550 !xfs_dinode_good_version(mp, dip->di_version)) { 564 !xfs_dinode_good_version(mp, dip->di_version)) {
551 xfs_scrub_ino_set_corrupt(sc, ino, bp); 565 xfs_scrub_ino_set_corrupt(sc, ino, bp);
552 goto out_buf; 566 goto out_buf;
@@ -567,18 +581,155 @@ out_buf:
567 return error; 581 return error;
568} 582}
569 583
584/*
585 * Make sure the finobt doesn't think this inode is free.
586 * We don't have to check the inobt ourselves because we got the inode via
587 * IGET_UNTRUSTED, which checks the inobt for us.
588 */
589static void
590xfs_scrub_inode_xref_finobt(
591 struct xfs_scrub_context *sc,
592 xfs_ino_t ino)
593{
594 struct xfs_inobt_rec_incore rec;
595 xfs_agino_t agino;
596 int has_record;
597 int error;
598
599 if (!sc->sa.fino_cur)
600 return;
601
602 agino = XFS_INO_TO_AGINO(sc->mp, ino);
603
604 /*
605 * Try to get the finobt record. If we can't get it, then we're
606 * in good shape.
607 */
608 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
609 &has_record);
610 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
611 !has_record)
612 return;
613
614 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
615 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
616 !has_record)
617 return;
618
619 /*
620 * Otherwise, make sure this record either doesn't cover this inode,
621 * or that it does but it's marked present.
622 */
623 if (rec.ir_startino > agino ||
624 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
625 return;
626
627 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
628 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
629}
630
631/* Cross reference the inode fields with the forks. */
632STATIC void
633xfs_scrub_inode_xref_bmap(
634 struct xfs_scrub_context *sc,
635 struct xfs_dinode *dip)
636{
637 xfs_extnum_t nextents;
638 xfs_filblks_t count;
639 xfs_filblks_t acount;
640 int error;
641
642 /* Walk all the extents to check nextents/naextents/nblocks. */
643 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
644 &nextents, &count);
645 if (!xfs_scrub_should_check_xref(sc, &error, NULL))
646 return;
647 if (nextents < be32_to_cpu(dip->di_nextents))
648 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
649
650 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
651 &nextents, &acount);
652 if (!xfs_scrub_should_check_xref(sc, &error, NULL))
653 return;
654 if (nextents != be16_to_cpu(dip->di_anextents))
655 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
656
657 /* Check nblocks against the inode. */
658 if (count + acount != be64_to_cpu(dip->di_nblocks))
659 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
660}
661
662/* Cross-reference with the other btrees. */
663STATIC void
664xfs_scrub_inode_xref(
665 struct xfs_scrub_context *sc,
666 xfs_ino_t ino,
667 struct xfs_dinode *dip)
668{
669 struct xfs_owner_info oinfo;
670 xfs_agnumber_t agno;
671 xfs_agblock_t agbno;
672 int error;
673
674 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
675 return;
676
677 agno = XFS_INO_TO_AGNO(sc->mp, ino);
678 agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
679
680 error = xfs_scrub_ag_init(sc, agno, &sc->sa);
681 if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
682 return;
683
684 xfs_scrub_xref_is_used_space(sc, agbno, 1);
685 xfs_scrub_inode_xref_finobt(sc, ino);
686 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
687 xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
688 xfs_scrub_xref_is_not_shared(sc, agbno, 1);
689 xfs_scrub_inode_xref_bmap(sc, dip);
690
691 xfs_scrub_ag_free(sc, &sc->sa);
692}
693
694/*
695 * If the reflink iflag disagrees with a scan for shared data fork extents,
696 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
697 * any shared extents). We already checked for reflink iflag set on a non
698 * reflink filesystem.
699 */
700static void
701xfs_scrub_inode_check_reflink_iflag(
702 struct xfs_scrub_context *sc,
703 xfs_ino_t ino,
704 struct xfs_buf *bp)
705{
706 struct xfs_mount *mp = sc->mp;
707 bool has_shared;
708 int error;
709
710 if (!xfs_sb_version_hasreflink(&mp->m_sb))
711 return;
712
713 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
714 &has_shared);
715 if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
716 XFS_INO_TO_AGBNO(mp, ino), &error))
717 return;
718 if (xfs_is_reflink_inode(sc->ip) && !has_shared)
719 xfs_scrub_ino_set_preen(sc, ino, bp);
720 else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
721 xfs_scrub_ino_set_corrupt(sc, ino, bp);
722}
723
570/* Scrub an inode. */ 724/* Scrub an inode. */
571int 725int
572xfs_scrub_inode( 726xfs_scrub_inode(
573 struct xfs_scrub_context *sc) 727 struct xfs_scrub_context *sc)
574{ 728{
575 struct xfs_dinode di; 729 struct xfs_dinode di;
576 struct xfs_mount *mp = sc->mp;
577 struct xfs_buf *bp = NULL; 730 struct xfs_buf *bp = NULL;
578 struct xfs_dinode *dip; 731 struct xfs_dinode *dip;
579 xfs_ino_t ino; 732 xfs_ino_t ino;
580
581 bool has_shared;
582 int error = 0; 733 int error = 0;
583 734
584 /* Did we get the in-core inode, or are we doing this manually? */ 735 /* Did we get the in-core inode, or are we doing this manually? */
@@ -603,19 +754,14 @@ xfs_scrub_inode(
603 goto out; 754 goto out;
604 755
605 /* 756 /*
606 * Does this inode have the reflink flag set but no shared extents? 757 * Look for discrepancies between file's data blocks and the reflink
607 * Set the preening flag if this is the case. 758 * iflag. We already checked the iflag against the file mode when
759 * we scrubbed the dinode.
608 */ 760 */
609 if (xfs_is_reflink_inode(sc->ip)) { 761 if (S_ISREG(VFS_I(sc->ip)->i_mode))
610 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 762 xfs_scrub_inode_check_reflink_iflag(sc, ino, bp);
611 &has_shared);
612 if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
613 XFS_INO_TO_AGBNO(mp, ino), &error))
614 goto out;
615 if (!has_shared)
616 xfs_scrub_ino_set_preen(sc, ino, bp);
617 }
618 763
764 xfs_scrub_inode_xref(sc, ino, dip);
619out: 765out:
620 if (bp) 766 if (bp)
621 xfs_trans_brelse(sc->tp, bp); 767 xfs_trans_brelse(sc->tp, bp);
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 63a25334fc83..0d3851410c74 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -169,9 +169,9 @@ xfs_scrub_parent_validate(
169 * immediate inactive cleanup of the inode. 169 * immediate inactive cleanup of the inode.
170 */ 170 */
171 error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp); 171 error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
172 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 172 if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
173 goto out; 173 goto out;
174 if (dp == sc->ip) { 174 if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
175 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); 175 xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
176 goto out_rele; 176 goto out_rele;
177 } 177 }
@@ -185,7 +185,7 @@ xfs_scrub_parent_validate(
185 */ 185 */
186 if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { 186 if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
187 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 187 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
188 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, 188 if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
189 &error)) 189 &error))
190 goto out_unlock; 190 goto out_unlock;
191 if (nlink != expected_nlink) 191 if (nlink != expected_nlink)
@@ -205,7 +205,7 @@ xfs_scrub_parent_validate(
205 205
206 /* Go looking for our dentry. */ 206 /* Go looking for our dentry. */
207 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); 207 error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
208 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) 208 if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
209 goto out_unlock; 209 goto out_unlock;
210 210
211 /* Drop the parent lock, relock this inode. */ 211 /* Drop the parent lock, relock this inode. */
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 3d9037eceaf1..51daa4ae2627 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -67,13 +67,6 @@ xfs_scrub_setup_quota(
67{ 67{
68 uint dqtype; 68 uint dqtype;
69 69
70 /*
71 * If userspace gave us an AG number or inode data, they don't
72 * know what they're doing. Get out.
73 */
74 if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
75 return -EINVAL;
76
77 dqtype = xfs_scrub_quota_to_dqtype(sc); 70 dqtype = xfs_scrub_quota_to_dqtype(sc);
78 if (dqtype == 0) 71 if (dqtype == 0)
79 return -EINVAL; 72 return -EINVAL;
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 2f88a8d44bd0..400f1561cd3d 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -31,6 +31,7 @@
31#include "xfs_sb.h" 31#include "xfs_sb.h"
32#include "xfs_alloc.h" 32#include "xfs_alloc.h"
33#include "xfs_rmap.h" 33#include "xfs_rmap.h"
34#include "xfs_refcount.h"
34#include "scrub/xfs_scrub.h" 35#include "scrub/xfs_scrub.h"
35#include "scrub/scrub.h" 36#include "scrub/scrub.h"
36#include "scrub/common.h" 37#include "scrub/common.h"
@@ -50,6 +51,307 @@ xfs_scrub_setup_ag_refcountbt(
50 51
51/* Reference count btree scrubber. */ 52/* Reference count btree scrubber. */
52 53
54/*
55 * Confirming Reference Counts via Reverse Mappings
56 *
57 * We want to count the reverse mappings overlapping a refcount record
58 * (bno, len, refcount), allowing for the possibility that some of the
59 * overlap may come from smaller adjoining reverse mappings, while some
60 * comes from single extents which overlap the range entirely. The
61 * outer loop is as follows:
62 *
63 * 1. For all reverse mappings overlapping the refcount extent,
64 * a. If a given rmap completely overlaps, mark it as seen.
65 * b. Otherwise, record the fragment (in agbno order) for later
66 * processing.
67 *
68 * Once we've seen all the rmaps, we know that for all blocks in the
69 * refcount record we want to find $refcount owners and we've already
70 * visited $seen extents that overlap all the blocks. Therefore, we
71 * need to find ($refcount - $seen) owners for every block in the
72 * extent; call that quantity $target_nr. Proceed as follows:
73 *
74 * 2. Pull the first $target_nr fragments from the list; all of them
75 * should start at or before the start of the extent.
76 * Call this subset of fragments the working set.
77 * 3. Until there are no more unprocessed fragments,
78 * a. Find the shortest fragments in the set and remove them.
79 * b. Note the block number of the end of these fragments.
80 * c. Pull the same number of fragments from the list. All of these
81 * fragments should start at the block number recorded in the
82 * previous step.
83 * d. Put those fragments in the set.
84 * 4. Check that there are $target_nr fragments remaining in the list,
85 * and that they all end at or beyond the end of the refcount extent.
86 *
87 * If the refcount is correct, all the check conditions in the algorithm
88 * should always hold true. If not, the refcount is incorrect.
89 */
90struct xfs_scrub_refcnt_frag {
91 struct list_head list;
92 struct xfs_rmap_irec rm;
93};
94
95struct xfs_scrub_refcnt_check {
96 struct xfs_scrub_context *sc;
97 struct list_head fragments;
98
99 /* refcount extent we're examining */
100 xfs_agblock_t bno;
101 xfs_extlen_t len;
102 xfs_nlink_t refcount;
103
104 /* number of owners seen */
105 xfs_nlink_t seen;
106};
107
108/*
109 * Decide if the given rmap is large enough that we can redeem it
110 * towards refcount verification now, or if it's a fragment, in
111 * which case we'll hang onto it in the hopes that we'll later
112 * discover that we've collected exactly the correct number of
113 * fragments as the refcountbt says we should have.
114 */
115STATIC int
116xfs_scrub_refcountbt_rmap_check(
117 struct xfs_btree_cur *cur,
118 struct xfs_rmap_irec *rec,
119 void *priv)
120{
121 struct xfs_scrub_refcnt_check *refchk = priv;
122 struct xfs_scrub_refcnt_frag *frag;
123 xfs_agblock_t rm_last;
124 xfs_agblock_t rc_last;
125 int error = 0;
126
127 if (xfs_scrub_should_terminate(refchk->sc, &error))
128 return error;
129
130 rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
131 rc_last = refchk->bno + refchk->len - 1;
132
133 /* Confirm that a single-owner refc extent is a CoW stage. */
134 if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
135 xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0);
136 return 0;
137 }
138
139 if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
140 /*
141 * The rmap overlaps the refcount record, so we can confirm
142 * one refcount owner seen.
143 */
144 refchk->seen++;
145 } else {
146 /*
147 * This rmap covers only part of the refcount record, so
148 * save the fragment for later processing. If the rmapbt
149 * is healthy each rmap_irec we see will be in agbno order
150 * so we don't need insertion sort here.
151 */
152 frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
153 KM_MAYFAIL | KM_NOFS);
154 if (!frag)
155 return -ENOMEM;
156 memcpy(&frag->rm, rec, sizeof(frag->rm));
157 list_add_tail(&frag->list, &refchk->fragments);
158 }
159
160 return 0;
161}
162
163/*
164 * Given a bunch of rmap fragments, iterate through them, keeping
165 * a running tally of the refcount. If this ever deviates from
166 * what we expect (which is the refcountbt's refcount minus the
167 * number of extents that totally covered the refcountbt extent),
168 * we have a refcountbt error.
169 */
170STATIC void
171xfs_scrub_refcountbt_process_rmap_fragments(
172 struct xfs_scrub_refcnt_check *refchk)
173{
174 struct list_head worklist;
175 struct xfs_scrub_refcnt_frag *frag;
176 struct xfs_scrub_refcnt_frag *n;
177 xfs_agblock_t bno;
178 xfs_agblock_t rbno;
179 xfs_agblock_t next_rbno;
180 xfs_nlink_t nr;
181 xfs_nlink_t target_nr;
182
183 target_nr = refchk->refcount - refchk->seen;
184 if (target_nr == 0)
185 return;
186
187 /*
188 * There are (refchk->rc.rc_refcount - refchk->nr refcount)
189 * references we haven't found yet. Pull that many off the
190 * fragment list and figure out where the smallest rmap ends
191 * (and therefore the next rmap should start). All the rmaps
192 * we pull off should start at or before the beginning of the
193 * refcount record's range.
194 */
195 INIT_LIST_HEAD(&worklist);
196 rbno = NULLAGBLOCK;
197 nr = 1;
198
199 /* Make sure the fragments actually /are/ in agbno order. */
200 bno = 0;
201 list_for_each_entry(frag, &refchk->fragments, list) {
202 if (frag->rm.rm_startblock < bno)
203 goto done;
204 bno = frag->rm.rm_startblock;
205 }
206
207 /*
208 * Find all the rmaps that start at or before the refc extent,
209 * and put them on the worklist.
210 */
211 list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
212 if (frag->rm.rm_startblock > refchk->bno)
213 goto done;
214 bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
215 if (bno < rbno)
216 rbno = bno;
217 list_move_tail(&frag->list, &worklist);
218 if (nr == target_nr)
219 break;
220 nr++;
221 }
222
223 /*
224 * We should have found exactly $target_nr rmap fragments starting
225 * at or before the refcount extent.
226 */
227 if (nr != target_nr)
228 goto done;
229
230 while (!list_empty(&refchk->fragments)) {
231 /* Discard any fragments ending at rbno from the worklist. */
232 nr = 0;
233 next_rbno = NULLAGBLOCK;
234 list_for_each_entry_safe(frag, n, &worklist, list) {
235 bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
236 if (bno != rbno) {
237 if (bno < next_rbno)
238 next_rbno = bno;
239 continue;
240 }
241 list_del(&frag->list);
242 kmem_free(frag);
243 nr++;
244 }
245
246 /* Try to add nr rmaps starting at rbno to the worklist. */
247 list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
248 bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
249 if (frag->rm.rm_startblock != rbno)
250 goto done;
251 list_move_tail(&frag->list, &worklist);
252 if (next_rbno > bno)
253 next_rbno = bno;
254 nr--;
255 if (nr == 0)
256 break;
257 }
258
259 /*
260 * If we get here and nr > 0, this means that we added fewer
261 * items to the worklist than we discarded because the fragment
262 * list ran out of items. Therefore, we cannot maintain the
263 * required refcount. Something is wrong, so we're done.
264 */
265 if (nr)
266 goto done;
267
268 rbno = next_rbno;
269 }
270
271 /*
272 * Make sure the last extent we processed ends at or beyond
273 * the end of the refcount extent.
274 */
275 if (rbno < refchk->bno + refchk->len)
276 goto done;
277
278 /* Actually record us having seen the remaining refcount. */
279 refchk->seen = refchk->refcount;
280done:
281 /* Delete fragments and work list. */
282 list_for_each_entry_safe(frag, n, &worklist, list) {
283 list_del(&frag->list);
284 kmem_free(frag);
285 }
286 list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
287 list_del(&frag->list);
288 kmem_free(frag);
289 }
290}
291
292/* Use the rmap entries covering this extent to verify the refcount. */
293STATIC void
294xfs_scrub_refcountbt_xref_rmap(
295 struct xfs_scrub_context *sc,
296 xfs_agblock_t bno,
297 xfs_extlen_t len,
298 xfs_nlink_t refcount)
299{
300 struct xfs_scrub_refcnt_check refchk = {
301 .sc = sc,
302 .bno = bno,
303 .len = len,
304 .refcount = refcount,
305 .seen = 0,
306 };
307 struct xfs_rmap_irec low;
308 struct xfs_rmap_irec high;
309 struct xfs_scrub_refcnt_frag *frag;
310 struct xfs_scrub_refcnt_frag *n;
311 int error;
312
313 if (!sc->sa.rmap_cur)
314 return;
315
316 /* Cross-reference with the rmapbt to confirm the refcount. */
317 memset(&low, 0, sizeof(low));
318 low.rm_startblock = bno;
319 memset(&high, 0xFF, sizeof(high));
320 high.rm_startblock = bno + len - 1;
321
322 INIT_LIST_HEAD(&refchk.fragments);
323 error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
324 &xfs_scrub_refcountbt_rmap_check, &refchk);
325 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
326 goto out_free;
327
328 xfs_scrub_refcountbt_process_rmap_fragments(&refchk);
329 if (refcount != refchk.seen)
330 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
331
332out_free:
333 list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
334 list_del(&frag->list);
335 kmem_free(frag);
336 }
337}
338
339/* Cross-reference with the other btrees. */
340STATIC void
341xfs_scrub_refcountbt_xref(
342 struct xfs_scrub_context *sc,
343 xfs_agblock_t agbno,
344 xfs_extlen_t len,
345 xfs_nlink_t refcount)
346{
347 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
348 return;
349
350 xfs_scrub_xref_is_used_space(sc, agbno, len);
351 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
352 xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount);
353}
354
53/* Scrub a refcountbt record. */ 355/* Scrub a refcountbt record. */
54STATIC int 356STATIC int
55xfs_scrub_refcountbt_rec( 357xfs_scrub_refcountbt_rec(
@@ -57,6 +359,7 @@ xfs_scrub_refcountbt_rec(
57 union xfs_btree_rec *rec) 359 union xfs_btree_rec *rec)
58{ 360{
59 struct xfs_mount *mp = bs->cur->bc_mp; 361 struct xfs_mount *mp = bs->cur->bc_mp;
362 xfs_agblock_t *cow_blocks = bs->private;
60 xfs_agnumber_t agno = bs->cur->bc_private.a.agno; 363 xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
61 xfs_agblock_t bno; 364 xfs_agblock_t bno;
62 xfs_extlen_t len; 365 xfs_extlen_t len;
@@ -72,6 +375,8 @@ xfs_scrub_refcountbt_rec(
72 has_cowflag = (bno & XFS_REFC_COW_START); 375 has_cowflag = (bno & XFS_REFC_COW_START);
73 if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) 376 if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
74 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 377 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
378 if (has_cowflag)
379 (*cow_blocks) += len;
75 380
76 /* Check the extent. */ 381 /* Check the extent. */
77 bno &= ~XFS_REFC_COW_START; 382 bno &= ~XFS_REFC_COW_START;
@@ -83,17 +388,128 @@ xfs_scrub_refcountbt_rec(
83 if (refcount == 0) 388 if (refcount == 0)
84 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 389 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
85 390
391 xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount);
392
86 return error; 393 return error;
87} 394}
88 395
396/* Make sure we have as many refc blocks as the rmap says. */
397STATIC void
398xfs_scrub_refcount_xref_rmap(
399 struct xfs_scrub_context *sc,
400 struct xfs_owner_info *oinfo,
401 xfs_filblks_t cow_blocks)
402{
403 xfs_extlen_t refcbt_blocks = 0;
404 xfs_filblks_t blocks;
405 int error;
406
407 if (!sc->sa.rmap_cur)
408 return;
409
410 /* Check that we saw as many refcbt blocks as the rmap knows about. */
411 error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
412 if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
413 return;
414 error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
415 &blocks);
416 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
417 return;
418 if (blocks != refcbt_blocks)
419 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
420
421 /* Check that we saw as many cow blocks as the rmap knows about. */
422 xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
423 error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
424 &blocks);
425 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
426 return;
427 if (blocks != cow_blocks)
428 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
429}
430
89/* Scrub the refcount btree for some AG. */ 431/* Scrub the refcount btree for some AG. */
90int 432int
91xfs_scrub_refcountbt( 433xfs_scrub_refcountbt(
92 struct xfs_scrub_context *sc) 434 struct xfs_scrub_context *sc)
93{ 435{
94 struct xfs_owner_info oinfo; 436 struct xfs_owner_info oinfo;
437 xfs_agblock_t cow_blocks = 0;
438 int error;
95 439
96 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); 440 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
97 return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, 441 error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
98 &oinfo, NULL); 442 &oinfo, &cow_blocks);
443 if (error)
444 return error;
445
446 xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
447
448 return 0;
449}
450
451/* xref check that a cow staging extent is marked in the refcountbt. */
452void
453xfs_scrub_xref_is_cow_staging(
454 struct xfs_scrub_context *sc,
455 xfs_agblock_t agbno,
456 xfs_extlen_t len)
457{
458 struct xfs_refcount_irec rc;
459 bool has_cowflag;
460 int has_refcount;
461 int error;
462
463 if (!sc->sa.refc_cur)
464 return;
465
466 /* Find the CoW staging extent. */
467 error = xfs_refcount_lookup_le(sc->sa.refc_cur,
468 agbno + XFS_REFC_COW_START, &has_refcount);
469 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
470 return;
471 if (!has_refcount) {
472 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
473 return;
474 }
475
476 error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
477 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
478 return;
479 if (!has_refcount) {
480 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
481 return;
482 }
483
484 /* CoW flag must be set, refcount must be 1. */
485 has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
486 if (!has_cowflag || rc.rc_refcount != 1)
487 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
488
489 /* Must be at least as long as what was passed in */
490 if (rc.rc_blockcount < len)
491 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
492}
493
494/*
495 * xref check that the extent is not shared. Only file data blocks
496 * can have multiple owners.
497 */
498void
499xfs_scrub_xref_is_not_shared(
500 struct xfs_scrub_context *sc,
501 xfs_agblock_t agbno,
502 xfs_extlen_t len)
503{
504 bool shared;
505 int error;
506
507 if (!sc->sa.refc_cur)
508 return;
509
510 error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
511 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
512 return;
513 if (shared)
514 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
99} 515}
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 97846c424690..8f2a7c3ff455 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -32,6 +32,7 @@
32#include "xfs_alloc.h" 32#include "xfs_alloc.h"
33#include "xfs_ialloc.h" 33#include "xfs_ialloc.h"
34#include "xfs_rmap.h" 34#include "xfs_rmap.h"
35#include "xfs_refcount.h"
35#include "scrub/xfs_scrub.h" 36#include "scrub/xfs_scrub.h"
36#include "scrub/scrub.h" 37#include "scrub/scrub.h"
37#include "scrub/common.h" 38#include "scrub/common.h"
@@ -51,6 +52,61 @@ xfs_scrub_setup_ag_rmapbt(
51 52
52/* Reverse-mapping scrubber. */ 53/* Reverse-mapping scrubber. */
53 54
55/* Cross-reference a rmap against the refcount btree. */
56STATIC void
57xfs_scrub_rmapbt_xref_refc(
58 struct xfs_scrub_context *sc,
59 struct xfs_rmap_irec *irec)
60{
61 xfs_agblock_t fbno;
62 xfs_extlen_t flen;
63 bool non_inode;
64 bool is_bmbt;
65 bool is_attr;
66 bool is_unwritten;
67 int error;
68
69 if (!sc->sa.refc_cur)
70 return;
71
72 non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
73 is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
74 is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
75 is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
76
77 /* If this is shared, must be a data fork extent. */
78 error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock,
79 irec->rm_blockcount, &fbno, &flen, false);
80 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
81 return;
82 if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten))
83 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
84}
85
86/* Cross-reference with the other btrees. */
87STATIC void
88xfs_scrub_rmapbt_xref(
89 struct xfs_scrub_context *sc,
90 struct xfs_rmap_irec *irec)
91{
92 xfs_agblock_t agbno = irec->rm_startblock;
93 xfs_extlen_t len = irec->rm_blockcount;
94
95 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
96 return;
97
98 xfs_scrub_xref_is_used_space(sc, agbno, len);
99 if (irec->rm_owner == XFS_RMAP_OWN_INODES)
100 xfs_scrub_xref_is_inode_chunk(sc, agbno, len);
101 else
102 xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
103 if (irec->rm_owner == XFS_RMAP_OWN_COW)
104 xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock,
105 irec->rm_blockcount);
106 else
107 xfs_scrub_rmapbt_xref_refc(sc, irec);
108}
109
54/* Scrub an rmapbt record. */ 110/* Scrub an rmapbt record. */
55STATIC int 111STATIC int
56xfs_scrub_rmapbt_rec( 112xfs_scrub_rmapbt_rec(
@@ -121,6 +177,8 @@ xfs_scrub_rmapbt_rec(
121 irec.rm_owner > XFS_RMAP_OWN_FS) 177 irec.rm_owner > XFS_RMAP_OWN_FS)
122 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); 178 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
123 } 179 }
180
181 xfs_scrub_rmapbt_xref(bs->sc, &irec);
124out: 182out:
125 return error; 183 return error;
126} 184}
@@ -136,3 +194,68 @@ xfs_scrub_rmapbt(
136 return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec, 194 return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec,
137 &oinfo, NULL); 195 &oinfo, NULL);
138} 196}
197
198/* xref check that the extent is owned by a given owner */
199static inline void
200xfs_scrub_xref_check_owner(
201 struct xfs_scrub_context *sc,
202 xfs_agblock_t bno,
203 xfs_extlen_t len,
204 struct xfs_owner_info *oinfo,
205 bool should_have_rmap)
206{
207 bool has_rmap;
208 int error;
209
210 if (!sc->sa.rmap_cur)
211 return;
212
213 error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo,
214 &has_rmap);
215 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
216 return;
217 if (has_rmap != should_have_rmap)
218 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
219}
220
221/* xref check that the extent is owned by a given owner */
222void
223xfs_scrub_xref_is_owned_by(
224 struct xfs_scrub_context *sc,
225 xfs_agblock_t bno,
226 xfs_extlen_t len,
227 struct xfs_owner_info *oinfo)
228{
229 xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true);
230}
231
232/* xref check that the extent is not owned by a given owner */
233void
234xfs_scrub_xref_is_not_owned_by(
235 struct xfs_scrub_context *sc,
236 xfs_agblock_t bno,
237 xfs_extlen_t len,
238 struct xfs_owner_info *oinfo)
239{
240 xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false);
241}
242
243/* xref check that the extent has no reverse mapping at all */
244void
245xfs_scrub_xref_has_no_owner(
246 struct xfs_scrub_context *sc,
247 xfs_agblock_t bno,
248 xfs_extlen_t len)
249{
250 bool has_rmap;
251 int error;
252
253 if (!sc->sa.rmap_cur)
254 return;
255
256 error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap);
257 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
258 return;
259 if (has_rmap)
260 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
261}
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index c6fedb698008..26390991369a 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -43,22 +43,14 @@ xfs_scrub_setup_rt(
43 struct xfs_scrub_context *sc, 43 struct xfs_scrub_context *sc,
44 struct xfs_inode *ip) 44 struct xfs_inode *ip)
45{ 45{
46 struct xfs_mount *mp = sc->mp; 46 int error;
47 int error = 0;
48
49 /*
50 * If userspace gave us an AG number or inode data, they don't
51 * know what they're doing. Get out.
52 */
53 if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
54 return -EINVAL;
55 47
56 error = xfs_scrub_setup_fs(sc, ip); 48 error = xfs_scrub_setup_fs(sc, ip);
57 if (error) 49 if (error)
58 return error; 50 return error;
59 51
60 sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP; 52 sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
61 sc->ip = mp->m_rbmip; 53 sc->ip = sc->mp->m_rbmip;
62 xfs_ilock(sc->ip, sc->ilock_flags); 54 xfs_ilock(sc->ip, sc->ilock_flags);
63 55
64 return 0; 56 return 0;
@@ -106,3 +98,26 @@ xfs_scrub_rtsummary(
106 /* XXX: implement this some day */ 98 /* XXX: implement this some day */
107 return -ENOENT; 99 return -ENOENT;
108} 100}
101
102
103/* xref check that the extent is not free in the rtbitmap */
104void
105xfs_scrub_xref_is_used_rt_space(
106 struct xfs_scrub_context *sc,
107 xfs_rtblock_t fsbno,
108 xfs_extlen_t len)
109{
110 bool is_free;
111 int error;
112
113 xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
114 error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len,
115 &is_free);
116 if (!xfs_scrub_should_check_xref(sc, &error, NULL))
117 goto out_unlock;
118 if (is_free)
119 xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino,
120 NULL);
121out_unlock:
122 xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
123}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index ab3aef2ae823..26c75967a072 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -110,6 +110,16 @@
110 * structure itself is corrupt, the CORRUPT flag will be set. If 110 * structure itself is corrupt, the CORRUPT flag will be set. If
111 * the metadata is correct but otherwise suboptimal, the PREEN flag 111 * the metadata is correct but otherwise suboptimal, the PREEN flag
112 * will be set. 112 * will be set.
113 *
114 * We perform secondary validation of filesystem metadata by
115 * cross-referencing every record with all other available metadata.
116 * For example, for block mapping extents, we verify that there are no
117 * records in the free space and inode btrees corresponding to that
118 * space extent and that there is a corresponding entry in the reverse
119 * mapping btree. Inconsistent metadata is noted by setting the
120 * XCORRUPT flag; btree query function errors are noted by setting the
121 * XFAIL flag and deleting the cursor to prevent further attempts to
122 * cross-reference with a defective btree.
113 */ 123 */
114 124
115/* 125/*
@@ -128,8 +138,6 @@ xfs_scrub_probe(
128{ 138{
129 int error = 0; 139 int error = 0;
130 140
131 if (sc->sm->sm_ino || sc->sm->sm_agno)
132 return -EINVAL;
133 if (xfs_scrub_should_terminate(sc, &error)) 141 if (xfs_scrub_should_terminate(sc, &error))
134 return error; 142 return error;
135 143
@@ -151,7 +159,8 @@ xfs_scrub_teardown(
151 sc->tp = NULL; 159 sc->tp = NULL;
152 } 160 }
153 if (sc->ip) { 161 if (sc->ip) {
154 xfs_iunlock(sc->ip, sc->ilock_flags); 162 if (sc->ilock_flags)
163 xfs_iunlock(sc->ip, sc->ilock_flags);
155 if (sc->ip != ip_in && 164 if (sc->ip != ip_in &&
156 !xfs_internal_inum(sc->mp, sc->ip->i_ino)) 165 !xfs_internal_inum(sc->mp, sc->ip->i_ino))
157 iput(VFS_I(sc->ip)); 166 iput(VFS_I(sc->ip));
@@ -167,106 +176,130 @@ xfs_scrub_teardown(
167/* Scrubbing dispatch. */ 176/* Scrubbing dispatch. */
168 177
169static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { 178static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
170 { /* ioctl presence test */ 179 [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */
180 .type = ST_NONE,
171 .setup = xfs_scrub_setup_fs, 181 .setup = xfs_scrub_setup_fs,
172 .scrub = xfs_scrub_probe, 182 .scrub = xfs_scrub_probe,
173 }, 183 },
174 { /* superblock */ 184 [XFS_SCRUB_TYPE_SB] = { /* superblock */
175 .setup = xfs_scrub_setup_ag_header, 185 .type = ST_PERAG,
186 .setup = xfs_scrub_setup_fs,
176 .scrub = xfs_scrub_superblock, 187 .scrub = xfs_scrub_superblock,
177 }, 188 },
178 { /* agf */ 189 [XFS_SCRUB_TYPE_AGF] = { /* agf */
179 .setup = xfs_scrub_setup_ag_header, 190 .type = ST_PERAG,
191 .setup = xfs_scrub_setup_fs,
180 .scrub = xfs_scrub_agf, 192 .scrub = xfs_scrub_agf,
181 }, 193 },
182 { /* agfl */ 194 [XFS_SCRUB_TYPE_AGFL]= { /* agfl */
183 .setup = xfs_scrub_setup_ag_header, 195 .type = ST_PERAG,
196 .setup = xfs_scrub_setup_fs,
184 .scrub = xfs_scrub_agfl, 197 .scrub = xfs_scrub_agfl,
185 }, 198 },
186 { /* agi */ 199 [XFS_SCRUB_TYPE_AGI] = { /* agi */
187 .setup = xfs_scrub_setup_ag_header, 200 .type = ST_PERAG,
201 .setup = xfs_scrub_setup_fs,
188 .scrub = xfs_scrub_agi, 202 .scrub = xfs_scrub_agi,
189 }, 203 },
190 { /* bnobt */ 204 [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */
205 .type = ST_PERAG,
191 .setup = xfs_scrub_setup_ag_allocbt, 206 .setup = xfs_scrub_setup_ag_allocbt,
192 .scrub = xfs_scrub_bnobt, 207 .scrub = xfs_scrub_bnobt,
193 }, 208 },
194 { /* cntbt */ 209 [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */
210 .type = ST_PERAG,
195 .setup = xfs_scrub_setup_ag_allocbt, 211 .setup = xfs_scrub_setup_ag_allocbt,
196 .scrub = xfs_scrub_cntbt, 212 .scrub = xfs_scrub_cntbt,
197 }, 213 },
198 { /* inobt */ 214 [XFS_SCRUB_TYPE_INOBT] = { /* inobt */
215 .type = ST_PERAG,
199 .setup = xfs_scrub_setup_ag_iallocbt, 216 .setup = xfs_scrub_setup_ag_iallocbt,
200 .scrub = xfs_scrub_inobt, 217 .scrub = xfs_scrub_inobt,
201 }, 218 },
202 { /* finobt */ 219 [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */
220 .type = ST_PERAG,
203 .setup = xfs_scrub_setup_ag_iallocbt, 221 .setup = xfs_scrub_setup_ag_iallocbt,
204 .scrub = xfs_scrub_finobt, 222 .scrub = xfs_scrub_finobt,
205 .has = xfs_sb_version_hasfinobt, 223 .has = xfs_sb_version_hasfinobt,
206 }, 224 },
207 { /* rmapbt */ 225 [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */
226 .type = ST_PERAG,
208 .setup = xfs_scrub_setup_ag_rmapbt, 227 .setup = xfs_scrub_setup_ag_rmapbt,
209 .scrub = xfs_scrub_rmapbt, 228 .scrub = xfs_scrub_rmapbt,
210 .has = xfs_sb_version_hasrmapbt, 229 .has = xfs_sb_version_hasrmapbt,
211 }, 230 },
212 { /* refcountbt */ 231 [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
232 .type = ST_PERAG,
213 .setup = xfs_scrub_setup_ag_refcountbt, 233 .setup = xfs_scrub_setup_ag_refcountbt,
214 .scrub = xfs_scrub_refcountbt, 234 .scrub = xfs_scrub_refcountbt,
215 .has = xfs_sb_version_hasreflink, 235 .has = xfs_sb_version_hasreflink,
216 }, 236 },
217 { /* inode record */ 237 [XFS_SCRUB_TYPE_INODE] = { /* inode record */
238 .type = ST_INODE,
218 .setup = xfs_scrub_setup_inode, 239 .setup = xfs_scrub_setup_inode,
219 .scrub = xfs_scrub_inode, 240 .scrub = xfs_scrub_inode,
220 }, 241 },
221 { /* inode data fork */ 242 [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */
243 .type = ST_INODE,
222 .setup = xfs_scrub_setup_inode_bmap, 244 .setup = xfs_scrub_setup_inode_bmap,
223 .scrub = xfs_scrub_bmap_data, 245 .scrub = xfs_scrub_bmap_data,
224 }, 246 },
225 { /* inode attr fork */ 247 [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */
248 .type = ST_INODE,
226 .setup = xfs_scrub_setup_inode_bmap, 249 .setup = xfs_scrub_setup_inode_bmap,
227 .scrub = xfs_scrub_bmap_attr, 250 .scrub = xfs_scrub_bmap_attr,
228 }, 251 },
229 { /* inode CoW fork */ 252 [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */
253 .type = ST_INODE,
230 .setup = xfs_scrub_setup_inode_bmap, 254 .setup = xfs_scrub_setup_inode_bmap,
231 .scrub = xfs_scrub_bmap_cow, 255 .scrub = xfs_scrub_bmap_cow,
232 }, 256 },
233 { /* directory */ 257 [XFS_SCRUB_TYPE_DIR] = { /* directory */
258 .type = ST_INODE,
234 .setup = xfs_scrub_setup_directory, 259 .setup = xfs_scrub_setup_directory,
235 .scrub = xfs_scrub_directory, 260 .scrub = xfs_scrub_directory,
236 }, 261 },
237 { /* extended attributes */ 262 [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */
263 .type = ST_INODE,
238 .setup = xfs_scrub_setup_xattr, 264 .setup = xfs_scrub_setup_xattr,
239 .scrub = xfs_scrub_xattr, 265 .scrub = xfs_scrub_xattr,
240 }, 266 },
241 { /* symbolic link */ 267 [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
268 .type = ST_INODE,
242 .setup = xfs_scrub_setup_symlink, 269 .setup = xfs_scrub_setup_symlink,
243 .scrub = xfs_scrub_symlink, 270 .scrub = xfs_scrub_symlink,
244 }, 271 },
245 { /* parent pointers */ 272 [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */
273 .type = ST_INODE,
246 .setup = xfs_scrub_setup_parent, 274 .setup = xfs_scrub_setup_parent,
247 .scrub = xfs_scrub_parent, 275 .scrub = xfs_scrub_parent,
248 }, 276 },
249 { /* realtime bitmap */ 277 [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
278 .type = ST_FS,
250 .setup = xfs_scrub_setup_rt, 279 .setup = xfs_scrub_setup_rt,
251 .scrub = xfs_scrub_rtbitmap, 280 .scrub = xfs_scrub_rtbitmap,
252 .has = xfs_sb_version_hasrealtime, 281 .has = xfs_sb_version_hasrealtime,
253 }, 282 },
254 { /* realtime summary */ 283 [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
284 .type = ST_FS,
255 .setup = xfs_scrub_setup_rt, 285 .setup = xfs_scrub_setup_rt,
256 .scrub = xfs_scrub_rtsummary, 286 .scrub = xfs_scrub_rtsummary,
257 .has = xfs_sb_version_hasrealtime, 287 .has = xfs_sb_version_hasrealtime,
258 }, 288 },
259 { /* user quota */ 289 [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
260 .setup = xfs_scrub_setup_quota, 290 .type = ST_FS,
261 .scrub = xfs_scrub_quota, 291 .setup = xfs_scrub_setup_quota,
292 .scrub = xfs_scrub_quota,
262 }, 293 },
263 { /* group quota */ 294 [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */
264 .setup = xfs_scrub_setup_quota, 295 .type = ST_FS,
265 .scrub = xfs_scrub_quota, 296 .setup = xfs_scrub_setup_quota,
297 .scrub = xfs_scrub_quota,
266 }, 298 },
267 { /* project quota */ 299 [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */
268 .setup = xfs_scrub_setup_quota, 300 .type = ST_FS,
269 .scrub = xfs_scrub_quota, 301 .setup = xfs_scrub_setup_quota,
302 .scrub = xfs_scrub_quota,
270 }, 303 },
271}; 304};
272 305
@@ -284,44 +317,56 @@ xfs_scrub_experimental_warning(
284"EXPERIMENTAL online scrub feature in use. Use at your own risk!"); 317"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
285} 318}
286 319
287/* Dispatch metadata scrubbing. */ 320static int
288int 321xfs_scrub_validate_inputs(
289xfs_scrub_metadata( 322 struct xfs_mount *mp,
290 struct xfs_inode *ip,
291 struct xfs_scrub_metadata *sm) 323 struct xfs_scrub_metadata *sm)
292{ 324{
293 struct xfs_scrub_context sc; 325 int error;
294 struct xfs_mount *mp = ip->i_mount;
295 const struct xfs_scrub_meta_ops *ops; 326 const struct xfs_scrub_meta_ops *ops;
296 bool try_harder = false;
297 int error = 0;
298
299 trace_xfs_scrub_start(ip, sm, error);
300
301 /* Forbidden if we are shut down or mounted norecovery. */
302 error = -ESHUTDOWN;
303 if (XFS_FORCED_SHUTDOWN(mp))
304 goto out;
305 error = -ENOTRECOVERABLE;
306 if (mp->m_flags & XFS_MOUNT_NORECOVERY)
307 goto out;
308 327
309 /* Check our inputs. */
310 error = -EINVAL; 328 error = -EINVAL;
329 /* Check our inputs. */
311 sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; 330 sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
312 if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) 331 if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
313 goto out; 332 goto out;
333 /* sm_reserved[] must be zero */
314 if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) 334 if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
315 goto out; 335 goto out;
316 336
317 /* Do we know about this type of metadata? */
318 error = -ENOENT; 337 error = -ENOENT;
338 /* Do we know about this type of metadata? */
319 if (sm->sm_type >= XFS_SCRUB_TYPE_NR) 339 if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
320 goto out; 340 goto out;
321 ops = &meta_scrub_ops[sm->sm_type]; 341 ops = &meta_scrub_ops[sm->sm_type];
322 if (ops->scrub == NULL) 342 if (ops->setup == NULL || ops->scrub == NULL)
323 goto out; 343 goto out;
344 /* Does this fs even support this type of metadata? */
345 if (ops->has && !ops->has(&mp->m_sb))
346 goto out;
347
348 error = -EINVAL;
349 /* restricting fields must be appropriate for type */
350 switch (ops->type) {
351 case ST_NONE:
352 case ST_FS:
353 if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
354 goto out;
355 break;
356 case ST_PERAG:
357 if (sm->sm_ino || sm->sm_gen ||
358 sm->sm_agno >= mp->m_sb.sb_agcount)
359 goto out;
360 break;
361 case ST_INODE:
362 if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
363 goto out;
364 break;
365 default:
366 goto out;
367 }
324 368
369 error = -EOPNOTSUPP;
325 /* 370 /*
326 * We won't scrub any filesystem that doesn't have the ability 371 * We won't scrub any filesystem that doesn't have the ability
327 * to record unwritten extents. The option was made default in 372 * to record unwritten extents. The option was made default in
@@ -331,20 +376,46 @@ xfs_scrub_metadata(
331 * We also don't support v1-v3 filesystems, which aren't 376 * We also don't support v1-v3 filesystems, which aren't
332 * mountable. 377 * mountable.
333 */ 378 */
334 error = -EOPNOTSUPP;
335 if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) 379 if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
336 goto out; 380 goto out;
337 381
338 /* Does this fs even support this type of metadata? */
339 error = -ENOENT;
340 if (ops->has && !ops->has(&mp->m_sb))
341 goto out;
342
343 /* We don't know how to repair anything yet. */ 382 /* We don't know how to repair anything yet. */
344 error = -EOPNOTSUPP;
345 if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 383 if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
346 goto out; 384 goto out;
347 385
386 error = 0;
387out:
388 return error;
389}
390
391/* Dispatch metadata scrubbing. */
392int
393xfs_scrub_metadata(
394 struct xfs_inode *ip,
395 struct xfs_scrub_metadata *sm)
396{
397 struct xfs_scrub_context sc;
398 struct xfs_mount *mp = ip->i_mount;
399 bool try_harder = false;
400 int error = 0;
401
402 BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
403 (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR));
404
405 trace_xfs_scrub_start(ip, sm, error);
406
407 /* Forbidden if we are shut down or mounted norecovery. */
408 error = -ESHUTDOWN;
409 if (XFS_FORCED_SHUTDOWN(mp))
410 goto out;
411 error = -ENOTRECOVERABLE;
412 if (mp->m_flags & XFS_MOUNT_NORECOVERY)
413 goto out;
414
415 error = xfs_scrub_validate_inputs(mp, sm);
416 if (error)
417 goto out;
418
348 xfs_scrub_experimental_warning(mp); 419 xfs_scrub_experimental_warning(mp);
349 420
350retry_op: 421retry_op:
@@ -352,7 +423,7 @@ retry_op:
352 memset(&sc, 0, sizeof(sc)); 423 memset(&sc, 0, sizeof(sc));
353 sc.mp = ip->i_mount; 424 sc.mp = ip->i_mount;
354 sc.sm = sm; 425 sc.sm = sm;
355 sc.ops = ops; 426 sc.ops = &meta_scrub_ops[sm->sm_type];
356 sc.try_harder = try_harder; 427 sc.try_harder = try_harder;
357 sc.sa.agno = NULLAGNUMBER; 428 sc.sa.agno = NULLAGNUMBER;
358 error = sc.ops->setup(&sc, ip); 429 error = sc.ops->setup(&sc, ip);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e9ec041cf713..0d92af86f67a 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -22,6 +22,14 @@
22 22
23struct xfs_scrub_context; 23struct xfs_scrub_context;
24 24
25/* Type info and names for the scrub types. */
26enum xfs_scrub_type {
27 ST_NONE = 1, /* disabled */
28 ST_PERAG, /* per-AG metadata */
29 ST_FS, /* per-FS metadata */
30 ST_INODE, /* per-inode metadata */
31};
32
25struct xfs_scrub_meta_ops { 33struct xfs_scrub_meta_ops {
26 /* Acquire whatever resources are needed for the operation. */ 34 /* Acquire whatever resources are needed for the operation. */
27 int (*setup)(struct xfs_scrub_context *, 35 int (*setup)(struct xfs_scrub_context *,
@@ -32,6 +40,9 @@ struct xfs_scrub_meta_ops {
32 40
33 /* Decide if we even have this piece of metadata. */ 41 /* Decide if we even have this piece of metadata. */
34 bool (*has)(struct xfs_sb *); 42 bool (*has)(struct xfs_sb *);
43
44 /* type describing required/allowed inputs */
45 enum xfs_scrub_type type;
35}; 46};
36 47
37/* Buffer pointers and btree cursors for an entire AG. */ 48/* Buffer pointers and btree cursors for an entire AG. */
@@ -112,4 +123,30 @@ xfs_scrub_quota(struct xfs_scrub_context *sc)
112} 123}
113#endif 124#endif
114 125
126/* cross-referencing helpers */
127void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc,
128 xfs_agblock_t agbno, xfs_extlen_t len);
129void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc,
130 xfs_agblock_t agbno, xfs_extlen_t len);
131void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc,
132 xfs_agblock_t agbno, xfs_extlen_t len);
133void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc,
134 xfs_agblock_t agbno, xfs_extlen_t len,
135 struct xfs_owner_info *oinfo);
136void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc,
137 xfs_agblock_t agbno, xfs_extlen_t len,
138 struct xfs_owner_info *oinfo);
139void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc,
140 xfs_agblock_t agbno, xfs_extlen_t len);
141void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc,
142 xfs_agblock_t bno, xfs_extlen_t len);
143void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc,
144 xfs_agblock_t bno, xfs_extlen_t len);
145#ifdef CONFIG_XFS_RT
146void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc,
147 xfs_rtblock_t rtbno, xfs_extlen_t len);
148#else
149# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
150#endif
151
115#endif /* __XFS_SCRUB_SCRUB_H__ */ 152#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index c4ebfb5c1ee8..4dc896852bf0 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
50 __entry->flags = sm->sm_flags; 50 __entry->flags = sm->sm_flags;
51 __entry->error = error; 51 __entry->error = error;
52 ), 52 ),
53 TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d", 53 TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d",
54 MAJOR(__entry->dev), MINOR(__entry->dev), 54 MAJOR(__entry->dev), MINOR(__entry->dev),
55 __entry->ino, 55 __entry->ino,
56 __entry->type, 56 __entry->type,
@@ -90,7 +90,7 @@ TRACE_EVENT(xfs_scrub_op_error,
90 __entry->error = error; 90 __entry->error = error;
91 __entry->ret_ip = ret_ip; 91 __entry->ret_ip = ret_ip;
92 ), 92 ),
93 TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF", 93 TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS",
94 MAJOR(__entry->dev), MINOR(__entry->dev), 94 MAJOR(__entry->dev), MINOR(__entry->dev),
95 __entry->type, 95 __entry->type,
96 __entry->agno, 96 __entry->agno,
@@ -121,7 +121,7 @@ TRACE_EVENT(xfs_scrub_file_op_error,
121 __entry->error = error; 121 __entry->error = error;
122 __entry->ret_ip = ret_ip; 122 __entry->ret_ip = ret_ip;
123 ), 123 ),
124 TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF", 124 TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS",
125 MAJOR(__entry->dev), MINOR(__entry->dev), 125 MAJOR(__entry->dev), MINOR(__entry->dev),
126 __entry->ino, 126 __entry->ino,
127 __entry->whichfork, 127 __entry->whichfork,
@@ -156,7 +156,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
156 __entry->bno = bno; 156 __entry->bno = bno;
157 __entry->ret_ip = ret_ip; 157 __entry->ret_ip = ret_ip;
158 ), 158 ),
159 TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF", 159 TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS",
160 MAJOR(__entry->dev), MINOR(__entry->dev), 160 MAJOR(__entry->dev), MINOR(__entry->dev),
161 __entry->type, 161 __entry->type,
162 __entry->agno, 162 __entry->agno,
@@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
207 __entry->bno = bno; 207 __entry->bno = bno;
208 __entry->ret_ip = ret_ip; 208 __entry->ret_ip = ret_ip;
209 ), 209 ),
210 TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF", 210 TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS",
211 MAJOR(__entry->dev), MINOR(__entry->dev), 211 MAJOR(__entry->dev), MINOR(__entry->dev),
212 __entry->ino, 212 __entry->ino,
213 __entry->type, 213 __entry->type,
@@ -246,7 +246,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
246 __entry->offset = offset; 246 __entry->offset = offset;
247 __entry->ret_ip = ret_ip; 247 __entry->ret_ip = ret_ip;
248 ), 248 ),
249 TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF", 249 TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS",
250 MAJOR(__entry->dev), MINOR(__entry->dev), 250 MAJOR(__entry->dev), MINOR(__entry->dev),
251 __entry->ino, 251 __entry->ino,
252 __entry->whichfork, 252 __entry->whichfork,
@@ -277,7 +277,7 @@ TRACE_EVENT(xfs_scrub_incomplete,
277 __entry->type = sc->sm->sm_type; 277 __entry->type = sc->sm->sm_type;
278 __entry->ret_ip = ret_ip; 278 __entry->ret_ip = ret_ip;
279 ), 279 ),
280 TP_printk("dev %d:%d type %u ret_ip %pF", 280 TP_printk("dev %d:%d type %u ret_ip %pS",
281 MAJOR(__entry->dev), MINOR(__entry->dev), 281 MAJOR(__entry->dev), MINOR(__entry->dev),
282 __entry->type, 282 __entry->type,
283 __entry->ret_ip) 283 __entry->ret_ip)
@@ -311,7 +311,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error,
311 __entry->error = error; 311 __entry->error = error;
312 __entry->ret_ip = ret_ip; 312 __entry->ret_ip = ret_ip;
313 ), 313 ),
314 TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", 314 TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
315 MAJOR(__entry->dev), MINOR(__entry->dev), 315 MAJOR(__entry->dev), MINOR(__entry->dev),
316 __entry->type, 316 __entry->type,
317 __entry->btnum, 317 __entry->btnum,
@@ -354,7 +354,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
354 __entry->error = error; 354 __entry->error = error;
355 __entry->ret_ip = ret_ip; 355 __entry->ret_ip = ret_ip;
356 ), 356 ),
357 TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF", 357 TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
358 MAJOR(__entry->dev), MINOR(__entry->dev), 358 MAJOR(__entry->dev), MINOR(__entry->dev),
359 __entry->ino, 359 __entry->ino,
360 __entry->whichfork, 360 __entry->whichfork,
@@ -393,7 +393,7 @@ TRACE_EVENT(xfs_scrub_btree_error,
393 __entry->ptr = cur->bc_ptrs[level]; 393 __entry->ptr = cur->bc_ptrs[level];
394 __entry->ret_ip = ret_ip; 394 __entry->ret_ip = ret_ip;
395 ), 395 ),
396 TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 396 TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
397 MAJOR(__entry->dev), MINOR(__entry->dev), 397 MAJOR(__entry->dev), MINOR(__entry->dev),
398 __entry->type, 398 __entry->type,
399 __entry->btnum, 399 __entry->btnum,
@@ -433,7 +433,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error,
433 __entry->ptr = cur->bc_ptrs[level]; 433 __entry->ptr = cur->bc_ptrs[level];
434 __entry->ret_ip = ret_ip; 434 __entry->ret_ip = ret_ip;
435 ), 435 ),
436 TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF", 436 TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
437 MAJOR(__entry->dev), MINOR(__entry->dev), 437 MAJOR(__entry->dev), MINOR(__entry->dev),
438 __entry->ino, 438 __entry->ino,
439 __entry->whichfork, 439 __entry->whichfork,
@@ -491,6 +491,28 @@ DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
491DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec); 491DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
492DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key); 492DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
493 493
494TRACE_EVENT(xfs_scrub_xref_error,
495 TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip),
496 TP_ARGS(sc, error, ret_ip),
497 TP_STRUCT__entry(
498 __field(dev_t, dev)
499 __field(int, type)
500 __field(int, error)
501 __field(void *, ret_ip)
502 ),
503 TP_fast_assign(
504 __entry->dev = sc->mp->m_super->s_dev;
505 __entry->type = sc->sm->sm_type;
506 __entry->error = error;
507 __entry->ret_ip = ret_ip;
508 ),
509 TP_printk("dev %d:%d type %u xref error %d ret_ip %pF",
510 MAJOR(__entry->dev), MINOR(__entry->dev),
511 __entry->type,
512 __entry->error,
513 __entry->ret_ip)
514);
515
494#endif /* _TRACE_XFS_SCRUB_TRACE_H */ 516#endif /* _TRACE_XFS_SCRUB_TRACE_H */
495 517
496#undef TRACE_INCLUDE_PATH 518#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4fc526a27a94..9c6a830da0ee 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -390,6 +390,19 @@ xfs_map_blocks(
390 if (XFS_FORCED_SHUTDOWN(mp)) 390 if (XFS_FORCED_SHUTDOWN(mp))
391 return -EIO; 391 return -EIO;
392 392
393 /*
394 * Truncate can race with writeback since writeback doesn't take the
395 * iolock and truncate decreases the file size before it starts
396 * truncating the pages between new_size and old_size. Therefore, we
397 * can end up in the situation where writeback gets a CoW fork mapping
398 * but the truncate makes the mapping invalid and we end up in here
399 * trying to get a new mapping. Bail out here so that we simply never
400 * get a valid mapping and so we drop the write altogether. The page
401 * truncation will kill the contents anyway.
402 */
403 if (type == XFS_IO_COW && offset > i_size_read(inode))
404 return 0;
405
393 ASSERT(type != XFS_IO_COW); 406 ASSERT(type != XFS_IO_COW);
394 if (type == XFS_IO_UNWRITTEN) 407 if (type == XFS_IO_UNWRITTEN)
395 bmapi_flags |= XFS_BMAPI_IGSTATE; 408 bmapi_flags |= XFS_BMAPI_IGSTATE;
@@ -791,7 +804,7 @@ xfs_aops_discard_page(
791 goto out_invalidate; 804 goto out_invalidate;
792 805
793 xfs_alert(ip->i_mount, 806 xfs_alert(ip->i_mount,
794 "page discard on page %p, inode 0x%llx, offset %llu.", 807 "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
795 page, ip->i_ino, offset); 808 page, ip->i_ino, offset);
796 809
797 xfs_ilock(ip, XFS_ILOCK_EXCL); 810 xfs_ilock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 6d37ab43195f..c83f549dc17b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1872,7 +1872,7 @@ xfs_swap_extents(
1872 */ 1872 */
1873 lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 1873 lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1874 lock_flags = XFS_MMAPLOCK_EXCL; 1874 lock_flags = XFS_MMAPLOCK_EXCL;
1875 xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); 1875 xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
1876 1876
1877 /* Verify that both files have the same format */ 1877 /* Verify that both files have the same format */
1878 if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { 1878 if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
@@ -1919,7 +1919,7 @@ xfs_swap_extents(
1919 * Lock and join the inodes to the tansaction so that transaction commit 1919 * Lock and join the inodes to the tansaction so that transaction commit
1920 * or cancel will unlock the inodes from this point onwards. 1920 * or cancel will unlock the inodes from this point onwards.
1921 */ 1921 */
1922 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1922 xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
1923 lock_flags |= XFS_ILOCK_EXCL; 1923 lock_flags |= XFS_ILOCK_EXCL;
1924 xfs_trans_ijoin(tp, ip, 0); 1924 xfs_trans_ijoin(tp, ip, 0);
1925 xfs_trans_ijoin(tp, tip, 0); 1925 xfs_trans_ijoin(tp, tip, 0);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4c6e86d861fd..d1da2ee9e6db 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -236,6 +236,7 @@ _xfs_buf_alloc(
236 init_completion(&bp->b_iowait); 236 init_completion(&bp->b_iowait);
237 INIT_LIST_HEAD(&bp->b_lru); 237 INIT_LIST_HEAD(&bp->b_lru);
238 INIT_LIST_HEAD(&bp->b_list); 238 INIT_LIST_HEAD(&bp->b_list);
239 INIT_LIST_HEAD(&bp->b_li_list);
239 sema_init(&bp->b_sema, 0); /* held, no waiters */ 240 sema_init(&bp->b_sema, 0); /* held, no waiters */
240 spin_lock_init(&bp->b_lock); 241 spin_lock_init(&bp->b_lock);
241 XB_SET_OWNER(bp); 242 XB_SET_OWNER(bp);
@@ -585,7 +586,7 @@ _xfs_buf_find(
585 * returning a specific error on buffer lookup failures. 586 * returning a specific error on buffer lookup failures.
586 */ 587 */
587 xfs_alert(btp->bt_mount, 588 xfs_alert(btp->bt_mount,
588 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", 589 "%s: daddr 0x%llx out of range, EOFS 0x%llx",
589 __func__, cmap.bm_bn, eofs); 590 __func__, cmap.bm_bn, eofs);
590 WARN_ON(1); 591 WARN_ON(1);
591 return NULL; 592 return NULL;
@@ -1180,13 +1181,14 @@ xfs_buf_ioend_async(
1180} 1181}
1181 1182
1182void 1183void
1183xfs_buf_ioerror( 1184__xfs_buf_ioerror(
1184 xfs_buf_t *bp, 1185 xfs_buf_t *bp,
1185 int error) 1186 int error,
1187 xfs_failaddr_t failaddr)
1186{ 1188{
1187 ASSERT(error <= 0 && error >= -1000); 1189 ASSERT(error <= 0 && error >= -1000);
1188 bp->b_error = error; 1190 bp->b_error = error;
1189 trace_xfs_buf_ioerror(bp, error, _RET_IP_); 1191 trace_xfs_buf_ioerror(bp, error, failaddr);
1190} 1192}
1191 1193
1192void 1194void
@@ -1195,8 +1197,9 @@ xfs_buf_ioerror_alert(
1195 const char *func) 1197 const char *func)
1196{ 1198{
1197 xfs_alert(bp->b_target->bt_mount, 1199 xfs_alert(bp->b_target->bt_mount,
1198"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", 1200"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
1199 (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length); 1201 func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
1202 -bp->b_error);
1200} 1203}
1201 1204
1202int 1205int
@@ -1378,9 +1381,10 @@ _xfs_buf_ioapply(
1378 */ 1381 */
1379 if (xfs_sb_version_hascrc(&mp->m_sb)) { 1382 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1380 xfs_warn(mp, 1383 xfs_warn(mp,
1381 "%s: no ops on block 0x%llx/0x%x", 1384 "%s: no buf ops on daddr 0x%llx len %d",
1382 __func__, bp->b_bn, bp->b_length); 1385 __func__, bp->b_bn, bp->b_length);
1383 xfs_hex_dump(bp->b_addr, 64); 1386 xfs_hex_dump(bp->b_addr,
1387 XFS_CORRUPTION_DUMP_LEN);
1384 dump_stack(); 1388 dump_stack();
1385 } 1389 }
1386 } 1390 }
@@ -1671,7 +1675,7 @@ xfs_wait_buftarg(
1671 list_del_init(&bp->b_lru); 1675 list_del_init(&bp->b_lru);
1672 if (bp->b_flags & XBF_WRITE_FAIL) { 1676 if (bp->b_flags & XBF_WRITE_FAIL) {
1673 xfs_alert(btp->bt_mount, 1677 xfs_alert(btp->bt_mount,
1674"Corruption Alert: Buffer at block 0x%llx had permanent write failures!", 1678"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
1675 (long long)bp->b_bn); 1679 (long long)bp->b_bn);
1676 xfs_alert(btp->bt_mount, 1680 xfs_alert(btp->bt_mount,
1677"Please run xfs_repair to determine the extent of the problem."); 1681"Please run xfs_repair to determine the extent of the problem.");
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index f873bb786824..2f4c91452861 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -140,6 +140,7 @@ struct xfs_buf_ops {
140 char *name; 140 char *name;
141 void (*verify_read)(struct xfs_buf *); 141 void (*verify_read)(struct xfs_buf *);
142 void (*verify_write)(struct xfs_buf *); 142 void (*verify_write)(struct xfs_buf *);
143 xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp);
143}; 144};
144 145
145typedef struct xfs_buf { 146typedef struct xfs_buf {
@@ -175,7 +176,8 @@ typedef struct xfs_buf {
175 struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ 176 struct workqueue_struct *b_ioend_wq; /* I/O completion wq */
176 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 177 xfs_buf_iodone_t b_iodone; /* I/O completion function */
177 struct completion b_iowait; /* queue for I/O waiters */ 178 struct completion b_iowait; /* queue for I/O waiters */
178 void *b_fspriv; 179 void *b_log_item;
180 struct list_head b_li_list; /* Log items list head */
179 struct xfs_trans *b_transp; 181 struct xfs_trans *b_transp;
180 struct page **b_pages; /* array of page pointers */ 182 struct page **b_pages; /* array of page pointers */
181 struct page *b_page_array[XB_PAGES]; /* inline pages */ 183 struct page *b_page_array[XB_PAGES]; /* inline pages */
@@ -315,7 +317,9 @@ extern void xfs_buf_unlock(xfs_buf_t *);
315/* Buffer Read and Write Routines */ 317/* Buffer Read and Write Routines */
316extern int xfs_bwrite(struct xfs_buf *bp); 318extern int xfs_bwrite(struct xfs_buf *bp);
317extern void xfs_buf_ioend(struct xfs_buf *bp); 319extern void xfs_buf_ioend(struct xfs_buf *bp);
318extern void xfs_buf_ioerror(xfs_buf_t *, int); 320extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
321 xfs_failaddr_t failaddr);
322#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
319extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); 323extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
320extern void xfs_buf_submit(struct xfs_buf *bp); 324extern void xfs_buf_submit(struct xfs_buf *bp);
321extern int xfs_buf_submit_wait(struct xfs_buf *bp); 325extern int xfs_buf_submit_wait(struct xfs_buf *bp);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index e0a0af0946f2..270ddb4d2313 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -61,14 +61,14 @@ xfs_buf_log_format_size(
61 */ 61 */
62STATIC void 62STATIC void
63xfs_buf_item_size_segment( 63xfs_buf_item_size_segment(
64 struct xfs_buf_log_item *bip, 64 struct xfs_buf_log_item *bip,
65 struct xfs_buf_log_format *blfp, 65 struct xfs_buf_log_format *blfp,
66 int *nvecs, 66 int *nvecs,
67 int *nbytes) 67 int *nbytes)
68{ 68{
69 struct xfs_buf *bp = bip->bli_buf; 69 struct xfs_buf *bp = bip->bli_buf;
70 int next_bit; 70 int next_bit;
71 int last_bit; 71 int last_bit;
72 72
73 last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 73 last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
74 if (last_bit == -1) 74 if (last_bit == -1)
@@ -218,12 +218,12 @@ xfs_buf_item_format_segment(
218 uint offset, 218 uint offset,
219 struct xfs_buf_log_format *blfp) 219 struct xfs_buf_log_format *blfp)
220{ 220{
221 struct xfs_buf *bp = bip->bli_buf; 221 struct xfs_buf *bp = bip->bli_buf;
222 uint base_size; 222 uint base_size;
223 int first_bit; 223 int first_bit;
224 int last_bit; 224 int last_bit;
225 int next_bit; 225 int next_bit;
226 uint nbits; 226 uint nbits;
227 227
228 /* copy the flags across from the base format item */ 228 /* copy the flags across from the base format item */
229 blfp->blf_flags = bip->__bli_format.blf_flags; 229 blfp->blf_flags = bip->__bli_format.blf_flags;
@@ -406,12 +406,12 @@ xfs_buf_item_unpin(
406 int remove) 406 int remove)
407{ 407{
408 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 408 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
409 xfs_buf_t *bp = bip->bli_buf; 409 xfs_buf_t *bp = bip->bli_buf;
410 struct xfs_ail *ailp = lip->li_ailp; 410 struct xfs_ail *ailp = lip->li_ailp;
411 int stale = bip->bli_flags & XFS_BLI_STALE; 411 int stale = bip->bli_flags & XFS_BLI_STALE;
412 int freed; 412 int freed;
413 413
414 ASSERT(bp->b_fspriv == bip); 414 ASSERT(bp->b_log_item == bip);
415 ASSERT(atomic_read(&bip->bli_refcount) > 0); 415 ASSERT(atomic_read(&bip->bli_refcount) > 0);
416 416
417 trace_xfs_buf_item_unpin(bip); 417 trace_xfs_buf_item_unpin(bip);
@@ -456,13 +456,14 @@ xfs_buf_item_unpin(
456 */ 456 */
457 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 457 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
458 xfs_buf_do_callbacks(bp); 458 xfs_buf_do_callbacks(bp);
459 bp->b_fspriv = NULL; 459 bp->b_log_item = NULL;
460 list_del_init(&bp->b_li_list);
460 bp->b_iodone = NULL; 461 bp->b_iodone = NULL;
461 } else { 462 } else {
462 spin_lock(&ailp->xa_lock); 463 spin_lock(&ailp->xa_lock);
463 xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); 464 xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
464 xfs_buf_item_relse(bp); 465 xfs_buf_item_relse(bp);
465 ASSERT(bp->b_fspriv == NULL); 466 ASSERT(bp->b_log_item == NULL);
466 } 467 }
467 xfs_buf_relse(bp); 468 xfs_buf_relse(bp);
468 } else if (freed && remove) { 469 } else if (freed && remove) {
@@ -722,18 +723,15 @@ xfs_buf_item_free_format(
722 723
723/* 724/*
724 * Allocate a new buf log item to go with the given buffer. 725 * Allocate a new buf log item to go with the given buffer.
725 * Set the buffer's b_fsprivate field to point to the new 726 * Set the buffer's b_log_item field to point to the new
726 * buf log item. If there are other item's attached to the 727 * buf log item.
727 * buffer (see xfs_buf_attach_iodone() below), then put the
728 * buf log item at the front.
729 */ 728 */
730int 729int
731xfs_buf_item_init( 730xfs_buf_item_init(
732 struct xfs_buf *bp, 731 struct xfs_buf *bp,
733 struct xfs_mount *mp) 732 struct xfs_mount *mp)
734{ 733{
735 struct xfs_log_item *lip = bp->b_fspriv; 734 struct xfs_buf_log_item *bip = bp->b_log_item;
736 struct xfs_buf_log_item *bip;
737 int chunks; 735 int chunks;
738 int map_size; 736 int map_size;
739 int error; 737 int error;
@@ -741,13 +739,14 @@ xfs_buf_item_init(
741 739
742 /* 740 /*
743 * Check to see if there is already a buf log item for 741 * Check to see if there is already a buf log item for
744 * this buffer. If there is, it is guaranteed to be 742 * this buffer. If we do already have one, there is
745 * the first. If we do already have one, there is
746 * nothing to do here so return. 743 * nothing to do here so return.
747 */ 744 */
748 ASSERT(bp->b_target->bt_mount == mp); 745 ASSERT(bp->b_target->bt_mount == mp);
749 if (lip != NULL && lip->li_type == XFS_LI_BUF) 746 if (bip != NULL) {
747 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
750 return 0; 748 return 0;
749 }
751 750
752 bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); 751 bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
753 xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); 752 xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
@@ -781,13 +780,7 @@ xfs_buf_item_init(
781 bip->bli_formats[i].blf_map_size = map_size; 780 bip->bli_formats[i].blf_map_size = map_size;
782 } 781 }
783 782
784 /* 783 bp->b_log_item = bip;
785 * Put the buf item into the list of items attached to the
786 * buffer at the front.
787 */
788 if (bp->b_fspriv)
789 bip->bli_item.li_bio_list = bp->b_fspriv;
790 bp->b_fspriv = bip;
791 xfs_buf_hold(bp); 784 xfs_buf_hold(bp);
792 return 0; 785 return 0;
793} 786}
@@ -880,7 +873,7 @@ xfs_buf_item_log_segment(
880 */ 873 */
881void 874void
882xfs_buf_item_log( 875xfs_buf_item_log(
883 xfs_buf_log_item_t *bip, 876 struct xfs_buf_log_item *bip,
884 uint first, 877 uint first,
885 uint last) 878 uint last)
886{ 879{
@@ -943,7 +936,7 @@ xfs_buf_item_dirty_format(
943 936
944STATIC void 937STATIC void
945xfs_buf_item_free( 938xfs_buf_item_free(
946 xfs_buf_log_item_t *bip) 939 struct xfs_buf_log_item *bip)
947{ 940{
948 xfs_buf_item_free_format(bip); 941 xfs_buf_item_free_format(bip);
949 kmem_free(bip->bli_item.li_lv_shadow); 942 kmem_free(bip->bli_item.li_lv_shadow);
@@ -961,13 +954,13 @@ void
961xfs_buf_item_relse( 954xfs_buf_item_relse(
962 xfs_buf_t *bp) 955 xfs_buf_t *bp)
963{ 956{
964 xfs_buf_log_item_t *bip = bp->b_fspriv; 957 struct xfs_buf_log_item *bip = bp->b_log_item;
965 958
966 trace_xfs_buf_item_relse(bp, _RET_IP_); 959 trace_xfs_buf_item_relse(bp, _RET_IP_);
967 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); 960 ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
968 961
969 bp->b_fspriv = bip->bli_item.li_bio_list; 962 bp->b_log_item = NULL;
970 if (bp->b_fspriv == NULL) 963 if (list_empty(&bp->b_li_list))
971 bp->b_iodone = NULL; 964 bp->b_iodone = NULL;
972 965
973 xfs_buf_rele(bp); 966 xfs_buf_rele(bp);
@@ -980,9 +973,7 @@ xfs_buf_item_relse(
980 * to be called when the buffer's I/O completes. If it is not set 973 * to be called when the buffer's I/O completes. If it is not set
981 * already, set the buffer's b_iodone() routine to be 974 * already, set the buffer's b_iodone() routine to be
982 * xfs_buf_iodone_callbacks() and link the log item into the list of 975 * xfs_buf_iodone_callbacks() and link the log item into the list of
983 * items rooted at b_fsprivate. Items are always added as the second 976 * items rooted at b_li_list.
984 * entry in the list if there is a first, because the buf item code
985 * assumes that the buf log item is first.
986 */ 977 */
987void 978void
988xfs_buf_attach_iodone( 979xfs_buf_attach_iodone(
@@ -990,18 +981,10 @@ xfs_buf_attach_iodone(
990 void (*cb)(xfs_buf_t *, xfs_log_item_t *), 981 void (*cb)(xfs_buf_t *, xfs_log_item_t *),
991 xfs_log_item_t *lip) 982 xfs_log_item_t *lip)
992{ 983{
993 xfs_log_item_t *head_lip;
994
995 ASSERT(xfs_buf_islocked(bp)); 984 ASSERT(xfs_buf_islocked(bp));
996 985
997 lip->li_cb = cb; 986 lip->li_cb = cb;
998 head_lip = bp->b_fspriv; 987 list_add_tail(&lip->li_bio_list, &bp->b_li_list);
999 if (head_lip) {
1000 lip->li_bio_list = head_lip->li_bio_list;
1001 head_lip->li_bio_list = lip;
1002 } else {
1003 bp->b_fspriv = lip;
1004 }
1005 988
1006 ASSERT(bp->b_iodone == NULL || 989 ASSERT(bp->b_iodone == NULL ||
1007 bp->b_iodone == xfs_buf_iodone_callbacks); 990 bp->b_iodone == xfs_buf_iodone_callbacks);
@@ -1011,12 +994,12 @@ xfs_buf_attach_iodone(
1011/* 994/*
1012 * We can have many callbacks on a buffer. Running the callbacks individually 995 * We can have many callbacks on a buffer. Running the callbacks individually
1013 * can cause a lot of contention on the AIL lock, so we allow for a single 996 * can cause a lot of contention on the AIL lock, so we allow for a single
1014 * callback to be able to scan the remaining lip->li_bio_list for other items 997 * callback to be able to scan the remaining items in bp->b_li_list for other
1015 * of the same type and callback to be processed in the first call. 998 * items of the same type and callback to be processed in the first call.
1016 * 999 *
1017 * As a result, the loop walking the callback list below will also modify the 1000 * As a result, the loop walking the callback list below will also modify the
1018 * list. it removes the first item from the list and then runs the callback. 1001 * list. it removes the first item from the list and then runs the callback.
1019 * The loop then restarts from the new head of the list. This allows the 1002 * The loop then restarts from the new first item int the list. This allows the
1020 * callback to scan and modify the list attached to the buffer and we don't 1003 * callback to scan and modify the list attached to the buffer and we don't
1021 * have to care about maintaining a next item pointer. 1004 * have to care about maintaining a next item pointer.
1022 */ 1005 */
@@ -1024,18 +1007,26 @@ STATIC void
1024xfs_buf_do_callbacks( 1007xfs_buf_do_callbacks(
1025 struct xfs_buf *bp) 1008 struct xfs_buf *bp)
1026{ 1009{
1010 struct xfs_buf_log_item *blip = bp->b_log_item;
1027 struct xfs_log_item *lip; 1011 struct xfs_log_item *lip;
1028 1012
1029 while ((lip = bp->b_fspriv) != NULL) { 1013 /* If there is a buf_log_item attached, run its callback */
1030 bp->b_fspriv = lip->li_bio_list; 1014 if (blip) {
1031 ASSERT(lip->li_cb != NULL); 1015 lip = &blip->bli_item;
1016 lip->li_cb(bp, lip);
1017 }
1018
1019 while (!list_empty(&bp->b_li_list)) {
1020 lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
1021 li_bio_list);
1022
1032 /* 1023 /*
1033 * Clear the next pointer so we don't have any 1024 * Remove the item from the list, so we don't have any
1034 * confusion if the item is added to another buf. 1025 * confusion if the item is added to another buf.
1035 * Don't touch the log item after calling its 1026 * Don't touch the log item after calling its
1036 * callback, because it could have freed itself. 1027 * callback, because it could have freed itself.
1037 */ 1028 */
1038 lip->li_bio_list = NULL; 1029 list_del_init(&lip->li_bio_list);
1039 lip->li_cb(bp, lip); 1030 lip->li_cb(bp, lip);
1040 } 1031 }
1041} 1032}
@@ -1052,13 +1043,22 @@ STATIC void
1052xfs_buf_do_callbacks_fail( 1043xfs_buf_do_callbacks_fail(
1053 struct xfs_buf *bp) 1044 struct xfs_buf *bp)
1054{ 1045{
1055 struct xfs_log_item *next; 1046 struct xfs_log_item *lip;
1056 struct xfs_log_item *lip = bp->b_fspriv; 1047 struct xfs_ail *ailp;
1057 struct xfs_ail *ailp = lip->li_ailp;
1058 1048
1049 /*
1050 * Buffer log item errors are handled directly by xfs_buf_item_push()
1051 * and xfs_buf_iodone_callback_error, and they have no IO error
1052 * callbacks. Check only for items in b_li_list.
1053 */
1054 if (list_empty(&bp->b_li_list))
1055 return;
1056
1057 lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
1058 li_bio_list);
1059 ailp = lip->li_ailp;
1059 spin_lock(&ailp->xa_lock); 1060 spin_lock(&ailp->xa_lock);
1060 for (; lip; lip = next) { 1061 list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
1061 next = lip->li_bio_list;
1062 if (lip->li_ops->iop_error) 1062 if (lip->li_ops->iop_error)
1063 lip->li_ops->iop_error(lip, bp); 1063 lip->li_ops->iop_error(lip, bp);
1064 } 1064 }
@@ -1069,13 +1069,23 @@ static bool
1069xfs_buf_iodone_callback_error( 1069xfs_buf_iodone_callback_error(
1070 struct xfs_buf *bp) 1070 struct xfs_buf *bp)
1071{ 1071{
1072 struct xfs_log_item *lip = bp->b_fspriv; 1072 struct xfs_buf_log_item *bip = bp->b_log_item;
1073 struct xfs_mount *mp = lip->li_mountp; 1073 struct xfs_log_item *lip;
1074 struct xfs_mount *mp;
1074 static ulong lasttime; 1075 static ulong lasttime;
1075 static xfs_buftarg_t *lasttarg; 1076 static xfs_buftarg_t *lasttarg;
1076 struct xfs_error_cfg *cfg; 1077 struct xfs_error_cfg *cfg;
1077 1078
1078 /* 1079 /*
1080 * The failed buffer might not have a buf_log_item attached or the
1081 * log_item list might be empty. Get the mp from the available
1082 * xfs_log_item
1083 */
1084 lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item,
1085 li_bio_list);
1086 mp = lip ? lip->li_mountp : bip->bli_item.li_mountp;
1087
1088 /*
1079 * If we've already decided to shutdown the filesystem because of 1089 * If we've already decided to shutdown the filesystem because of
1080 * I/O errors, there's no point in giving this a retry. 1090 * I/O errors, there's no point in giving this a retry.
1081 */ 1091 */
@@ -1183,7 +1193,8 @@ xfs_buf_iodone_callbacks(
1183 bp->b_first_retry_time = 0; 1193 bp->b_first_retry_time = 0;
1184 1194
1185 xfs_buf_do_callbacks(bp); 1195 xfs_buf_do_callbacks(bp);
1186 bp->b_fspriv = NULL; 1196 bp->b_log_item = NULL;
1197 list_del_init(&bp->b_li_list);
1187 bp->b_iodone = NULL; 1198 bp->b_iodone = NULL;
1188 xfs_buf_ioend(bp); 1199 xfs_buf_ioend(bp);
1189} 1200}
@@ -1228,10 +1239,9 @@ xfs_buf_iodone(
1228bool 1239bool
1229xfs_buf_resubmit_failed_buffers( 1240xfs_buf_resubmit_failed_buffers(
1230 struct xfs_buf *bp, 1241 struct xfs_buf *bp,
1231 struct xfs_log_item *lip,
1232 struct list_head *buffer_list) 1242 struct list_head *buffer_list)
1233{ 1243{
1234 struct xfs_log_item *next; 1244 struct xfs_log_item *lip;
1235 1245
1236 /* 1246 /*
1237 * Clear XFS_LI_FAILED flag from all items before resubmit 1247 * Clear XFS_LI_FAILED flag from all items before resubmit
@@ -1239,10 +1249,8 @@ xfs_buf_resubmit_failed_buffers(
1239 * XFS_LI_FAILED set/clear is protected by xa_lock, caller this 1249 * XFS_LI_FAILED set/clear is protected by xa_lock, caller this
1240 * function already have it acquired 1250 * function already have it acquired
1241 */ 1251 */
1242 for (; lip; lip = next) { 1252 list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
1243 next = lip->li_bio_list;
1244 xfs_clear_li_failed(lip); 1253 xfs_clear_li_failed(lip);
1245 }
1246 1254
1247 /* Add this buffer back to the delayed write list */ 1255 /* Add this buffer back to the delayed write list */
1248 return xfs_buf_delwri_queue(bp, buffer_list); 1256 return xfs_buf_delwri_queue(bp, buffer_list);
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 9690ce62c9a7..643f53dcfe51 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -50,7 +50,7 @@ struct xfs_buf_log_item;
50 * needed to log buffers. It tracks how many times the lock has been 50 * needed to log buffers. It tracks how many times the lock has been
51 * locked, and which 128 byte chunks of the buffer are dirty. 51 * locked, and which 128 byte chunks of the buffer are dirty.
52 */ 52 */
53typedef struct xfs_buf_log_item { 53struct xfs_buf_log_item {
54 xfs_log_item_t bli_item; /* common item structure */ 54 xfs_log_item_t bli_item; /* common item structure */
55 struct xfs_buf *bli_buf; /* real buffer pointer */ 55 struct xfs_buf *bli_buf; /* real buffer pointer */
56 unsigned int bli_flags; /* misc flags */ 56 unsigned int bli_flags; /* misc flags */
@@ -59,11 +59,11 @@ typedef struct xfs_buf_log_item {
59 int bli_format_count; /* count of headers */ 59 int bli_format_count; /* count of headers */
60 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 60 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
61 struct xfs_buf_log_format __bli_format; /* embedded in-log header */ 61 struct xfs_buf_log_format __bli_format; /* embedded in-log header */
62} xfs_buf_log_item_t; 62};
63 63
64int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 64int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
65void xfs_buf_item_relse(struct xfs_buf *); 65void xfs_buf_item_relse(struct xfs_buf *);
66void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); 66void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
67bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 67bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
68void xfs_buf_attach_iodone(struct xfs_buf *, 68void xfs_buf_attach_iodone(struct xfs_buf *,
69 void(*)(struct xfs_buf *, xfs_log_item_t *), 69 void(*)(struct xfs_buf *, xfs_log_item_t *),
@@ -71,7 +71,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
71void xfs_buf_iodone_callbacks(struct xfs_buf *); 71void xfs_buf_iodone_callbacks(struct xfs_buf *);
72void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 72void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
73bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, 73bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
74 struct xfs_log_item *,
75 struct list_head *); 74 struct list_head *);
76 75
77extern kmem_zone_t *xfs_buf_item_zone; 76extern kmem_zone_t *xfs_buf_item_zone;
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 0c58918bc0ad..b6ae3597bfb0 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -152,7 +152,6 @@ xfs_dir2_block_getdents(
152 struct xfs_inode *dp = args->dp; /* incore directory inode */ 152 struct xfs_inode *dp = args->dp; /* incore directory inode */
153 xfs_dir2_data_hdr_t *hdr; /* block header */ 153 xfs_dir2_data_hdr_t *hdr; /* block header */
154 struct xfs_buf *bp; /* buffer for block */ 154 struct xfs_buf *bp; /* buffer for block */
155 xfs_dir2_block_tail_t *btp; /* block tail */
156 xfs_dir2_data_entry_t *dep; /* block data entry */ 155 xfs_dir2_data_entry_t *dep; /* block data entry */
157 xfs_dir2_data_unused_t *dup; /* block unused entry */ 156 xfs_dir2_data_unused_t *dup; /* block unused entry */
158 char *endptr; /* end of the data entries */ 157 char *endptr; /* end of the data entries */
@@ -185,9 +184,8 @@ xfs_dir2_block_getdents(
185 /* 184 /*
186 * Set up values for the loop. 185 * Set up values for the loop.
187 */ 186 */
188 btp = xfs_dir2_block_tail_p(geo, hdr);
189 ptr = (char *)dp->d_ops->data_entry_p(hdr); 187 ptr = (char *)dp->d_ops->data_entry_p(hdr);
190 endptr = (char *)xfs_dir2_block_leaf_p(btp); 188 endptr = xfs_dir3_data_endp(geo, hdr);
191 189
192 /* 190 /*
193 * Loop over the data portion of the block. 191 * Loop over the data portion of the block.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index f248708c10ff..43572f8a1b8e 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -399,52 +399,6 @@ error0:
399 return error; 399 return error;
400} 400}
401 401
402STATIC int
403xfs_qm_dqrepair(
404 struct xfs_mount *mp,
405 struct xfs_trans *tp,
406 struct xfs_dquot *dqp,
407 xfs_dqid_t firstid,
408 struct xfs_buf **bpp)
409{
410 int error;
411 struct xfs_disk_dquot *ddq;
412 struct xfs_dqblk *d;
413 int i;
414
415 /*
416 * Read the buffer without verification so we get the corrupted
417 * buffer returned to us. make sure we verify it on write, though.
418 */
419 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
420 mp->m_quotainfo->qi_dqchunklen,
421 0, bpp, NULL);
422
423 if (error) {
424 ASSERT(*bpp == NULL);
425 return error;
426 }
427 (*bpp)->b_ops = &xfs_dquot_buf_ops;
428
429 ASSERT(xfs_buf_islocked(*bpp));
430 d = (struct xfs_dqblk *)(*bpp)->b_addr;
431
432 /* Do the actual repair of dquots in this buffer */
433 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
434 ddq = &d[i].dd_diskdq;
435 error = xfs_dqcheck(mp, ddq, firstid + i,
436 dqp->dq_flags & XFS_DQ_ALLTYPES,
437 XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
438 if (error) {
439 /* repair failed, we're screwed */
440 xfs_trans_brelse(tp, *bpp);
441 return -EIO;
442 }
443 }
444
445 return 0;
446}
447
448/* 402/*
449 * Maps a dquot to the buffer containing its on-disk version. 403 * Maps a dquot to the buffer containing its on-disk version.
450 * This returns a ptr to the buffer containing the on-disk dquot 404 * This returns a ptr to the buffer containing the on-disk dquot
@@ -526,14 +480,6 @@ xfs_qm_dqtobp(
526 dqp->q_blkno, 480 dqp->q_blkno,
527 mp->m_quotainfo->qi_dqchunklen, 481 mp->m_quotainfo->qi_dqchunklen,
528 0, &bp, &xfs_dquot_buf_ops); 482 0, &bp, &xfs_dquot_buf_ops);
529
530 if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
531 xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
532 mp->m_quotainfo->qi_dqperchunk;
533 ASSERT(bp == NULL);
534 error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
535 }
536
537 if (error) { 483 if (error) {
538 ASSERT(bp == NULL); 484 ASSERT(bp == NULL);
539 return error; 485 return error;
@@ -1010,6 +956,7 @@ xfs_qm_dqflush(
1010 struct xfs_mount *mp = dqp->q_mount; 956 struct xfs_mount *mp = dqp->q_mount;
1011 struct xfs_buf *bp; 957 struct xfs_buf *bp;
1012 struct xfs_disk_dquot *ddqp; 958 struct xfs_disk_dquot *ddqp;
959 xfs_failaddr_t fa;
1013 int error; 960 int error;
1014 961
1015 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 962 ASSERT(XFS_DQ_IS_LOCKED(dqp));
@@ -1056,9 +1003,10 @@ xfs_qm_dqflush(
1056 /* 1003 /*
1057 * A simple sanity check in case we got a corrupted dquot.. 1004 * A simple sanity check in case we got a corrupted dquot..
1058 */ 1005 */
1059 error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 1006 fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0);
1060 XFS_QMOPT_DOWARN, "dqflush (incore copy)"); 1007 if (fa) {
1061 if (error) { 1008 xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
1009 be32_to_cpu(ddqp->d_id), fa);
1062 xfs_buf_relse(bp); 1010 xfs_buf_relse(bp);
1063 xfs_dqfunlock(dqp); 1011 xfs_dqfunlock(dqp);
1064 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1012 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 664dea105e76..96eaa6933709 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -150,10 +150,7 @@ xfs_dquot_item_error(
150 struct xfs_log_item *lip, 150 struct xfs_log_item *lip,
151 struct xfs_buf *bp) 151 struct xfs_buf *bp)
152{ 152{
153 struct xfs_dquot *dqp; 153 ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush));
154
155 dqp = DQUOT_ITEM(lip)->qli_dquot;
156 ASSERT(!completion_done(&dqp->q_flush));
157 xfs_set_li_failed(lip, bp); 154 xfs_set_li_failed(lip, bp);
158} 155}
159 156
@@ -179,7 +176,7 @@ xfs_qm_dquot_logitem_push(
179 if (!xfs_buf_trylock(bp)) 176 if (!xfs_buf_trylock(bp))
180 return XFS_ITEM_LOCKED; 177 return XFS_ITEM_LOCKED;
181 178
182 if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) 179 if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
183 rval = XFS_ITEM_FLUSHING; 180 rval = XFS_ITEM_FLUSHING;
184 181
185 xfs_buf_unlock(bp); 182 xfs_buf_unlock(bp);
@@ -212,7 +209,7 @@ xfs_qm_dquot_logitem_push(
212 209
213 error = xfs_qm_dqflush(dqp, &bp); 210 error = xfs_qm_dqflush(dqp, &bp);
214 if (error) { 211 if (error) {
215 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", 212 xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT,
216 __func__, error, dqp); 213 __func__, error, dqp);
217 } else { 214 } else {
218 if (!xfs_buf_delwri_queue(bp, buffer_list)) 215 if (!xfs_buf_delwri_queue(bp, buffer_list))
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 4c9f35d983b2..ccf520f0b00d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -24,6 +24,7 @@
24#include "xfs_errortag.h" 24#include "xfs_errortag.h"
25#include "xfs_error.h" 25#include "xfs_error.h"
26#include "xfs_sysfs.h" 26#include "xfs_sysfs.h"
27#include "xfs_inode.h"
27 28
28#ifdef DEBUG 29#ifdef DEBUG
29 30
@@ -314,12 +315,12 @@ xfs_error_report(
314 struct xfs_mount *mp, 315 struct xfs_mount *mp,
315 const char *filename, 316 const char *filename,
316 int linenum, 317 int linenum,
317 void *ra) 318 xfs_failaddr_t failaddr)
318{ 319{
319 if (level <= xfs_error_level) { 320 if (level <= xfs_error_level) {
320 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 321 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
321 "Internal error %s at line %d of file %s. Caller %pS", 322 "Internal error %s at line %d of file %s. Caller %pS",
322 tag, linenum, filename, ra); 323 tag, linenum, filename, failaddr);
323 324
324 xfs_stack_trace(); 325 xfs_stack_trace();
325 } 326 }
@@ -333,11 +334,11 @@ xfs_corruption_error(
333 void *p, 334 void *p,
334 const char *filename, 335 const char *filename,
335 int linenum, 336 int linenum,
336 void *ra) 337 xfs_failaddr_t failaddr)
337{ 338{
338 if (level <= xfs_error_level) 339 if (level <= xfs_error_level)
339 xfs_hex_dump(p, 64); 340 xfs_hex_dump(p, XFS_CORRUPTION_DUMP_LEN);
340 xfs_error_report(tag, level, mp, filename, linenum, ra); 341 xfs_error_report(tag, level, mp, filename, linenum, failaddr);
341 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 342 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
342} 343}
343 344
@@ -347,19 +348,62 @@ xfs_corruption_error(
347 */ 348 */
348void 349void
349xfs_verifier_error( 350xfs_verifier_error(
350 struct xfs_buf *bp) 351 struct xfs_buf *bp,
352 int error,
353 xfs_failaddr_t failaddr)
351{ 354{
352 struct xfs_mount *mp = bp->b_target->bt_mount; 355 struct xfs_mount *mp = bp->b_target->bt_mount;
356 xfs_failaddr_t fa;
357
358 fa = failaddr ? failaddr : __return_address;
359 __xfs_buf_ioerror(bp, error, fa);
353 360
354 xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", 361 xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
355 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", 362 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
356 __return_address, bp->b_ops->name, bp->b_bn); 363 fa, bp->b_ops->name, bp->b_bn);
357 364
358 xfs_alert(mp, "Unmount and run xfs_repair"); 365 xfs_alert(mp, "Unmount and run xfs_repair");
359 366
360 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 367 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
361 xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); 368 xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
362 xfs_hex_dump(xfs_buf_offset(bp, 0), 64); 369 XFS_CORRUPTION_DUMP_LEN);
370 xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN);
371 }
372
373 if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
374 xfs_stack_trace();
375}
376
377/*
378 * Warnings for inode corruption problems. Don't bother with the stack
379 * trace unless the error level is turned up high.
380 */
381void
382xfs_inode_verifier_error(
383 struct xfs_inode *ip,
384 int error,
385 const char *name,
386 void *buf,
387 size_t bufsz,
388 xfs_failaddr_t failaddr)
389{
390 struct xfs_mount *mp = ip->i_mount;
391 xfs_failaddr_t fa;
392 int sz;
393
394 fa = failaddr ? failaddr : __return_address;
395
396 xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s",
397 error == -EFSBADCRC ? "CRC error" : "corruption",
398 fa, ip->i_ino, name);
399
400 xfs_alert(mp, "Unmount and run xfs_repair");
401
402 if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) {
403 sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz);
404 xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
405 sz);
406 xfs_hex_dump(buf, sz);
363 } 407 }
364 408
365 if (xfs_error_level >= XFS_ERRLEVEL_HIGH) 409 if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index ea816c1bf8db..7e728c5a46b8 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -21,11 +21,16 @@
21struct xfs_mount; 21struct xfs_mount;
22 22
23extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, 23extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
24 const char *filename, int linenum, void *ra); 24 const char *filename, int linenum,
25 xfs_failaddr_t failaddr);
25extern void xfs_corruption_error(const char *tag, int level, 26extern void xfs_corruption_error(const char *tag, int level,
26 struct xfs_mount *mp, void *p, const char *filename, 27 struct xfs_mount *mp, void *p, const char *filename,
27 int linenum, void *ra); 28 int linenum, xfs_failaddr_t failaddr);
28extern void xfs_verifier_error(struct xfs_buf *bp); 29extern void xfs_verifier_error(struct xfs_buf *bp, int error,
30 xfs_failaddr_t failaddr);
31extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
32 const char *name, void *buf, size_t bufsz,
33 xfs_failaddr_t failaddr);
29 34
30#define XFS_ERROR_REPORT(e, lvl, mp) \ 35#define XFS_ERROR_REPORT(e, lvl, mp) \
31 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) 36 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
@@ -37,6 +42,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
37#define XFS_ERRLEVEL_LOW 1 42#define XFS_ERRLEVEL_LOW 1
38#define XFS_ERRLEVEL_HIGH 5 43#define XFS_ERRLEVEL_HIGH 5
39 44
45/* Dump 128 bytes of any corrupt buffer */
46#define XFS_CORRUPTION_DUMP_LEN (128)
47
40/* 48/*
41 * Macros to set EFSCORRUPTED & return/branch. 49 * Macros to set EFSCORRUPTED & return/branch.
42 */ 50 */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8601275cc5e6..9ea08326f876 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1048,7 +1048,7 @@ __xfs_filemap_fault(
1048 if (IS_DAX(inode)) { 1048 if (IS_DAX(inode)) {
1049 pfn_t pfn; 1049 pfn_t pfn;
1050 1050
1051 ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops); 1051 ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops);
1052 if (ret & VM_FAULT_NEEDDSYNC) 1052 if (ret & VM_FAULT_NEEDDSYNC)
1053 ret = dax_finish_sync_fault(vmf, pe_size, pfn); 1053 ret = dax_finish_sync_fault(vmf, pe_size, pfn);
1054 } else { 1054 } else {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 60a2e128cb6a..8b4545623e25 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -49,83 +49,6 @@
49 * File system operations 49 * File system operations
50 */ 50 */
51 51
52int
53xfs_fs_geometry(
54 xfs_mount_t *mp,
55 xfs_fsop_geom_t *geo,
56 int new_version)
57{
58
59 memset(geo, 0, sizeof(*geo));
60
61 geo->blocksize = mp->m_sb.sb_blocksize;
62 geo->rtextsize = mp->m_sb.sb_rextsize;
63 geo->agblocks = mp->m_sb.sb_agblocks;
64 geo->agcount = mp->m_sb.sb_agcount;
65 geo->logblocks = mp->m_sb.sb_logblocks;
66 geo->sectsize = mp->m_sb.sb_sectsize;
67 geo->inodesize = mp->m_sb.sb_inodesize;
68 geo->imaxpct = mp->m_sb.sb_imax_pct;
69 geo->datablocks = mp->m_sb.sb_dblocks;
70 geo->rtblocks = mp->m_sb.sb_rblocks;
71 geo->rtextents = mp->m_sb.sb_rextents;
72 geo->logstart = mp->m_sb.sb_logstart;
73 ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid));
74 memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid));
75 if (new_version >= 2) {
76 geo->sunit = mp->m_sb.sb_unit;
77 geo->swidth = mp->m_sb.sb_width;
78 }
79 if (new_version >= 3) {
80 geo->version = XFS_FSOP_GEOM_VERSION;
81 geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
82 XFS_FSOP_GEOM_FLAGS_DIRV2 |
83 (xfs_sb_version_hasattr(&mp->m_sb) ?
84 XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
85 (xfs_sb_version_hasquota(&mp->m_sb) ?
86 XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
87 (xfs_sb_version_hasalign(&mp->m_sb) ?
88 XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
89 (xfs_sb_version_hasdalign(&mp->m_sb) ?
90 XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
91 (xfs_sb_version_hasextflgbit(&mp->m_sb) ?
92 XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
93 (xfs_sb_version_hassector(&mp->m_sb) ?
94 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
95 (xfs_sb_version_hasasciici(&mp->m_sb) ?
96 XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
97 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
98 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
99 (xfs_sb_version_hasattr2(&mp->m_sb) ?
100 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
101 (xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
102 XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
103 (xfs_sb_version_hascrc(&mp->m_sb) ?
104 XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
105 (xfs_sb_version_hasftype(&mp->m_sb) ?
106 XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
107 (xfs_sb_version_hasfinobt(&mp->m_sb) ?
108 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
109 (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
110 XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
111 (xfs_sb_version_hasrmapbt(&mp->m_sb) ?
112 XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) |
113 (xfs_sb_version_hasreflink(&mp->m_sb) ?
114 XFS_FSOP_GEOM_FLAGS_REFLINK : 0);
115 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
116 mp->m_sb.sb_logsectsize : BBSIZE;
117 geo->rtsectsize = mp->m_sb.sb_blocksize;
118 geo->dirblocksize = mp->m_dir_geo->blksize;
119 }
120 if (new_version >= 4) {
121 geo->flags |=
122 (xfs_sb_version_haslogv2(&mp->m_sb) ?
123 XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
124 geo->logsunit = mp->m_sb.sb_logsunit;
125 }
126 return 0;
127}
128
129static struct xfs_buf * 52static struct xfs_buf *
130xfs_growfs_get_hdr_buf( 53xfs_growfs_get_hdr_buf(
131 struct xfs_mount *mp, 54 struct xfs_mount *mp,
@@ -955,7 +878,7 @@ xfs_do_force_shutdown(
955 878
956 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 879 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
957 xfs_notice(mp, 880 xfs_notice(mp,
958 "%s(0x%x) called from line %d of file %s. Return address = 0x%p", 881 "%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
959 __func__, flags, lnnum, fname, __return_address); 882 __func__, flags, lnnum, fname, __return_address);
960 } 883 }
961 /* 884 /*
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 2954c13a3acd..20484ed5e919 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_FSOPS_H__ 18#ifndef __XFS_FSOPS_H__
19#define __XFS_FSOPS_H__ 19#define __XFS_FSOPS_H__
20 20
21extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
22extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in); 21extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
23extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in); 22extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
24extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); 23extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 3bcb8fd2a826..d53a316162d6 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -296,6 +296,7 @@ xfs_reinit_inode(
296 uint32_t generation = inode->i_generation; 296 uint32_t generation = inode->i_generation;
297 uint64_t version = inode_peek_iversion(inode); 297 uint64_t version = inode_peek_iversion(inode);
298 umode_t mode = inode->i_mode; 298 umode_t mode = inode->i_mode;
299 dev_t dev = inode->i_rdev;
299 300
300 error = inode_init_always(mp->m_super, inode); 301 error = inode_init_always(mp->m_super, inode);
301 302
@@ -303,6 +304,7 @@ xfs_reinit_inode(
303 inode->i_generation = generation; 304 inode->i_generation = generation;
304 inode_set_iversion_queried(inode, version); 305 inode_set_iversion_queried(inode, version);
305 inode->i_mode = mode; 306 inode->i_mode = mode;
307 inode->i_rdev = dev;
306 return error; 308 return error;
307} 309}
308 310
@@ -474,6 +476,11 @@ xfs_iget_cache_miss(
474 if (error) 476 if (error)
475 goto out_destroy; 477 goto out_destroy;
476 478
479 if (!xfs_inode_verify_forks(ip)) {
480 error = -EFSCORRUPTED;
481 goto out_destroy;
482 }
483
477 trace_xfs_iget_miss(ip); 484 trace_xfs_iget_miss(ip);
478 485
479 if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) { 486 if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
@@ -1651,28 +1658,15 @@ xfs_inode_clear_eofblocks_tag(
1651} 1658}
1652 1659
1653/* 1660/*
1654 * Automatic CoW Reservation Freeing 1661 * Set ourselves up to free CoW blocks from this file. If it's already clean
1655 * 1662 * then we can bail out quickly, but otherwise we must back off if the file
1656 * These functions automatically garbage collect leftover CoW reservations 1663 * is undergoing some kind of write.
1657 * that were made on behalf of a cowextsize hint when we start to run out
1658 * of quota or when the reservations sit around for too long. If the file
1659 * has dirty pages or is undergoing writeback, its CoW reservations will
1660 * be retained.
1661 *
1662 * The actual garbage collection piggybacks off the same code that runs
1663 * the speculative EOF preallocation garbage collector.
1664 */ 1664 */
1665STATIC int 1665static bool
1666xfs_inode_free_cowblocks( 1666xfs_prep_free_cowblocks(
1667 struct xfs_inode *ip, 1667 struct xfs_inode *ip,
1668 int flags, 1668 struct xfs_ifork *ifp)
1669 void *args)
1670{ 1669{
1671 int ret;
1672 struct xfs_eofblocks *eofb = args;
1673 int match;
1674 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1675
1676 /* 1670 /*
1677 * Just clear the tag if we have an empty cow fork or none at all. It's 1671 * Just clear the tag if we have an empty cow fork or none at all. It's
1678 * possible the inode was fully unshared since it was originally tagged. 1672 * possible the inode was fully unshared since it was originally tagged.
@@ -1680,7 +1674,7 @@ xfs_inode_free_cowblocks(
1680 if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { 1674 if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
1681 trace_xfs_inode_free_cowblocks_invalid(ip); 1675 trace_xfs_inode_free_cowblocks_invalid(ip);
1682 xfs_inode_clear_cowblocks_tag(ip); 1676 xfs_inode_clear_cowblocks_tag(ip);
1683 return 0; 1677 return false;
1684 } 1678 }
1685 1679
1686 /* 1680 /*
@@ -1691,6 +1685,35 @@ xfs_inode_free_cowblocks(
1691 mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || 1685 mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
1692 mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || 1686 mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
1693 atomic_read(&VFS_I(ip)->i_dio_count)) 1687 atomic_read(&VFS_I(ip)->i_dio_count))
1688 return false;
1689
1690 return true;
1691}
1692
1693/*
1694 * Automatic CoW Reservation Freeing
1695 *
1696 * These functions automatically garbage collect leftover CoW reservations
1697 * that were made on behalf of a cowextsize hint when we start to run out
1698 * of quota or when the reservations sit around for too long. If the file
1699 * has dirty pages or is undergoing writeback, its CoW reservations will
1700 * be retained.
1701 *
1702 * The actual garbage collection piggybacks off the same code that runs
1703 * the speculative EOF preallocation garbage collector.
1704 */
1705STATIC int
1706xfs_inode_free_cowblocks(
1707 struct xfs_inode *ip,
1708 int flags,
1709 void *args)
1710{
1711 struct xfs_eofblocks *eofb = args;
1712 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1713 int match;
1714 int ret = 0;
1715
1716 if (!xfs_prep_free_cowblocks(ip, ifp))
1694 return 0; 1717 return 0;
1695 1718
1696 if (eofb) { 1719 if (eofb) {
@@ -1711,7 +1734,12 @@ xfs_inode_free_cowblocks(
1711 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1734 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1712 xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 1735 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
1713 1736
1714 ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); 1737 /*
1738 * Check again, nobody else should be able to dirty blocks or change
1739 * the reflink iflag now that we have the first two locks held.
1740 */
1741 if (xfs_prep_free_cowblocks(ip, ifp))
1742 ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
1715 1743
1716 xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); 1744 xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1717 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1745 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9f424e0aef1f..604ee384a00a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -547,23 +547,36 @@ again:
547 547
548/* 548/*
549 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - 549 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
550 * the iolock, the mmaplock or the ilock, but not more than one at a time. If we 550 * the mmaplock or the ilock, but not more than one type at a time. If we lock
551 * lock more than one at a time, lockdep will report false positives saying we 551 * more than one at a time, lockdep will report false positives saying we have
552 * have violated locking orders. 552 * violated locking orders. The iolock must be double-locked separately since
553 * we use i_rwsem for that. We now support taking one lock EXCL and the other
554 * SHARED.
553 */ 555 */
554void 556void
555xfs_lock_two_inodes( 557xfs_lock_two_inodes(
556 xfs_inode_t *ip0, 558 struct xfs_inode *ip0,
557 xfs_inode_t *ip1, 559 uint ip0_mode,
558 uint lock_mode) 560 struct xfs_inode *ip1,
561 uint ip1_mode)
559{ 562{
560 xfs_inode_t *temp; 563 struct xfs_inode *temp;
564 uint mode_temp;
561 int attempts = 0; 565 int attempts = 0;
562 xfs_log_item_t *lp; 566 xfs_log_item_t *lp;
563 567
564 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); 568 ASSERT(hweight32(ip0_mode) == 1);
565 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) 569 ASSERT(hweight32(ip1_mode) == 1);
566 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 570 ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
571 ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
572 ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
573 !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
574 ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
575 !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
576 ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
577 !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
578 ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
579 !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
567 580
568 ASSERT(ip0->i_ino != ip1->i_ino); 581 ASSERT(ip0->i_ino != ip1->i_ino);
569 582
@@ -571,10 +584,13 @@ xfs_lock_two_inodes(
571 temp = ip0; 584 temp = ip0;
572 ip0 = ip1; 585 ip0 = ip1;
573 ip1 = temp; 586 ip1 = temp;
587 mode_temp = ip0_mode;
588 ip0_mode = ip1_mode;
589 ip1_mode = mode_temp;
574 } 590 }
575 591
576 again: 592 again:
577 xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); 593 xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
578 594
579 /* 595 /*
580 * If the first lock we have locked is in the AIL, we must TRY to get 596 * If the first lock we have locked is in the AIL, we must TRY to get
@@ -583,18 +599,17 @@ xfs_lock_two_inodes(
583 */ 599 */
584 lp = (xfs_log_item_t *)ip0->i_itemp; 600 lp = (xfs_log_item_t *)ip0->i_itemp;
585 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 601 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
586 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { 602 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
587 xfs_iunlock(ip0, lock_mode); 603 xfs_iunlock(ip0, ip0_mode);
588 if ((++attempts % 5) == 0) 604 if ((++attempts % 5) == 0)
589 delay(1); /* Don't just spin the CPU */ 605 delay(1); /* Don't just spin the CPU */
590 goto again; 606 goto again;
591 } 607 }
592 } else { 608 } else {
593 xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); 609 xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
594 } 610 }
595} 611}
596 612
597
598void 613void
599__xfs_iflock( 614__xfs_iflock(
600 struct xfs_inode *ip) 615 struct xfs_inode *ip)
@@ -1422,7 +1437,7 @@ xfs_link(
1422 if (error) 1437 if (error)
1423 goto std_return; 1438 goto std_return;
1424 1439
1425 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1440 xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
1426 1441
1427 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 1442 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1428 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 1443 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
@@ -2215,7 +2230,7 @@ xfs_ifree_cluster(
2215 xfs_buf_t *bp; 2230 xfs_buf_t *bp;
2216 xfs_inode_t *ip; 2231 xfs_inode_t *ip;
2217 xfs_inode_log_item_t *iip; 2232 xfs_inode_log_item_t *iip;
2218 xfs_log_item_t *lip; 2233 struct xfs_log_item *lip;
2219 struct xfs_perag *pag; 2234 struct xfs_perag *pag;
2220 xfs_ino_t inum; 2235 xfs_ino_t inum;
2221 2236
@@ -2273,8 +2288,7 @@ xfs_ifree_cluster(
2273 * stale first, we will not attempt to lock them in the loop 2288 * stale first, we will not attempt to lock them in the loop
2274 * below as the XFS_ISTALE flag will be set. 2289 * below as the XFS_ISTALE flag will be set.
2275 */ 2290 */
2276 lip = bp->b_fspriv; 2291 list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
2277 while (lip) {
2278 if (lip->li_type == XFS_LI_INODE) { 2292 if (lip->li_type == XFS_LI_INODE) {
2279 iip = (xfs_inode_log_item_t *)lip; 2293 iip = (xfs_inode_log_item_t *)lip;
2280 ASSERT(iip->ili_logged == 1); 2294 ASSERT(iip->ili_logged == 1);
@@ -2284,7 +2298,6 @@ xfs_ifree_cluster(
2284 &iip->ili_item.li_lsn); 2298 &iip->ili_item.li_lsn);
2285 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 2299 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
2286 } 2300 }
2287 lip = lip->li_bio_list;
2288 } 2301 }
2289 2302
2290 2303
@@ -2452,6 +2465,7 @@ xfs_ifree(
2452 2465
2453 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 2466 VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
2454 ip->i_d.di_flags = 0; 2467 ip->i_d.di_flags = 0;
2468 ip->i_d.di_flags2 = 0;
2455 ip->i_d.di_dmevmask = 0; 2469 ip->i_d.di_dmevmask = 0;
2456 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2470 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
2457 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 2471 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
@@ -2587,7 +2601,7 @@ xfs_remove(
2587 goto std_return; 2601 goto std_return;
2588 } 2602 }
2589 2603
2590 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 2604 xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
2591 2605
2592 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2606 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2593 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2607 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -3480,6 +3494,36 @@ abort_out:
3480 return error; 3494 return error;
3481} 3495}
3482 3496
3497/*
3498 * If there are inline format data / attr forks attached to this inode,
3499 * make sure they're not corrupt.
3500 */
3501bool
3502xfs_inode_verify_forks(
3503 struct xfs_inode *ip)
3504{
3505 struct xfs_ifork *ifp;
3506 xfs_failaddr_t fa;
3507
3508 fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
3509 if (fa) {
3510 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
3511 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
3512 ifp->if_u1.if_data, ifp->if_bytes, fa);
3513 return false;
3514 }
3515
3516 fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
3517 if (fa) {
3518 ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
3519 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
3520 ifp ? ifp->if_u1.if_data : NULL,
3521 ifp ? ifp->if_bytes : 0, fa);
3522 return false;
3523 }
3524 return true;
3525}
3526
3483STATIC int 3527STATIC int
3484xfs_iflush_int( 3528xfs_iflush_int(
3485 struct xfs_inode *ip, 3529 struct xfs_inode *ip,
@@ -3502,7 +3546,7 @@ xfs_iflush_int(
3502 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 3546 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
3503 mp, XFS_ERRTAG_IFLUSH_1)) { 3547 mp, XFS_ERRTAG_IFLUSH_1)) {
3504 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3548 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
3505 "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3549 "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
3506 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 3550 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
3507 goto corrupt_out; 3551 goto corrupt_out;
3508 } 3552 }
@@ -3512,7 +3556,7 @@ xfs_iflush_int(
3512 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 3556 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
3513 mp, XFS_ERRTAG_IFLUSH_3)) { 3557 mp, XFS_ERRTAG_IFLUSH_3)) {
3514 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3558 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
3515 "%s: Bad regular inode %Lu, ptr 0x%p", 3559 "%s: Bad regular inode %Lu, ptr "PTR_FMT,
3516 __func__, ip->i_ino, ip); 3560 __func__, ip->i_ino, ip);
3517 goto corrupt_out; 3561 goto corrupt_out;
3518 } 3562 }
@@ -3523,7 +3567,7 @@ xfs_iflush_int(
3523 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 3567 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
3524 mp, XFS_ERRTAG_IFLUSH_4)) { 3568 mp, XFS_ERRTAG_IFLUSH_4)) {
3525 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3569 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
3526 "%s: Bad directory inode %Lu, ptr 0x%p", 3570 "%s: Bad directory inode %Lu, ptr "PTR_FMT,
3527 __func__, ip->i_ino, ip); 3571 __func__, ip->i_ino, ip);
3528 goto corrupt_out; 3572 goto corrupt_out;
3529 } 3573 }
@@ -3532,7 +3576,7 @@ xfs_iflush_int(
3532 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { 3576 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
3533 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3577 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
3534 "%s: detected corrupt incore inode %Lu, " 3578 "%s: detected corrupt incore inode %Lu, "
3535 "total extents = %d, nblocks = %Ld, ptr 0x%p", 3579 "total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
3536 __func__, ip->i_ino, 3580 __func__, ip->i_ino,
3537 ip->i_d.di_nextents + ip->i_d.di_anextents, 3581 ip->i_d.di_nextents + ip->i_d.di_anextents,
3538 ip->i_d.di_nblocks, ip); 3582 ip->i_d.di_nblocks, ip);
@@ -3541,7 +3585,7 @@ xfs_iflush_int(
3541 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 3585 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
3542 mp, XFS_ERRTAG_IFLUSH_6)) { 3586 mp, XFS_ERRTAG_IFLUSH_6)) {
3543 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 3587 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
3544 "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 3588 "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
3545 __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 3589 __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
3546 goto corrupt_out; 3590 goto corrupt_out;
3547 } 3591 }
@@ -3558,10 +3602,8 @@ xfs_iflush_int(
3558 if (ip->i_d.di_version < 3) 3602 if (ip->i_d.di_version < 3)
3559 ip->i_d.di_flushiter++; 3603 ip->i_d.di_flushiter++;
3560 3604
3561 /* Check the inline directory data. */ 3605 /* Check the inline fork data before we write out. */
3562 if (S_ISDIR(VFS_I(ip)->i_mode) && 3606 if (!xfs_inode_verify_forks(ip))
3563 ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
3564 xfs_dir2_sf_verify(ip))
3565 goto corrupt_out; 3607 goto corrupt_out;
3566 3608
3567 /* 3609 /*
@@ -3624,7 +3666,7 @@ xfs_iflush_int(
3624 /* generate the checksum. */ 3666 /* generate the checksum. */
3625 xfs_dinode_calc_crc(mp, dip); 3667 xfs_dinode_calc_crc(mp, dip);
3626 3668
3627 ASSERT(bp->b_fspriv != NULL); 3669 ASSERT(!list_empty(&bp->b_li_list));
3628 ASSERT(bp->b_iodone != NULL); 3670 ASSERT(bp->b_iodone != NULL);
3629 return 0; 3671 return 0;
3630 3672
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d383e392ec9d..3e8dc990d41c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -423,7 +423,8 @@ void xfs_iunpin_wait(xfs_inode_t *);
423#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 423#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
424 424
425int xfs_iflush(struct xfs_inode *, struct xfs_buf **); 425int xfs_iflush(struct xfs_inode *, struct xfs_buf **);
426void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 426void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
427 struct xfs_inode *ip1, uint ip1_mode);
427 428
428xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); 429xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
429xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); 430xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
@@ -491,4 +492,6 @@ extern struct kmem_zone *xfs_inode_zone;
491/* The default CoW extent size hint. */ 492/* The default CoW extent size hint. */
492#define XFS_DEFAULT_COWEXTSZ_HINT 32 493#define XFS_DEFAULT_COWEXTSZ_HINT 32
493 494
495bool xfs_inode_verify_forks(struct xfs_inode *ip);
496
494#endif /* __XFS_INODE_H__ */ 497#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7571abf5dfb3..d5037f060d6f 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -522,7 +522,7 @@ xfs_inode_item_push(
522 if (!xfs_buf_trylock(bp)) 522 if (!xfs_buf_trylock(bp))
523 return XFS_ITEM_LOCKED; 523 return XFS_ITEM_LOCKED;
524 524
525 if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) 525 if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
526 rval = XFS_ITEM_FLUSHING; 526 rval = XFS_ITEM_FLUSHING;
527 527
528 xfs_buf_unlock(bp); 528 xfs_buf_unlock(bp);
@@ -713,37 +713,23 @@ xfs_iflush_done(
713 struct xfs_log_item *lip) 713 struct xfs_log_item *lip)
714{ 714{
715 struct xfs_inode_log_item *iip; 715 struct xfs_inode_log_item *iip;
716 struct xfs_log_item *blip; 716 struct xfs_log_item *blip, *n;
717 struct xfs_log_item *next;
718 struct xfs_log_item *prev;
719 struct xfs_ail *ailp = lip->li_ailp; 717 struct xfs_ail *ailp = lip->li_ailp;
720 int need_ail = 0; 718 int need_ail = 0;
719 LIST_HEAD(tmp);
721 720
722 /* 721 /*
723 * Scan the buffer IO completions for other inodes being completed and 722 * Scan the buffer IO completions for other inodes being completed and
724 * attach them to the current inode log item. 723 * attach them to the current inode log item.
725 */ 724 */
726 blip = bp->b_fspriv;
727 prev = NULL;
728 while (blip != NULL) {
729 if (blip->li_cb != xfs_iflush_done) {
730 prev = blip;
731 blip = blip->li_bio_list;
732 continue;
733 }
734 725
735 /* remove from list */ 726 list_add_tail(&lip->li_bio_list, &tmp);
736 next = blip->li_bio_list;
737 if (!prev) {
738 bp->b_fspriv = next;
739 } else {
740 prev->li_bio_list = next;
741 }
742 727
743 /* add to current list */ 728 list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
744 blip->li_bio_list = lip->li_bio_list; 729 if (lip->li_cb != xfs_iflush_done)
745 lip->li_bio_list = blip; 730 continue;
746 731
732 list_move_tail(&blip->li_bio_list, &tmp);
747 /* 733 /*
748 * while we have the item, do the unlocked check for needing 734 * while we have the item, do the unlocked check for needing
749 * the AIL lock. 735 * the AIL lock.
@@ -752,8 +738,6 @@ xfs_iflush_done(
752 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || 738 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
753 (blip->li_flags & XFS_LI_FAILED)) 739 (blip->li_flags & XFS_LI_FAILED))
754 need_ail++; 740 need_ail++;
755
756 blip = next;
757 } 741 }
758 742
759 /* make sure we capture the state of the initial inode. */ 743 /* make sure we capture the state of the initial inode. */
@@ -776,7 +760,7 @@ xfs_iflush_done(
776 760
777 /* this is an opencoded batch version of xfs_trans_ail_delete */ 761 /* this is an opencoded batch version of xfs_trans_ail_delete */
778 spin_lock(&ailp->xa_lock); 762 spin_lock(&ailp->xa_lock);
779 for (blip = lip; blip; blip = blip->li_bio_list) { 763 list_for_each_entry(blip, &tmp, li_bio_list) {
780 if (INODE_ITEM(blip)->ili_logged && 764 if (INODE_ITEM(blip)->ili_logged &&
781 blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) 765 blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
782 mlip_changed |= xfs_ail_delete_one(ailp, blip); 766 mlip_changed |= xfs_ail_delete_one(ailp, blip);
@@ -802,15 +786,14 @@ xfs_iflush_done(
802 * ili_last_fields bits now that we know that the data corresponding to 786 * ili_last_fields bits now that we know that the data corresponding to
803 * them is safely on disk. 787 * them is safely on disk.
804 */ 788 */
805 for (blip = lip; blip; blip = next) { 789 list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
806 next = blip->li_bio_list; 790 list_del_init(&blip->li_bio_list);
807 blip->li_bio_list = NULL;
808
809 iip = INODE_ITEM(blip); 791 iip = INODE_ITEM(blip);
810 iip->ili_logged = 0; 792 iip->ili_logged = 0;
811 iip->ili_last_fields = 0; 793 iip->ili_last_fields = 0;
812 xfs_ifunlock(iip->ili_inode); 794 xfs_ifunlock(iip->ili_inode);
813 } 795 }
796 list_del(&tmp);
814} 797}
815 798
816/* 799/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 20dc65fef6a4..89fb1eb80aae 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -45,6 +45,7 @@
45#include <linux/fsmap.h> 45#include <linux/fsmap.h>
46#include "xfs_fsmap.h" 46#include "xfs_fsmap.h"
47#include "scrub/xfs_scrub.h" 47#include "scrub/xfs_scrub.h"
48#include "xfs_sb.h"
48 49
49#include <linux/capability.h> 50#include <linux/capability.h>
50#include <linux/cred.h> 51#include <linux/cred.h>
@@ -809,7 +810,7 @@ xfs_ioc_fsgeometry_v1(
809 xfs_fsop_geom_t fsgeo; 810 xfs_fsop_geom_t fsgeo;
810 int error; 811 int error;
811 812
812 error = xfs_fs_geometry(mp, &fsgeo, 3); 813 error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3);
813 if (error) 814 if (error)
814 return error; 815 return error;
815 816
@@ -831,7 +832,7 @@ xfs_ioc_fsgeometry(
831 xfs_fsop_geom_t fsgeo; 832 xfs_fsop_geom_t fsgeo;
832 int error; 833 int error;
833 834
834 error = xfs_fs_geometry(mp, &fsgeo, 4); 835 error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4);
835 if (error) 836 if (error)
836 return error; 837 return error;
837 838
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 35c79e246fde..10fbde359649 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -37,6 +37,7 @@
37#include "xfs_ioctl.h" 37#include "xfs_ioctl.h"
38#include "xfs_ioctl32.h" 38#include "xfs_ioctl32.h"
39#include "xfs_trace.h" 39#include "xfs_trace.h"
40#include "xfs_sb.h"
40 41
41#define _NATIVE_IOC(cmd, type) \ 42#define _NATIVE_IOC(cmd, type) \
42 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) 43 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
@@ -66,7 +67,7 @@ xfs_compat_ioc_fsgeometry_v1(
66 xfs_fsop_geom_t fsgeo; 67 xfs_fsop_geom_t fsgeo;
67 int error; 68 int error;
68 69
69 error = xfs_fs_geometry(mp, &fsgeo, 3); 70 error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3);
70 if (error) 71 if (error)
71 return error; 72 return error;
72 /* The 32-bit variant simply has some padding at the end */ 73 /* The 32-bit variant simply has some padding at the end */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 99562ec0de56..bee51a14a906 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -285,8 +285,22 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
285#define XFS_IS_REALTIME_INODE(ip) \ 285#define XFS_IS_REALTIME_INODE(ip) \
286 (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \ 286 (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \
287 (ip)->i_mount->m_rtdev_targp) 287 (ip)->i_mount->m_rtdev_targp)
288#define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0)
288#else 289#else
289#define XFS_IS_REALTIME_INODE(ip) (0) 290#define XFS_IS_REALTIME_INODE(ip) (0)
291#define XFS_IS_REALTIME_MOUNT(mp) (0)
292#endif
293
294/*
295 * Starting in Linux 4.15, the %p (raw pointer value) printk modifier
296 * prints a hashed version of the pointer to avoid leaking kernel
297 * pointers into dmesg. If we're trying to debug the kernel we want the
298 * raw values, so override this behavior as best we can.
299 */
300#ifdef DEBUG
301# define PTR_FMT "%px"
302#else
303# define PTR_FMT "%p"
290#endif 304#endif
291 305
292#endif /* __XFS_LINUX__ */ 306#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a503af96d780..3e5ba1ecc080 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1047,6 +1047,7 @@ xfs_log_item_init(
1047 1047
1048 INIT_LIST_HEAD(&item->li_ail); 1048 INIT_LIST_HEAD(&item->li_ail);
1049 INIT_LIST_HEAD(&item->li_cil); 1049 INIT_LIST_HEAD(&item->li_cil);
1050 INIT_LIST_HEAD(&item->li_bio_list);
1050} 1051}
1051 1052
1052/* 1053/*
@@ -1242,7 +1243,7 @@ xlog_space_left(
1242static void 1243static void
1243xlog_iodone(xfs_buf_t *bp) 1244xlog_iodone(xfs_buf_t *bp)
1244{ 1245{
1245 struct xlog_in_core *iclog = bp->b_fspriv; 1246 struct xlog_in_core *iclog = bp->b_log_item;
1246 struct xlog *l = iclog->ic_log; 1247 struct xlog *l = iclog->ic_log;
1247 int aborted = 0; 1248 int aborted = 0;
1248 1249
@@ -1773,7 +1774,7 @@ STATIC int
1773xlog_bdstrat( 1774xlog_bdstrat(
1774 struct xfs_buf *bp) 1775 struct xfs_buf *bp)
1775{ 1776{
1776 struct xlog_in_core *iclog = bp->b_fspriv; 1777 struct xlog_in_core *iclog = bp->b_log_item;
1777 1778
1778 xfs_buf_lock(bp); 1779 xfs_buf_lock(bp);
1779 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1780 if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -1919,7 +1920,7 @@ xlog_sync(
1919 } 1920 }
1920 1921
1921 bp->b_io_length = BTOBB(count); 1922 bp->b_io_length = BTOBB(count);
1922 bp->b_fspriv = iclog; 1923 bp->b_log_item = iclog;
1923 bp->b_flags &= ~XBF_FLUSH; 1924 bp->b_flags &= ~XBF_FLUSH;
1924 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1925 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
1925 1926
@@ -1958,7 +1959,7 @@ xlog_sync(
1958 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1959 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
1959 xfs_buf_associate_memory(bp, 1960 xfs_buf_associate_memory(bp,
1960 (char *)&iclog->ic_header + count, split); 1961 (char *)&iclog->ic_header + count, split);
1961 bp->b_fspriv = iclog; 1962 bp->b_log_item = iclog;
1962 bp->b_flags &= ~XBF_FLUSH; 1963 bp->b_flags &= ~XBF_FLUSH;
1963 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); 1964 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
1964 1965
@@ -2117,7 +2118,9 @@ xlog_print_trans(
2117 2118
2118 /* dump core transaction and ticket info */ 2119 /* dump core transaction and ticket info */
2119 xfs_warn(mp, "transaction summary:"); 2120 xfs_warn(mp, "transaction summary:");
2120 xfs_warn(mp, " flags = 0x%x", tp->t_flags); 2121 xfs_warn(mp, " log res = %d", tp->t_log_res);
2122 xfs_warn(mp, " log count = %d", tp->t_log_count);
2123 xfs_warn(mp, " flags = 0x%x", tp->t_flags);
2121 2124
2122 xlog_print_tic_res(mp, tp->t_ticket); 2125 xlog_print_tic_res(mp, tp->t_ticket);
2123 2126
@@ -2242,7 +2245,7 @@ xlog_write_setup_ophdr(
2242 break; 2245 break;
2243 default: 2246 default:
2244 xfs_warn(log->l_mp, 2247 xfs_warn(log->l_mp,
2245 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 2248 "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT,
2246 ophdr->oh_clientid, ticket); 2249 ophdr->oh_clientid, ticket);
2247 return NULL; 2250 return NULL;
2248 } 2251 }
@@ -3924,7 +3927,7 @@ xlog_verify_iclog(
3924 } 3927 }
3925 if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) 3928 if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
3926 xfs_warn(log->l_mp, 3929 xfs_warn(log->l_mp,
3927 "%s: invalid clientid %d op 0x%p offset 0x%lx", 3930 "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx",
3928 __func__, clientid, ophead, 3931 __func__, clientid, ophead,
3929 (unsigned long)field_offset); 3932 (unsigned long)field_offset);
3930 3933
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 28d1abfe835e..00240c9ee72e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -400,9 +400,9 @@ xlog_recover_iodone(
400 * On v5 supers, a bli could be attached to update the metadata LSN. 400 * On v5 supers, a bli could be attached to update the metadata LSN.
401 * Clean it up. 401 * Clean it up.
402 */ 402 */
403 if (bp->b_fspriv) 403 if (bp->b_log_item)
404 xfs_buf_item_relse(bp); 404 xfs_buf_item_relse(bp);
405 ASSERT(bp->b_fspriv == NULL); 405 ASSERT(bp->b_log_item == NULL);
406 406
407 bp->b_iodone = NULL; 407 bp->b_iodone = NULL;
408 xfs_buf_ioend(bp); 408 xfs_buf_ioend(bp);
@@ -2218,7 +2218,7 @@ xlog_recover_do_inode_buffer(
2218 next_unlinked_offset - reg_buf_offset; 2218 next_unlinked_offset - reg_buf_offset;
2219 if (unlikely(*logged_nextp == 0)) { 2219 if (unlikely(*logged_nextp == 0)) {
2220 xfs_alert(mp, 2220 xfs_alert(mp,
2221 "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " 2221 "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
2222 "Trying to replay bad (0) inode di_next_unlinked field.", 2222 "Trying to replay bad (0) inode di_next_unlinked field.",
2223 item, bp); 2223 item, bp);
2224 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", 2224 XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
@@ -2630,7 +2630,7 @@ xlog_recover_validate_buf_type(
2630 ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); 2630 ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
2631 bp->b_iodone = xlog_recover_iodone; 2631 bp->b_iodone = xlog_recover_iodone;
2632 xfs_buf_item_init(bp, mp); 2632 xfs_buf_item_init(bp, mp);
2633 bip = bp->b_fspriv; 2633 bip = bp->b_log_item;
2634 bip->bli_item.li_lsn = current_lsn; 2634 bip->bli_item.li_lsn = current_lsn;
2635 } 2635 }
2636} 2636}
@@ -2652,7 +2652,7 @@ xlog_recover_do_reg_buffer(
2652 int i; 2652 int i;
2653 int bit; 2653 int bit;
2654 int nbits; 2654 int nbits;
2655 int error; 2655 xfs_failaddr_t fa;
2656 2656
2657 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); 2657 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
2658 2658
@@ -2687,7 +2687,7 @@ xlog_recover_do_reg_buffer(
2687 * the first dquot in the buffer should do. XXXThis is 2687 * the first dquot in the buffer should do. XXXThis is
2688 * probably a good thing to do for other buf types also. 2688 * probably a good thing to do for other buf types also.
2689 */ 2689 */
2690 error = 0; 2690 fa = NULL;
2691 if (buf_f->blf_flags & 2691 if (buf_f->blf_flags &
2692 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2692 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
2693 if (item->ri_buf[i].i_addr == NULL) { 2693 if (item->ri_buf[i].i_addr == NULL) {
@@ -2701,11 +2701,14 @@ xlog_recover_do_reg_buffer(
2701 item->ri_buf[i].i_len, __func__); 2701 item->ri_buf[i].i_len, __func__);
2702 goto next; 2702 goto next;
2703 } 2703 }
2704 error = xfs_dqcheck(mp, item->ri_buf[i].i_addr, 2704 fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
2705 -1, 0, XFS_QMOPT_DOWARN, 2705 -1, 0, 0);
2706 "dquot_buf_recover"); 2706 if (fa) {
2707 if (error) 2707 xfs_alert(mp,
2708 "dquot corrupt at %pS trying to replay into block 0x%llx",
2709 fa, bp->b_bn);
2708 goto next; 2710 goto next;
2711 }
2709 } 2712 }
2710 2713
2711 memcpy(xfs_buf_offset(bp, 2714 memcpy(xfs_buf_offset(bp,
@@ -2957,6 +2960,10 @@ xfs_recover_inode_owner_change(
2957 if (error) 2960 if (error)
2958 goto out_free_ip; 2961 goto out_free_ip;
2959 2962
2963 if (!xfs_inode_verify_forks(ip)) {
2964 error = -EFSCORRUPTED;
2965 goto out_free_ip;
2966 }
2960 2967
2961 if (in_f->ilf_fields & XFS_ILOG_DOWNER) { 2968 if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
2962 ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); 2969 ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
@@ -3042,7 +3049,7 @@ xlog_recover_inode_pass2(
3042 */ 3049 */
3043 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { 3050 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
3044 xfs_alert(mp, 3051 xfs_alert(mp,
3045 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", 3052 "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
3046 __func__, dip, bp, in_f->ilf_ino); 3053 __func__, dip, bp, in_f->ilf_ino);
3047 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 3054 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
3048 XFS_ERRLEVEL_LOW, mp); 3055 XFS_ERRLEVEL_LOW, mp);
@@ -3052,7 +3059,7 @@ xlog_recover_inode_pass2(
3052 ldip = item->ri_buf[1].i_addr; 3059 ldip = item->ri_buf[1].i_addr;
3053 if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { 3060 if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
3054 xfs_alert(mp, 3061 xfs_alert(mp,
3055 "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", 3062 "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
3056 __func__, item, in_f->ilf_ino); 3063 __func__, item, in_f->ilf_ino);
3057 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 3064 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
3058 XFS_ERRLEVEL_LOW, mp); 3065 XFS_ERRLEVEL_LOW, mp);
@@ -3110,8 +3117,8 @@ xlog_recover_inode_pass2(
3110 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 3117 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
3111 XFS_ERRLEVEL_LOW, mp, ldip); 3118 XFS_ERRLEVEL_LOW, mp, ldip);
3112 xfs_alert(mp, 3119 xfs_alert(mp,
3113 "%s: Bad regular inode log record, rec ptr 0x%p, " 3120 "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
3114 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 3121 "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
3115 __func__, item, dip, bp, in_f->ilf_ino); 3122 __func__, item, dip, bp, in_f->ilf_ino);
3116 error = -EFSCORRUPTED; 3123 error = -EFSCORRUPTED;
3117 goto out_release; 3124 goto out_release;
@@ -3123,8 +3130,8 @@ xlog_recover_inode_pass2(
3123 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 3130 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
3124 XFS_ERRLEVEL_LOW, mp, ldip); 3131 XFS_ERRLEVEL_LOW, mp, ldip);
3125 xfs_alert(mp, 3132 xfs_alert(mp,
3126 "%s: Bad dir inode log record, rec ptr 0x%p, " 3133 "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
3127 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 3134 "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
3128 __func__, item, dip, bp, in_f->ilf_ino); 3135 __func__, item, dip, bp, in_f->ilf_ino);
3129 error = -EFSCORRUPTED; 3136 error = -EFSCORRUPTED;
3130 goto out_release; 3137 goto out_release;
@@ -3134,8 +3141,8 @@ xlog_recover_inode_pass2(
3134 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 3141 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
3135 XFS_ERRLEVEL_LOW, mp, ldip); 3142 XFS_ERRLEVEL_LOW, mp, ldip);
3136 xfs_alert(mp, 3143 xfs_alert(mp,
3137 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 3144 "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
3138 "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 3145 "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
3139 __func__, item, dip, bp, in_f->ilf_ino, 3146 __func__, item, dip, bp, in_f->ilf_ino,
3140 ldip->di_nextents + ldip->di_anextents, 3147 ldip->di_nextents + ldip->di_anextents,
3141 ldip->di_nblocks); 3148 ldip->di_nblocks);
@@ -3146,8 +3153,8 @@ xlog_recover_inode_pass2(
3146 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 3153 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
3147 XFS_ERRLEVEL_LOW, mp, ldip); 3154 XFS_ERRLEVEL_LOW, mp, ldip);
3148 xfs_alert(mp, 3155 xfs_alert(mp,
3149 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 3156 "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
3150 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, 3157 "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
3151 item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); 3158 item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
3152 error = -EFSCORRUPTED; 3159 error = -EFSCORRUPTED;
3153 goto out_release; 3160 goto out_release;
@@ -3157,7 +3164,7 @@ xlog_recover_inode_pass2(
3157 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 3164 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
3158 XFS_ERRLEVEL_LOW, mp, ldip); 3165 XFS_ERRLEVEL_LOW, mp, ldip);
3159 xfs_alert(mp, 3166 xfs_alert(mp,
3160 "%s: Bad inode log record length %d, rec ptr 0x%p", 3167 "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
3161 __func__, item->ri_buf[1].i_len, item); 3168 __func__, item->ri_buf[1].i_len, item);
3162 error = -EFSCORRUPTED; 3169 error = -EFSCORRUPTED;
3163 goto out_release; 3170 goto out_release;
@@ -3303,6 +3310,7 @@ xlog_recover_dquot_pass2(
3303 xfs_mount_t *mp = log->l_mp; 3310 xfs_mount_t *mp = log->l_mp;
3304 xfs_buf_t *bp; 3311 xfs_buf_t *bp;
3305 struct xfs_disk_dquot *ddq, *recddq; 3312 struct xfs_disk_dquot *ddq, *recddq;
3313 xfs_failaddr_t fa;
3306 int error; 3314 int error;
3307 xfs_dq_logformat_t *dq_f; 3315 xfs_dq_logformat_t *dq_f;
3308 uint type; 3316 uint type;
@@ -3345,10 +3353,12 @@ xlog_recover_dquot_pass2(
3345 */ 3353 */
3346 dq_f = item->ri_buf[0].i_addr; 3354 dq_f = item->ri_buf[0].i_addr;
3347 ASSERT(dq_f); 3355 ASSERT(dq_f);
3348 error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 3356 fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0);
3349 "xlog_recover_dquot_pass2 (log copy)"); 3357 if (fa) {
3350 if (error) 3358 xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
3359 dq_f->qlf_id, fa);
3351 return -EIO; 3360 return -EIO;
3361 }
3352 ASSERT(dq_f->qlf_len == 1); 3362 ASSERT(dq_f->qlf_len == 1);
3353 3363
3354 /* 3364 /*
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c879b517cc94..98fd41cbb9e1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -162,6 +162,7 @@ xfs_free_perag(
162 ASSERT(pag); 162 ASSERT(pag);
163 ASSERT(atomic_read(&pag->pag_ref) == 0); 163 ASSERT(atomic_read(&pag->pag_ref) == 0);
164 xfs_buf_hash_destroy(pag); 164 xfs_buf_hash_destroy(pag);
165 mutex_destroy(&pag->pag_ici_reclaim_lock);
165 call_rcu(&pag->rcu_head, __xfs_free_perag); 166 call_rcu(&pag->rcu_head, __xfs_free_perag);
166 } 167 }
167} 168}
@@ -248,6 +249,7 @@ xfs_initialize_perag(
248out_hash_destroy: 249out_hash_destroy:
249 xfs_buf_hash_destroy(pag); 250 xfs_buf_hash_destroy(pag);
250out_free_pag: 251out_free_pag:
252 mutex_destroy(&pag->pag_ici_reclaim_lock);
251 kmem_free(pag); 253 kmem_free(pag);
252out_unwind_new_pags: 254out_unwind_new_pags:
253 /* unwind any prior newly initialized pags */ 255 /* unwind any prior newly initialized pags */
@@ -256,6 +258,7 @@ out_unwind_new_pags:
256 if (!pag) 258 if (!pag)
257 break; 259 break;
258 xfs_buf_hash_destroy(pag); 260 xfs_buf_hash_destroy(pag);
261 mutex_destroy(&pag->pag_ici_reclaim_lock);
259 kmem_free(pag); 262 kmem_free(pag);
260 } 263 }
261 return error; 264 return error;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b897b11afb2c..5b848f4b637f 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -162,7 +162,7 @@ xfs_qm_dqpurge(
162 */ 162 */
163 error = xfs_qm_dqflush(dqp, &bp); 163 error = xfs_qm_dqflush(dqp, &bp);
164 if (error) { 164 if (error) {
165 xfs_warn(mp, "%s: dquot %p flush failed", 165 xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed",
166 __func__, dqp); 166 __func__, dqp);
167 } else { 167 } else {
168 error = xfs_bwrite(bp); 168 error = xfs_bwrite(bp);
@@ -291,8 +291,7 @@ xfs_qm_dqattach_one(
291 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got 291 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
292 * turned off suddenly. 292 * turned off suddenly.
293 */ 293 */
294 error = xfs_qm_dqget(ip->i_mount, ip, id, type, 294 error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp);
295 doalloc | XFS_QMOPT_DOWARN, &dqp);
296 if (error) 295 if (error)
297 return error; 296 return error;
298 297
@@ -481,7 +480,7 @@ xfs_qm_dquot_isolate(
481 480
482 error = xfs_qm_dqflush(dqp, &bp); 481 error = xfs_qm_dqflush(dqp, &bp);
483 if (error) { 482 if (error) {
484 xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", 483 xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed",
485 __func__, dqp); 484 __func__, dqp);
486 goto out_unlock_dirty; 485 goto out_unlock_dirty;
487 } 486 }
@@ -574,7 +573,7 @@ xfs_qm_set_defquota(
574 struct xfs_def_quota *defq; 573 struct xfs_def_quota *defq;
575 int error; 574 int error;
576 575
577 error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp); 576 error = xfs_qm_dqread(mp, 0, type, 0, &dqp);
578 577
579 if (!error) { 578 if (!error) {
580 xfs_disk_dquot_t *ddqp = &dqp->q_core; 579 xfs_disk_dquot_t *ddqp = &dqp->q_core;
@@ -652,7 +651,7 @@ xfs_qm_init_quotainfo(
652 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 651 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
653 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 652 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
654 XFS_DQ_PROJ), 653 XFS_DQ_PROJ),
655 XFS_QMOPT_DOWARN, &dqp); 654 0, &dqp);
656 655
657 if (!error) { 656 if (!error) {
658 xfs_disk_dquot_t *ddqp = &dqp->q_core; 657 xfs_disk_dquot_t *ddqp = &dqp->q_core;
@@ -843,6 +842,7 @@ xfs_qm_reset_dqcounts(
843{ 842{
844 struct xfs_dqblk *dqb; 843 struct xfs_dqblk *dqb;
845 int j; 844 int j;
845 xfs_failaddr_t fa;
846 846
847 trace_xfs_reset_dqcounts(bp, _RET_IP_); 847 trace_xfs_reset_dqcounts(bp, _RET_IP_);
848 848
@@ -864,10 +864,13 @@ xfs_qm_reset_dqcounts(
864 /* 864 /*
865 * Do a sanity check, and if needed, repair the dqblk. Don't 865 * Do a sanity check, and if needed, repair the dqblk. Don't
866 * output any warnings because it's perfectly possible to 866 * output any warnings because it's perfectly possible to
867 * find uninitialised dquot blks. See comment in xfs_dqcheck. 867 * find uninitialised dquot blks. See comment in
868 * xfs_dquot_verify.
868 */ 869 */
869 xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, 870 fa = xfs_dquot_verify(mp, ddq, id + j, type, 0);
870 "xfs_quotacheck"); 871 if (fa)
872 xfs_dquot_repair(mp, ddq, id + j, type);
873
871 /* 874 /*
872 * Reset type in case we are reusing group quota file for 875 * Reset type in case we are reusing group quota file for
873 * project quotas or vice versa 876 * project quotas or vice versa
@@ -1074,8 +1077,7 @@ xfs_qm_quotacheck_dqadjust(
1074 struct xfs_dquot *dqp; 1077 struct xfs_dquot *dqp;
1075 int error; 1078 int error;
1076 1079
1077 error = xfs_qm_dqget(mp, ip, id, type, 1080 error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp);
1078 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1079 if (error) { 1081 if (error) {
1080 /* 1082 /*
1081 * Shouldn't be able to turn off quotas here. 1083 * Shouldn't be able to turn off quotas here.
@@ -1696,8 +1698,7 @@ xfs_qm_vop_dqalloc(
1696 xfs_iunlock(ip, lockflags); 1698 xfs_iunlock(ip, lockflags);
1697 error = xfs_qm_dqget(mp, NULL, uid, 1699 error = xfs_qm_dqget(mp, NULL, uid,
1698 XFS_DQ_USER, 1700 XFS_DQ_USER,
1699 XFS_QMOPT_DQALLOC | 1701 XFS_QMOPT_DQALLOC,
1700 XFS_QMOPT_DOWARN,
1701 &uq); 1702 &uq);
1702 if (error) { 1703 if (error) {
1703 ASSERT(error != -ENOENT); 1704 ASSERT(error != -ENOENT);
@@ -1723,8 +1724,7 @@ xfs_qm_vop_dqalloc(
1723 xfs_iunlock(ip, lockflags); 1724 xfs_iunlock(ip, lockflags);
1724 error = xfs_qm_dqget(mp, NULL, gid, 1725 error = xfs_qm_dqget(mp, NULL, gid,
1725 XFS_DQ_GROUP, 1726 XFS_DQ_GROUP,
1726 XFS_QMOPT_DQALLOC | 1727 XFS_QMOPT_DQALLOC,
1727 XFS_QMOPT_DOWARN,
1728 &gq); 1728 &gq);
1729 if (error) { 1729 if (error) {
1730 ASSERT(error != -ENOENT); 1730 ASSERT(error != -ENOENT);
@@ -1743,8 +1743,7 @@ xfs_qm_vop_dqalloc(
1743 xfs_iunlock(ip, lockflags); 1743 xfs_iunlock(ip, lockflags);
1744 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, 1744 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
1745 XFS_DQ_PROJ, 1745 XFS_DQ_PROJ,
1746 XFS_QMOPT_DQALLOC | 1746 XFS_QMOPT_DQALLOC,
1747 XFS_QMOPT_DOWARN,
1748 &pq); 1747 &pq);
1749 if (error) { 1748 if (error) {
1750 ASSERT(error != -ENOENT); 1749 ASSERT(error != -ENOENT);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 47aea2e82c26..270246943a06 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -464,6 +464,13 @@ retry:
464 error = xfs_trans_commit(tp); 464 error = xfs_trans_commit(tp);
465 if (error) 465 if (error)
466 return error; 466 return error;
467
468 /*
469 * Allocation succeeded but the requested range was not even partially
470 * satisfied? Bail out!
471 */
472 if (nimaps == 0)
473 return -ENOSPC;
467convert: 474convert:
468 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, 475 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
469 &dfops); 476 &dfops);
@@ -599,10 +606,6 @@ xfs_reflink_cancel_cow_blocks(
599 del.br_startblock, del.br_blockcount, 606 del.br_startblock, del.br_blockcount,
600 NULL); 607 NULL);
601 608
602 /* Update quota accounting */
603 xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
604 -(long)del.br_blockcount);
605
606 /* Roll the transaction */ 609 /* Roll the transaction */
607 xfs_defer_ijoin(&dfops, ip); 610 xfs_defer_ijoin(&dfops, ip);
608 error = xfs_defer_finish(tpp, &dfops); 611 error = xfs_defer_finish(tpp, &dfops);
@@ -613,6 +616,13 @@ xfs_reflink_cancel_cow_blocks(
613 616
614 /* Remove the mapping from the CoW fork. */ 617 /* Remove the mapping from the CoW fork. */
615 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 618 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
619
620 /* Remove the quota reservation */
621 error = xfs_trans_reserve_quota_nblks(NULL, ip,
622 -(long)del.br_blockcount, 0,
623 XFS_QMOPT_RES_REGBLKS);
624 if (error)
625 break;
616 } else { 626 } else {
617 /* Didn't do anything, push cursor back. */ 627 /* Didn't do anything, push cursor back. */
618 xfs_iext_prev(ifp, &icur); 628 xfs_iext_prev(ifp, &icur);
@@ -795,6 +805,10 @@ xfs_reflink_end_cow(
795 if (error) 805 if (error)
796 goto out_defer; 806 goto out_defer;
797 807
808 /* Charge this new data fork mapping to the on-disk quota. */
809 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
810 (long)del.br_blockcount);
811
798 /* Remove the mapping from the CoW fork. */ 812 /* Remove the mapping from the CoW fork. */
799 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 813 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
800 814
@@ -944,7 +958,7 @@ xfs_reflink_set_inode_flag(
944 if (src->i_ino == dest->i_ino) 958 if (src->i_ino == dest->i_ino)
945 xfs_ilock(src, XFS_ILOCK_EXCL); 959 xfs_ilock(src, XFS_ILOCK_EXCL);
946 else 960 else
947 xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL); 961 xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL);
948 962
949 if (!xfs_is_reflink_inode(src)) { 963 if (!xfs_is_reflink_inode(src)) {
950 trace_xfs_reflink_set_inode_flag(src); 964 trace_xfs_reflink_set_inode_flag(src);
@@ -1202,13 +1216,16 @@ xfs_reflink_remap_blocks(
1202 1216
1203 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ 1217 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
1204 while (len) { 1218 while (len) {
1219 uint lock_mode;
1220
1205 trace_xfs_reflink_remap_blocks_loop(src, srcoff, len, 1221 trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
1206 dest, destoff); 1222 dest, destoff);
1223
1207 /* Read extent from the source file */ 1224 /* Read extent from the source file */
1208 nimaps = 1; 1225 nimaps = 1;
1209 xfs_ilock(src, XFS_ILOCK_EXCL); 1226 lock_mode = xfs_ilock_data_map_shared(src);
1210 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); 1227 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
1211 xfs_iunlock(src, XFS_ILOCK_EXCL); 1228 xfs_iunlock(src, lock_mode);
1212 if (error) 1229 if (error)
1213 goto err; 1230 goto err;
1214 ASSERT(nimaps == 1); 1231 ASSERT(nimaps == 1);
@@ -1245,6 +1262,50 @@ err:
1245} 1262}
1246 1263
1247/* 1264/*
1265 * Grab the exclusive iolock for a data copy from src to dest, making
1266 * sure to abide vfs locking order (lowest pointer value goes first) and
1267 * breaking the pnfs layout leases on dest before proceeding. The loop
1268 * is needed because we cannot call the blocking break_layout() with the
1269 * src iolock held, and therefore have to back out both locks.
1270 */
1271static int
1272xfs_iolock_two_inodes_and_break_layout(
1273 struct inode *src,
1274 struct inode *dest)
1275{
1276 int error;
1277
1278retry:
1279 if (src < dest) {
1280 inode_lock_shared(src);
1281 inode_lock_nested(dest, I_MUTEX_NONDIR2);
1282 } else {
1283 /* src >= dest */
1284 inode_lock(dest);
1285 }
1286
1287 error = break_layout(dest, false);
1288 if (error == -EWOULDBLOCK) {
1289 inode_unlock(dest);
1290 if (src < dest)
1291 inode_unlock_shared(src);
1292 error = break_layout(dest, true);
1293 if (error)
1294 return error;
1295 goto retry;
1296 }
1297 if (error) {
1298 inode_unlock(dest);
1299 if (src < dest)
1300 inode_unlock_shared(src);
1301 return error;
1302 }
1303 if (src > dest)
1304 inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
1305 return 0;
1306}
1307
1308/*
1248 * Link a range of blocks from one file to another. 1309 * Link a range of blocks from one file to another.
1249 */ 1310 */
1250int 1311int
@@ -1274,11 +1335,14 @@ xfs_reflink_remap_range(
1274 return -EIO; 1335 return -EIO;
1275 1336
1276 /* Lock both files against IO */ 1337 /* Lock both files against IO */
1277 lock_two_nondirectories(inode_in, inode_out); 1338 ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
1339 if (ret)
1340 return ret;
1278 if (same_inode) 1341 if (same_inode)
1279 xfs_ilock(src, XFS_MMAPLOCK_EXCL); 1342 xfs_ilock(src, XFS_MMAPLOCK_EXCL);
1280 else 1343 else
1281 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); 1344 xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
1345 XFS_MMAPLOCK_EXCL);
1282 1346
1283 /* Check file eligibility and prepare for block sharing. */ 1347 /* Check file eligibility and prepare for block sharing. */
1284 ret = -EINVAL; 1348 ret = -EINVAL;
@@ -1295,6 +1359,11 @@ xfs_reflink_remap_range(
1295 if (ret <= 0) 1359 if (ret <= 0)
1296 goto out_unlock; 1360 goto out_unlock;
1297 1361
1362 /* Attach dquots to dest inode before changing block map */
1363 ret = xfs_qm_dqattach(dest, 0);
1364 if (ret)
1365 goto out_unlock;
1366
1298 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 1367 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1299 1368
1300 /* 1369 /*
@@ -1341,10 +1410,12 @@ xfs_reflink_remap_range(
1341 is_dedupe); 1410 is_dedupe);
1342 1411
1343out_unlock: 1412out_unlock:
1344 xfs_iunlock(src, XFS_MMAPLOCK_EXCL); 1413 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
1414 if (!same_inode)
1415 xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
1416 inode_unlock(inode_out);
1345 if (!same_inode) 1417 if (!same_inode)
1346 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1418 inode_unlock_shared(inode_in);
1347 unlock_two_nondirectories(inode_in, inode_out);
1348 if (ret) 1419 if (ret)
1349 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 1420 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1350 return ret; 1421 return ret;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 3f30f846d7f2..dfee3c991155 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -139,6 +139,9 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
139 xfs_rtalloc_query_range_fn fn, 139 xfs_rtalloc_query_range_fn fn,
140 void *priv); 140 void *priv);
141bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); 141bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
142int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
143 xfs_rtblock_t start, xfs_extlen_t len,
144 bool *is_free);
142#else 145#else
143# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) 146# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
144# define xfs_rtfree_extent(t,b,l) (ENOSYS) 147# define xfs_rtfree_extent(t,b,l) (ENOSYS)
@@ -148,6 +151,7 @@ bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
148# define xfs_rtalloc_query_all(t,f,p) (ENOSYS) 151# define xfs_rtalloc_query_all(t,f,p) (ENOSYS)
149# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) 152# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
150# define xfs_verify_rtbno(m, r) (false) 153# define xfs_verify_rtbno(m, r) (false)
154# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS)
151static inline int /* error */ 155static inline int /* error */
152xfs_rtmount_init( 156xfs_rtmount_init(
153 xfs_mount_t *mp) /* file system mount structure */ 157 xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 1dacccc367f8..f3e0001f9992 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1153,6 +1153,14 @@ xfs_fs_statfs(
1153 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 1153 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
1154 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) 1154 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
1155 xfs_qm_statvfs(ip, statp); 1155 xfs_qm_statvfs(ip, statp);
1156
1157 if (XFS_IS_REALTIME_MOUNT(mp) &&
1158 (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
1159 statp->f_blocks = sbp->sb_rblocks;
1160 statp->f_bavail = statp->f_bfree =
1161 sbp->sb_frextents * sbp->sb_rextsize;
1162 }
1163
1156 return 0; 1164 return 0;
1157} 1165}
1158 1166
@@ -1660,7 +1668,7 @@ xfs_fs_fill_super(
1660 } 1668 }
1661 if (xfs_sb_version_hasreflink(&mp->m_sb)) 1669 if (xfs_sb_version_hasreflink(&mp->m_sb))
1662 xfs_alert(mp, 1670 xfs_alert(mp,
1663 "DAX and reflink have not been tested together!"); 1671 "DAX and reflink cannot be used together!");
1664 } 1672 }
1665 1673
1666 if (mp->m_flags & XFS_MOUNT_DISCARD) { 1674 if (mp->m_flags & XFS_MOUNT_DISCARD) {
@@ -1684,10 +1692,6 @@ xfs_fs_fill_super(
1684 "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); 1692 "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
1685 } 1693 }
1686 1694
1687 if (xfs_sb_version_hasreflink(&mp->m_sb))
1688 xfs_alert(mp,
1689 "EXPERIMENTAL reflink feature enabled. Use at your own risk!");
1690
1691 error = xfs_mountfs(mp); 1695 error = xfs_mountfs(mp);
1692 if (error) 1696 if (error)
1693 goto out_filestream_unmount; 1697 goto out_filestream_unmount;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index d718a10c2271..945de08af7ba 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
72 __entry->flags = ctx->flags; 72 __entry->flags = ctx->flags;
73 ), 73 ),
74 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " 74 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
75 "alist 0x%p size %u count %u firstu %u flags %d %s", 75 "alist %p size %u count %u firstu %u flags %d %s",
76 MAJOR(__entry->dev), MINOR(__entry->dev), 76 MAJOR(__entry->dev), MINOR(__entry->dev),
77 __entry->ino, 77 __entry->ino,
78 __entry->hashval, 78 __entry->hashval,
@@ -119,7 +119,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
119 __entry->refcount = refcount; 119 __entry->refcount = refcount;
120 __entry->caller_ip = caller_ip; 120 __entry->caller_ip = caller_ip;
121 ), 121 ),
122 TP_printk("dev %d:%d agno %u refcount %d caller %ps", 122 TP_printk("dev %d:%d agno %u refcount %d caller %pS",
123 MAJOR(__entry->dev), MINOR(__entry->dev), 123 MAJOR(__entry->dev), MINOR(__entry->dev),
124 __entry->agno, 124 __entry->agno,
125 __entry->refcount, 125 __entry->refcount,
@@ -200,7 +200,7 @@ TRACE_EVENT(xfs_attr_list_node_descend,
200 __entry->bt_before = be32_to_cpu(btree->before); 200 __entry->bt_before = be32_to_cpu(btree->before);
201 ), 201 ),
202 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " 202 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
203 "alist 0x%p size %u count %u firstu %u flags %d %s " 203 "alist %p size %u count %u firstu %u flags %d %s "
204 "node hashval %u, node before %u", 204 "node hashval %u, node before %u",
205 MAJOR(__entry->dev), MINOR(__entry->dev), 205 MAJOR(__entry->dev), MINOR(__entry->dev),
206 __entry->ino, 206 __entry->ino,
@@ -251,8 +251,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
251 __entry->bmap_state = state; 251 __entry->bmap_state = state;
252 __entry->caller_ip = caller_ip; 252 __entry->caller_ip = caller_ip;
253 ), 253 ),
254 TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d " 254 TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d "
255 "offset %lld block %lld count %lld flag %d caller %ps", 255 "offset %lld block %lld count %lld flag %d caller %pS",
256 MAJOR(__entry->dev), MINOR(__entry->dev), 256 MAJOR(__entry->dev), MINOR(__entry->dev),
257 __entry->ino, 257 __entry->ino,
258 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 258 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -301,7 +301,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
301 __entry->caller_ip = caller_ip; 301 __entry->caller_ip = caller_ip;
302 ), 302 ),
303 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " 303 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
304 "lock %d flags %s caller %ps", 304 "lock %d flags %s caller %pS",
305 MAJOR(__entry->dev), MINOR(__entry->dev), 305 MAJOR(__entry->dev), MINOR(__entry->dev),
306 (unsigned long long)__entry->bno, 306 (unsigned long long)__entry->bno,
307 __entry->nblks, 307 __entry->nblks,
@@ -370,7 +370,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
370 __entry->caller_ip = caller_ip; 370 __entry->caller_ip = caller_ip;
371 ), 371 ),
372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
373 "lock %d flags %s caller %ps", 373 "lock %d flags %s caller %pS",
374 MAJOR(__entry->dev), MINOR(__entry->dev), 374 MAJOR(__entry->dev), MINOR(__entry->dev),
375 (unsigned long long)__entry->bno, 375 (unsigned long long)__entry->bno,
376 __entry->buffer_length, 376 __entry->buffer_length,
@@ -390,7 +390,7 @@ DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
390DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); 390DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
391 391
392TRACE_EVENT(xfs_buf_ioerror, 392TRACE_EVENT(xfs_buf_ioerror,
393 TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), 393 TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip),
394 TP_ARGS(bp, error, caller_ip), 394 TP_ARGS(bp, error, caller_ip),
395 TP_STRUCT__entry( 395 TP_STRUCT__entry(
396 __field(dev_t, dev) 396 __field(dev_t, dev)
@@ -401,7 +401,7 @@ TRACE_EVENT(xfs_buf_ioerror,
401 __field(int, pincount) 401 __field(int, pincount)
402 __field(unsigned, lockval) 402 __field(unsigned, lockval)
403 __field(int, error) 403 __field(int, error)
404 __field(unsigned long, caller_ip) 404 __field(xfs_failaddr_t, caller_ip)
405 ), 405 ),
406 TP_fast_assign( 406 TP_fast_assign(
407 __entry->dev = bp->b_target->bt_dev; 407 __entry->dev = bp->b_target->bt_dev;
@@ -415,7 +415,7 @@ TRACE_EVENT(xfs_buf_ioerror,
415 __entry->caller_ip = caller_ip; 415 __entry->caller_ip = caller_ip;
416 ), 416 ),
417 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 417 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
418 "lock %d error %d flags %s caller %ps", 418 "lock %d error %d flags %s caller %pS",
419 MAJOR(__entry->dev), MINOR(__entry->dev), 419 MAJOR(__entry->dev), MINOR(__entry->dev),
420 (unsigned long long)__entry->bno, 420 (unsigned long long)__entry->bno,
421 __entry->buffer_length, 421 __entry->buffer_length,
@@ -460,7 +460,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
460 ), 460 ),
461 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 461 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
462 "lock %d flags %s recur %d refcount %d bliflags %s " 462 "lock %d flags %s recur %d refcount %d bliflags %s "
463 "lidesc 0x%p liflags %s", 463 "lidesc %p liflags %s",
464 MAJOR(__entry->dev), MINOR(__entry->dev), 464 MAJOR(__entry->dev), MINOR(__entry->dev),
465 (unsigned long long)__entry->buf_bno, 465 (unsigned long long)__entry->buf_bno,
466 __entry->buf_len, 466 __entry->buf_len,
@@ -579,7 +579,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
579 __entry->lock_flags = lock_flags; 579 __entry->lock_flags = lock_flags;
580 __entry->caller_ip = caller_ip; 580 __entry->caller_ip = caller_ip;
581 ), 581 ),
582 TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps", 582 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS",
583 MAJOR(__entry->dev), MINOR(__entry->dev), 583 MAJOR(__entry->dev), MINOR(__entry->dev),
584 __entry->ino, 584 __entry->ino,
585 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), 585 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -697,7 +697,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
697 __entry->pincount = atomic_read(&ip->i_pincount); 697 __entry->pincount = atomic_read(&ip->i_pincount);
698 __entry->caller_ip = caller_ip; 698 __entry->caller_ip = caller_ip;
699 ), 699 ),
700 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps", 700 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS",
701 MAJOR(__entry->dev), MINOR(__entry->dev), 701 MAJOR(__entry->dev), MINOR(__entry->dev),
702 __entry->ino, 702 __entry->ino,
703 __entry->count, 703 __entry->count,
@@ -1028,7 +1028,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
1028 __entry->flags = lip->li_flags; 1028 __entry->flags = lip->li_flags;
1029 __entry->lsn = lip->li_lsn; 1029 __entry->lsn = lip->li_lsn;
1030 ), 1030 ),
1031 TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s", 1031 TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s",
1032 MAJOR(__entry->dev), MINOR(__entry->dev), 1032 MAJOR(__entry->dev), MINOR(__entry->dev),
1033 __entry->lip, 1033 __entry->lip,
1034 CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), 1034 CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn),
@@ -1049,7 +1049,7 @@ TRACE_EVENT(xfs_log_force,
1049 __entry->lsn = lsn; 1049 __entry->lsn = lsn;
1050 __entry->caller_ip = caller_ip; 1050 __entry->caller_ip = caller_ip;
1051 ), 1051 ),
1052 TP_printk("dev %d:%d lsn 0x%llx caller %ps", 1052 TP_printk("dev %d:%d lsn 0x%llx caller %pS",
1053 MAJOR(__entry->dev), MINOR(__entry->dev), 1053 MAJOR(__entry->dev), MINOR(__entry->dev),
1054 __entry->lsn, (void *)__entry->caller_ip) 1054 __entry->lsn, (void *)__entry->caller_ip)
1055) 1055)
@@ -1082,7 +1082,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class,
1082 __entry->old_lsn = old_lsn; 1082 __entry->old_lsn = old_lsn;
1083 __entry->new_lsn = new_lsn; 1083 __entry->new_lsn = new_lsn;
1084 ), 1084 ),
1085 TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", 1085 TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s",
1086 MAJOR(__entry->dev), MINOR(__entry->dev), 1086 MAJOR(__entry->dev), MINOR(__entry->dev),
1087 __entry->lip, 1087 __entry->lip,
1088 CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), 1088 CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
@@ -1403,7 +1403,7 @@ TRACE_EVENT(xfs_bunmap,
1403 __entry->flags = flags; 1403 __entry->flags = flags;
1404 ), 1404 ),
1405 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" 1405 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
1406 "flags %s caller %ps", 1406 "flags %s caller %pS",
1407 MAJOR(__entry->dev), MINOR(__entry->dev), 1407 MAJOR(__entry->dev), MINOR(__entry->dev),
1408 __entry->ino, 1408 __entry->ino,
1409 __entry->size, 1409 __entry->size,
@@ -1517,7 +1517,7 @@ TRACE_EVENT(xfs_agf,
1517 ), 1517 ),
1518 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " 1518 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1519 "levels b %u c %u flfirst %u fllast %u flcount %u " 1519 "levels b %u c %u flfirst %u fllast %u flcount %u "
1520 "freeblks %u longest %u caller %ps", 1520 "freeblks %u longest %u caller %pS",
1521 MAJOR(__entry->dev), MINOR(__entry->dev), 1521 MAJOR(__entry->dev), MINOR(__entry->dev),
1522 __entry->agno, 1522 __entry->agno,
1523 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), 1523 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
@@ -2014,7 +2014,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
2014 __entry->count = item->ri_cnt; 2014 __entry->count = item->ri_cnt;
2015 __entry->total = item->ri_total; 2015 __entry->total = item->ri_total;
2016 ), 2016 ),
2017 TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item 0x%p, " 2017 TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, "
2018 "item type %s item region count/total %d/%d", 2018 "item type %s item region count/total %d/%d",
2019 MAJOR(__entry->dev), MINOR(__entry->dev), 2019 MAJOR(__entry->dev), MINOR(__entry->dev),
2020 __entry->tid, 2020 __entry->tid,
@@ -2486,7 +2486,7 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class,
2486 __entry->error = error; 2486 __entry->error = error;
2487 __entry->caller_ip = caller_ip; 2487 __entry->caller_ip = caller_ip;
2488 ), 2488 ),
2489 TP_printk("dev %d:%d agno %u error %d caller %ps", 2489 TP_printk("dev %d:%d agno %u error %d caller %pS",
2490 MAJOR(__entry->dev), MINOR(__entry->dev), 2490 MAJOR(__entry->dev), MINOR(__entry->dev),
2491 __entry->agno, 2491 __entry->agno,
2492 __entry->error, 2492 __entry->error,
@@ -2977,7 +2977,7 @@ DECLARE_EVENT_CLASS(xfs_inode_error_class,
2977 __entry->error = error; 2977 __entry->error = error;
2978 __entry->caller_ip = caller_ip; 2978 __entry->caller_ip = caller_ip;
2979 ), 2979 ),
2980 TP_printk("dev %d:%d ino %llx error %d caller %ps", 2980 TP_printk("dev %d:%d ino %llx error %d caller %pS",
2981 MAJOR(__entry->dev), MINOR(__entry->dev), 2981 MAJOR(__entry->dev), MINOR(__entry->dev),
2982 __entry->ino, 2982 __entry->ino,
2983 __entry->error, 2983 __entry->error,
@@ -3313,6 +3313,32 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
3313DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); 3313DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
3314DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); 3314DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
3315 3315
3316TRACE_EVENT(xfs_trans_resv_calc,
3317 TP_PROTO(struct xfs_mount *mp, unsigned int type,
3318 struct xfs_trans_res *res),
3319 TP_ARGS(mp, type, res),
3320 TP_STRUCT__entry(
3321 __field(dev_t, dev)
3322 __field(int, type)
3323 __field(uint, logres)
3324 __field(int, logcount)
3325 __field(int, logflags)
3326 ),
3327 TP_fast_assign(
3328 __entry->dev = mp->m_super->s_dev;
3329 __entry->type = type;
3330 __entry->logres = res->tr_logres;
3331 __entry->logcount = res->tr_logcount;
3332 __entry->logflags = res->tr_logflags;
3333 ),
3334 TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x",
3335 MAJOR(__entry->dev), MINOR(__entry->dev),
3336 __entry->type,
3337 __entry->logres,
3338 __entry->logcount,
3339 __entry->logflags)
3340);
3341
3316#endif /* _TRACE_XFS_H */ 3342#endif /* _TRACE_XFS_H */
3317 3343
3318#undef TRACE_INCLUDE_PATH 3344#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index a87f657f59c9..86f92df32c42 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -35,6 +35,27 @@
35kmem_zone_t *xfs_trans_zone; 35kmem_zone_t *xfs_trans_zone;
36kmem_zone_t *xfs_log_item_desc_zone; 36kmem_zone_t *xfs_log_item_desc_zone;
37 37
38#if defined(CONFIG_TRACEPOINTS)
39static void
40xfs_trans_trace_reservations(
41 struct xfs_mount *mp)
42{
43 struct xfs_trans_res resv;
44 struct xfs_trans_res *res;
45 struct xfs_trans_res *end_res;
46 int i;
47
48 res = (struct xfs_trans_res *)M_RES(mp);
49 end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
50 for (i = 0; res < end_res; i++, res++)
51 trace_xfs_trans_resv_calc(mp, i, res);
52 xfs_log_get_max_trans_res(mp, &resv);
53 trace_xfs_trans_resv_calc(mp, -1, &resv);
54}
55#else
56# define xfs_trans_trace_reservations(mp)
57#endif
58
38/* 59/*
39 * Initialize the precomputed transaction reservation values 60 * Initialize the precomputed transaction reservation values
40 * in the mount structure. 61 * in the mount structure.
@@ -44,6 +65,7 @@ xfs_trans_init(
44 struct xfs_mount *mp) 65 struct xfs_mount *mp)
45{ 66{
46 xfs_trans_resv_calc(mp, M_RES(mp)); 67 xfs_trans_resv_calc(mp, M_RES(mp));
68 xfs_trans_trace_reservations(mp);
47} 69}
48 70
49/* 71/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 815b53d20e26..9d542dfe0052 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -50,7 +50,7 @@ typedef struct xfs_log_item {
50 uint li_type; /* item type */ 50 uint li_type; /* item type */
51 uint li_flags; /* misc flags */ 51 uint li_flags; /* misc flags */
52 struct xfs_buf *li_buf; /* real buffer pointer */ 52 struct xfs_buf *li_buf; /* real buffer pointer */
53 struct xfs_log_item *li_bio_list; /* buffer item list */ 53 struct list_head li_bio_list; /* buffer item list */
54 void (*li_cb)(struct xfs_buf *, 54 void (*li_cb)(struct xfs_buf *,
55 struct xfs_log_item *); 55 struct xfs_log_item *);
56 /* buffer item iodone */ 56 /* buffer item iodone */
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 3ba7a96a8abd..653ce379d36b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -82,12 +82,12 @@ _xfs_trans_bjoin(
82 ASSERT(bp->b_transp == NULL); 82 ASSERT(bp->b_transp == NULL);
83 83
84 /* 84 /*
85 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 85 * The xfs_buf_log_item pointer is stored in b_log_item. If
86 * it doesn't have one yet, then allocate one and initialize it. 86 * it doesn't have one yet, then allocate one and initialize it.
87 * The checks to see if one is there are in xfs_buf_item_init(). 87 * The checks to see if one is there are in xfs_buf_item_init().
88 */ 88 */
89 xfs_buf_item_init(bp, tp->t_mountp); 89 xfs_buf_item_init(bp, tp->t_mountp);
90 bip = bp->b_fspriv; 90 bip = bp->b_log_item;
91 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 91 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
92 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); 92 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
93 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 93 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -118,7 +118,7 @@ xfs_trans_bjoin(
118 struct xfs_buf *bp) 118 struct xfs_buf *bp)
119{ 119{
120 _xfs_trans_bjoin(tp, bp, 0); 120 _xfs_trans_bjoin(tp, bp, 0);
121 trace_xfs_trans_bjoin(bp->b_fspriv); 121 trace_xfs_trans_bjoin(bp->b_log_item);
122} 122}
123 123
124/* 124/*
@@ -139,7 +139,7 @@ xfs_trans_get_buf_map(
139 xfs_buf_flags_t flags) 139 xfs_buf_flags_t flags)
140{ 140{
141 xfs_buf_t *bp; 141 xfs_buf_t *bp;
142 xfs_buf_log_item_t *bip; 142 struct xfs_buf_log_item *bip;
143 143
144 if (!tp) 144 if (!tp)
145 return xfs_buf_get_map(target, map, nmaps, flags); 145 return xfs_buf_get_map(target, map, nmaps, flags);
@@ -159,7 +159,7 @@ xfs_trans_get_buf_map(
159 } 159 }
160 160
161 ASSERT(bp->b_transp == tp); 161 ASSERT(bp->b_transp == tp);
162 bip = bp->b_fspriv; 162 bip = bp->b_log_item;
163 ASSERT(bip != NULL); 163 ASSERT(bip != NULL);
164 ASSERT(atomic_read(&bip->bli_refcount) > 0); 164 ASSERT(atomic_read(&bip->bli_refcount) > 0);
165 bip->bli_recur++; 165 bip->bli_recur++;
@@ -175,7 +175,7 @@ xfs_trans_get_buf_map(
175 ASSERT(!bp->b_error); 175 ASSERT(!bp->b_error);
176 176
177 _xfs_trans_bjoin(tp, bp, 1); 177 _xfs_trans_bjoin(tp, bp, 1);
178 trace_xfs_trans_get_buf(bp->b_fspriv); 178 trace_xfs_trans_get_buf(bp->b_log_item);
179 return bp; 179 return bp;
180} 180}
181 181
@@ -188,12 +188,13 @@ xfs_trans_get_buf_map(
188 * mount structure. 188 * mount structure.
189 */ 189 */
190xfs_buf_t * 190xfs_buf_t *
191xfs_trans_getsb(xfs_trans_t *tp, 191xfs_trans_getsb(
192 struct xfs_mount *mp, 192 xfs_trans_t *tp,
193 int flags) 193 struct xfs_mount *mp,
194 int flags)
194{ 195{
195 xfs_buf_t *bp; 196 xfs_buf_t *bp;
196 xfs_buf_log_item_t *bip; 197 struct xfs_buf_log_item *bip;
197 198
198 /* 199 /*
199 * Default to just trying to lock the superblock buffer 200 * Default to just trying to lock the superblock buffer
@@ -210,7 +211,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
210 */ 211 */
211 bp = mp->m_sb_bp; 212 bp = mp->m_sb_bp;
212 if (bp->b_transp == tp) { 213 if (bp->b_transp == tp) {
213 bip = bp->b_fspriv; 214 bip = bp->b_log_item;
214 ASSERT(bip != NULL); 215 ASSERT(bip != NULL);
215 ASSERT(atomic_read(&bip->bli_refcount) > 0); 216 ASSERT(atomic_read(&bip->bli_refcount) > 0);
216 bip->bli_recur++; 217 bip->bli_recur++;
@@ -223,7 +224,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
223 return NULL; 224 return NULL;
224 225
225 _xfs_trans_bjoin(tp, bp, 1); 226 _xfs_trans_bjoin(tp, bp, 1);
226 trace_xfs_trans_getsb(bp->b_fspriv); 227 trace_xfs_trans_getsb(bp->b_log_item);
227 return bp; 228 return bp;
228} 229}
229 230
@@ -266,7 +267,7 @@ xfs_trans_read_buf_map(
266 if (bp) { 267 if (bp) {
267 ASSERT(xfs_buf_islocked(bp)); 268 ASSERT(xfs_buf_islocked(bp));
268 ASSERT(bp->b_transp == tp); 269 ASSERT(bp->b_transp == tp);
269 ASSERT(bp->b_fspriv != NULL); 270 ASSERT(bp->b_log_item != NULL);
270 ASSERT(!bp->b_error); 271 ASSERT(!bp->b_error);
271 ASSERT(bp->b_flags & XBF_DONE); 272 ASSERT(bp->b_flags & XBF_DONE);
272 273
@@ -279,7 +280,7 @@ xfs_trans_read_buf_map(
279 return -EIO; 280 return -EIO;
280 } 281 }
281 282
282 bip = bp->b_fspriv; 283 bip = bp->b_log_item;
283 bip->bli_recur++; 284 bip->bli_recur++;
284 285
285 ASSERT(atomic_read(&bip->bli_refcount) > 0); 286 ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -329,7 +330,7 @@ xfs_trans_read_buf_map(
329 330
330 if (tp) { 331 if (tp) {
331 _xfs_trans_bjoin(tp, bp, 1); 332 _xfs_trans_bjoin(tp, bp, 1);
332 trace_xfs_trans_read_buf(bp->b_fspriv); 333 trace_xfs_trans_read_buf(bp->b_log_item);
333 } 334 }
334 *bpp = bp; 335 *bpp = bp;
335 return 0; 336 return 0;
@@ -352,10 +353,11 @@ xfs_trans_read_buf_map(
352 * brelse() call. 353 * brelse() call.
353 */ 354 */
354void 355void
355xfs_trans_brelse(xfs_trans_t *tp, 356xfs_trans_brelse(
356 xfs_buf_t *bp) 357 xfs_trans_t *tp,
358 xfs_buf_t *bp)
357{ 359{
358 xfs_buf_log_item_t *bip; 360 struct xfs_buf_log_item *bip;
359 int freed; 361 int freed;
360 362
361 /* 363 /*
@@ -368,7 +370,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
368 } 370 }
369 371
370 ASSERT(bp->b_transp == tp); 372 ASSERT(bp->b_transp == tp);
371 bip = bp->b_fspriv; 373 bip = bp->b_log_item;
372 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 374 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
373 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 375 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
374 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); 376 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -456,10 +458,11 @@ xfs_trans_brelse(xfs_trans_t *tp,
456 */ 458 */
457/* ARGSUSED */ 459/* ARGSUSED */
458void 460void
459xfs_trans_bhold(xfs_trans_t *tp, 461xfs_trans_bhold(
460 xfs_buf_t *bp) 462 xfs_trans_t *tp,
463 xfs_buf_t *bp)
461{ 464{
462 xfs_buf_log_item_t *bip = bp->b_fspriv; 465 struct xfs_buf_log_item *bip = bp->b_log_item;
463 466
464 ASSERT(bp->b_transp == tp); 467 ASSERT(bp->b_transp == tp);
465 ASSERT(bip != NULL); 468 ASSERT(bip != NULL);
@@ -476,10 +479,11 @@ xfs_trans_bhold(xfs_trans_t *tp,
476 * for this transaction. 479 * for this transaction.
477 */ 480 */
478void 481void
479xfs_trans_bhold_release(xfs_trans_t *tp, 482xfs_trans_bhold_release(
480 xfs_buf_t *bp) 483 xfs_trans_t *tp,
484 xfs_buf_t *bp)
481{ 485{
482 xfs_buf_log_item_t *bip = bp->b_fspriv; 486 struct xfs_buf_log_item *bip = bp->b_log_item;
483 487
484 ASSERT(bp->b_transp == tp); 488 ASSERT(bp->b_transp == tp);
485 ASSERT(bip != NULL); 489 ASSERT(bip != NULL);
@@ -500,7 +504,7 @@ xfs_trans_dirty_buf(
500 struct xfs_trans *tp, 504 struct xfs_trans *tp,
501 struct xfs_buf *bp) 505 struct xfs_buf *bp)
502{ 506{
503 struct xfs_buf_log_item *bip = bp->b_fspriv; 507 struct xfs_buf_log_item *bip = bp->b_log_item;
504 508
505 ASSERT(bp->b_transp == tp); 509 ASSERT(bp->b_transp == tp);
506 ASSERT(bip != NULL); 510 ASSERT(bip != NULL);
@@ -557,7 +561,7 @@ xfs_trans_log_buf(
557 uint first, 561 uint first,
558 uint last) 562 uint last)
559{ 563{
560 struct xfs_buf_log_item *bip = bp->b_fspriv; 564 struct xfs_buf_log_item *bip = bp->b_log_item;
561 565
562 ASSERT(first <= last && last < BBTOB(bp->b_length)); 566 ASSERT(first <= last && last < BBTOB(bp->b_length));
563 ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); 567 ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
@@ -600,10 +604,10 @@ xfs_trans_log_buf(
600 */ 604 */
601void 605void
602xfs_trans_binval( 606xfs_trans_binval(
603 xfs_trans_t *tp, 607 xfs_trans_t *tp,
604 xfs_buf_t *bp) 608 xfs_buf_t *bp)
605{ 609{
606 xfs_buf_log_item_t *bip = bp->b_fspriv; 610 struct xfs_buf_log_item *bip = bp->b_log_item;
607 int i; 611 int i;
608 612
609 ASSERT(bp->b_transp == tp); 613 ASSERT(bp->b_transp == tp);
@@ -655,10 +659,10 @@ xfs_trans_binval(
655 */ 659 */
656void 660void
657xfs_trans_inode_buf( 661xfs_trans_inode_buf(
658 xfs_trans_t *tp, 662 xfs_trans_t *tp,
659 xfs_buf_t *bp) 663 xfs_buf_t *bp)
660{ 664{
661 xfs_buf_log_item_t *bip = bp->b_fspriv; 665 struct xfs_buf_log_item *bip = bp->b_log_item;
662 666
663 ASSERT(bp->b_transp == tp); 667 ASSERT(bp->b_transp == tp);
664 ASSERT(bip != NULL); 668 ASSERT(bip != NULL);
@@ -679,10 +683,10 @@ xfs_trans_inode_buf(
679 */ 683 */
680void 684void
681xfs_trans_stale_inode_buf( 685xfs_trans_stale_inode_buf(
682 xfs_trans_t *tp, 686 xfs_trans_t *tp,
683 xfs_buf_t *bp) 687 xfs_buf_t *bp)
684{ 688{
685 xfs_buf_log_item_t *bip = bp->b_fspriv; 689 struct xfs_buf_log_item *bip = bp->b_log_item;
686 690
687 ASSERT(bp->b_transp == tp); 691 ASSERT(bp->b_transp == tp);
688 ASSERT(bip != NULL); 692 ASSERT(bip != NULL);
@@ -704,10 +708,10 @@ xfs_trans_stale_inode_buf(
704/* ARGSUSED */ 708/* ARGSUSED */
705void 709void
706xfs_trans_inode_alloc_buf( 710xfs_trans_inode_alloc_buf(
707 xfs_trans_t *tp, 711 xfs_trans_t *tp,
708 xfs_buf_t *bp) 712 xfs_buf_t *bp)
709{ 713{
710 xfs_buf_log_item_t *bip = bp->b_fspriv; 714 struct xfs_buf_log_item *bip = bp->b_log_item;
711 715
712 ASSERT(bp->b_transp == tp); 716 ASSERT(bp->b_transp == tp);
713 ASSERT(bip != NULL); 717 ASSERT(bip != NULL);
@@ -729,7 +733,7 @@ xfs_trans_ordered_buf(
729 struct xfs_trans *tp, 733 struct xfs_trans *tp,
730 struct xfs_buf *bp) 734 struct xfs_buf *bp)
731{ 735{
732 struct xfs_buf_log_item *bip = bp->b_fspriv; 736 struct xfs_buf_log_item *bip = bp->b_log_item;
733 737
734 ASSERT(bp->b_transp == tp); 738 ASSERT(bp->b_transp == tp);
735 ASSERT(bip != NULL); 739 ASSERT(bip != NULL);
@@ -759,7 +763,7 @@ xfs_trans_buf_set_type(
759 struct xfs_buf *bp, 763 struct xfs_buf *bp,
760 enum xfs_blft type) 764 enum xfs_blft type)
761{ 765{
762 struct xfs_buf_log_item *bip = bp->b_fspriv; 766 struct xfs_buf_log_item *bip = bp->b_log_item;
763 767
764 if (!tp) 768 if (!tp)
765 return; 769 return;
@@ -776,8 +780,8 @@ xfs_trans_buf_copy_type(
776 struct xfs_buf *dst_bp, 780 struct xfs_buf *dst_bp,
777 struct xfs_buf *src_bp) 781 struct xfs_buf *src_bp)
778{ 782{
779 struct xfs_buf_log_item *sbip = src_bp->b_fspriv; 783 struct xfs_buf_log_item *sbip = src_bp->b_log_item;
780 struct xfs_buf_log_item *dbip = dst_bp->b_fspriv; 784 struct xfs_buf_log_item *dbip = dst_bp->b_log_item;
781 enum xfs_blft type; 785 enum xfs_blft type;
782 786
783 type = xfs_blft_from_flags(&sbip->__bli_format); 787 type = xfs_blft_from_flags(&sbip->__bli_format);
@@ -797,11 +801,11 @@ xfs_trans_buf_copy_type(
797/* ARGSUSED */ 801/* ARGSUSED */
798void 802void
799xfs_trans_dquot_buf( 803xfs_trans_dquot_buf(
800 xfs_trans_t *tp, 804 xfs_trans_t *tp,
801 xfs_buf_t *bp, 805 xfs_buf_t *bp,
802 uint type) 806 uint type)
803{ 807{
804 struct xfs_buf_log_item *bip = bp->b_fspriv; 808 struct xfs_buf_log_item *bip = bp->b_log_item;
805 809
806 ASSERT(type == XFS_BLF_UDQUOT_BUF || 810 ASSERT(type == XFS_BLF_UDQUOT_BUF ||
807 type == XFS_BLF_PDQUOT_BUF || 811 type == XFS_BLF_PDQUOT_BUF ||