aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c16
-rw-r--r--fs/9p/vfs_super.c3
-rw-r--r--fs/Kconfig5
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/affs/super.c7
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/fsclient.c8
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/rxrpc.c3
-rw-r--r--fs/afs/super.c7
-rw-r--r--fs/aio.c119
-rw-r--r--fs/anon_inodes.c109
-rw-r--r--fs/autofs4/autofs_i.h1
-rw-r--r--fs/autofs4/dev-ioctl.c1
-rw-r--r--fs/autofs4/expire.c2
-rw-r--r--fs/autofs4/init.c6
-rw-r--r--fs/autofs4/inode.c12
-rw-r--r--fs/autofs4/waitq.c22
-rw-r--r--fs/befs/linuxvfs.c3
-rw-r--r--fs/bfs/inode.c3
-rw-r--r--fs/binfmt_aout.c18
-rw-r--r--fs/binfmt_elf.c7
-rw-r--r--fs/binfmt_elf_fdpic.c6
-rw-r--r--fs/binfmt_em86.c3
-rw-r--r--fs/binfmt_flat.c4
-rw-r--r--fs/binfmt_misc.c7
-rw-r--r--fs/binfmt_script.c3
-rw-r--r--fs/binfmt_som.c4
-rw-r--r--fs/bio-integrity.c10
-rw-r--r--fs/block_dev.c16
-rw-r--r--fs/btrfs/backref.c10
-rw-r--r--fs/btrfs/check-integrity.c3
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/btrfs/extent-tree.c51
-rw-r--r--fs/btrfs/extent_io.c129
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/extent_map.h4
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c29
-rw-r--r--fs/btrfs/free-space-cache.c3
-rw-r--r--fs/btrfs/inode-map.c6
-rw-r--r--fs/btrfs/inode.c66
-rw-r--r--fs/btrfs/ioctl.c59
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/reada.c2
-rw-r--r--fs/btrfs/scrub.c16
-rw-r--r--fs/btrfs/super.c8
-rw-r--r--fs/btrfs/transaction.c16
-rw-r--r--fs/btrfs/volumes.c33
-rw-r--r--fs/btrfs/zlib.c4
-rw-r--r--fs/cachefiles/namei.c3
-rw-r--r--fs/ceph/super.c3
-rw-r--r--fs/cifs/cifsacl.c1
-rw-r--r--fs/cifs/cifsfs.c7
-rw-r--r--fs/cifs/dir.c20
-rw-r--r--fs/cifs/file.c69
-rw-r--r--fs/cifs/inode.c28
-rw-r--r--fs/cifs/xattr.c6
-rw-r--r--fs/coda/inode.c6
-rw-r--r--fs/compat.c56
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/configfs/configfs_internal.h7
-rw-r--r--fs/configfs/dir.c72
-rw-r--r--fs/configfs/inode.c62
-rw-r--r--fs/configfs/mount.c16
-rw-r--r--fs/configfs/symlink.c12
-rw-r--r--fs/cramfs/inode.c12
-rw-r--r--fs/dcache.c94
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c149
-rw-r--r--fs/devpts/inode.c88
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/dlm/dir.c17
-rw-r--r--fs/dlm/lock.c8
-rw-r--r--fs/dlm/lock.h3
-rw-r--r--fs/dlm/lowcomms.c24
-rw-r--r--fs/ecryptfs/file.c9
-rw-r--r--fs/ecryptfs/main.c19
-rw-r--r--fs/ecryptfs/miscdev.c2
-rw-r--r--fs/ecryptfs/super.c1
-rw-r--r--fs/efs/super.c3
-rw-r--r--fs/eventpoll.c34
-rw-r--r--fs/exec.c46
-rw-r--r--fs/exofs/dir.c4
-rw-r--r--fs/exofs/namei.c13
-rw-r--r--fs/exofs/super.c4
-rw-r--r--fs/ext2/dir.c4
-rw-r--r--fs/ext2/namei.c13
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext3/super.c3
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/file_table.c3
-rw-r--r--fs/freevxfs/vxfs_super.c3
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fs_struct.c29
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/inode.c9
-rw-r--r--fs/gfs2/aops.c12
-rw-r--r--fs/gfs2/bmap.c4
-rw-r--r--fs/gfs2/file.c15
-rw-r--r--fs/gfs2/glock.c224
-rw-r--r--fs/gfs2/incore.h50
-rw-r--r--fs/gfs2/inode.c9
-rw-r--r--fs/gfs2/lock_dlm.c123
-rw-r--r--fs/gfs2/log.c244
-rw-r--r--fs/gfs2/log.h5
-rw-r--r--fs/gfs2/lops.c103
-rw-r--r--fs/gfs2/main.c18
-rw-r--r--fs/gfs2/ops_fstype.c17
-rw-r--r--fs/gfs2/quota.c6
-rw-r--r--fs/gfs2/rgrp.c202
-rw-r--r--fs/gfs2/rgrp.h10
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/trace_gfs2.h60
-rw-r--r--fs/gfs2/util.c1
-rw-r--r--fs/gfs2/util.h3
-rw-r--r--fs/gfs2/xattr.c4
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/hfsplus_fs.h5
-rw-r--r--fs/hfsplus/hfsplus_raw.h2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/ioctl.c34
-rw-r--r--fs/hfsplus/super.c17
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/hpfs/super.c6
-rw-r--r--fs/hppfs/hppfs.c9
-rw-r--r--fs/hugetlbfs/inode.c151
-rw-r--r--fs/inode.c40
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd/journal.c14
-rw-r--r--fs/jbd/transaction.c4
-rw-r--r--fs/jbd2/commit.c4
-rw-r--r--fs/jbd2/journal.c14
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/compr.c2
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/libfs.c8
-rw-r--r--fs/logfs/dir.c21
-rw-r--r--fs/logfs/readwrite.c38
-rw-r--r--fs/logfs/segment.c4
-rw-r--r--fs/logfs/super.c12
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/minix/inode.c38
-rw-r--r--fs/minix/minix.h1
-rw-r--r--fs/minix/namei.c14
-rw-r--r--fs/namei.c221
-rw-r--r--fs/ncpfs/inode.c6
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/dir.c8
-rw-r--r--fs/nfs/getroot.c6
-rw-r--r--fs/nfs/idmap.c1
-rw-r--r--fs/nfs/nfs4proc.c134
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4xdr.c5
-rw-r--r--fs/nfsd/fault_inject.c2
-rw-r--r--fs/nfsd/vfs.c11
-rw-r--r--fs/nilfs2/cpfile.c94
-rw-r--r--fs/nilfs2/dat.c38
-rw-r--r--fs/nilfs2/dir.c4
-rw-r--r--fs/nilfs2/ifile.c4
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c11
-rw-r--r--fs/nilfs2/page.c8
-rw-r--r--fs/nilfs2/recovery.c4
-rw-r--r--fs/nilfs2/segbuf.c4
-rw-r--r--fs/nilfs2/sufile.c68
-rw-r--r--fs/nilfs2/super.c4
-rw-r--r--fs/nilfs2/the_nilfs.c7
-rw-r--r--fs/ntfs/aops.c20
-rw-r--r--fs/ntfs/attrib.c26
-rw-r--r--fs/ntfs/file.c16
-rw-r--r--fs/ntfs/layout.h4
-rw-r--r--fs/ntfs/mft.c6
-rw-r--r--fs/ntfs/super.c21
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/super.c51
-rw-r--r--fs/omfs/inode.c6
-rw-r--r--fs/openpromfs/inode.c3
-rw-r--r--fs/pipe.c8
-rw-r--r--fs/proc/base.c15
-rw-r--r--fs/proc/inode.c16
-rw-r--r--fs/proc/internal.h9
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/task_mmu.c357
-rw-r--r--fs/proc/task_nommu.c69
-rw-r--r--fs/proc/vmcore.c23
-rw-r--r--fs/pstore/inode.c25
-rw-r--r--fs/qnx4/inode.c88
-rw-r--r--fs/qnx4/namei.c9
-rw-r--r--fs/qnx4/qnx4.h2
-rw-r--r--fs/qnx6/Kconfig26
-rw-r--r--fs/qnx6/Makefile7
-rw-r--r--fs/qnx6/README8
-rw-r--r--fs/qnx6/dir.c291
-rw-r--r--fs/qnx6/inode.c698
-rw-r--r--fs/qnx6/namei.c42
-rw-r--r--fs/qnx6/qnx6.h135
-rw-r--r--fs/qnx6/super_mmi.c150
-rw-r--r--fs/quota/dquot.c1
-rw-r--r--fs/quota/quota.c24
-rw-r--r--fs/ramfs/inode.c30
-rw-r--r--fs/reiserfs/acl.h76
-rw-r--r--fs/reiserfs/bitmap.c4
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c6
-rw-r--r--fs/reiserfs/fix_node.c2
-rw-r--r--fs/reiserfs/hashes.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c6
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c2
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/lbalance.c4
-rw-r--r--fs/reiserfs/lock.c2
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/objectid.c3
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c3
-rw-r--r--fs/reiserfs/reiserfs.h2922
-rw-r--r--fs/reiserfs/resize.c3
-rw-r--r--fs/reiserfs/stree.c6
-rw-r--r--fs/reiserfs/super.c12
-rw-r--r--fs/reiserfs/tail_conversion.c6
-rw-r--r--fs/reiserfs/xattr.c6
-rw-r--r--fs/reiserfs/xattr.h122
-rw-r--r--fs/reiserfs/xattr_acl.c6
-rw-r--r--fs/reiserfs/xattr_security.c4
-rw-r--r--fs/reiserfs/xattr_trusted.c4
-rw-r--r--fs/reiserfs/xattr_user.c4
-rw-r--r--fs/romfs/super.c6
-rw-r--r--fs/select.c2
-rw-r--r--fs/seq_file.c28
-rw-r--r--fs/signalfd.c15
-rw-r--r--fs/splice.c7
-rw-r--r--fs/squashfs/file.c8
-rw-r--r--fs/squashfs/super.c3
-rw-r--r--fs/squashfs/symlink.c4
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c23
-rw-r--r--fs/sysfs/dir.c224
-rw-r--r--fs/sysfs/inode.c11
-rw-r--r--fs/sysfs/mount.c5
-rw-r--r--fs/sysfs/sysfs.h17
-rw-r--r--fs/sysv/namei.c12
-rw-r--r--fs/sysv/super.c27
-rw-r--r--fs/sysv/sysv.h1
-rw-r--r--fs/ubifs/file.c4
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/udf/file.c6
-rw-r--r--fs/udf/namei.c13
-rw-r--r--fs/udf/super.c6
-rw-r--r--fs/ufs/namei.c14
-rw-r--r--fs/ufs/super.c7
-rw-r--r--fs/xfs/xfs_dquot.c24
-rw-r--r--fs/xfs/xfs_log_recover.c6
-rw-r--r--fs/xfs/xfs_qm_syscalls.c4
-rw-r--r--fs/xfs/xfs_rename.c11
-rw-r--r--fs/xfs/xfs_super.c7
-rw-r--r--fs/xfs/xfs_trans.c4
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c16
275 files changed, 7918 insertions, 2211 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 1964f98e74be..b85efa773949 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -594,21 +594,21 @@ static int __init init_v9fs(void)
594 int err; 594 int err;
595 pr_info("Installing v9fs 9p2000 file system support\n"); 595 pr_info("Installing v9fs 9p2000 file system support\n");
596 /* TODO: Setup list of registered trasnport modules */ 596 /* TODO: Setup list of registered trasnport modules */
597 err = register_filesystem(&v9fs_fs_type);
598 if (err < 0) {
599 pr_err("Failed to register filesystem\n");
600 return err;
601 }
602 597
603 err = v9fs_cache_register(); 598 err = v9fs_cache_register();
604 if (err < 0) { 599 if (err < 0) {
605 pr_err("Failed to register v9fs for caching\n"); 600 pr_err("Failed to register v9fs for caching\n");
606 goto out_fs_unreg; 601 return err;
607 } 602 }
608 603
609 err = v9fs_sysfs_init(); 604 err = v9fs_sysfs_init();
610 if (err < 0) { 605 if (err < 0) {
611 pr_err("Failed to register with sysfs\n"); 606 pr_err("Failed to register with sysfs\n");
607 goto out_cache;
608 }
609 err = register_filesystem(&v9fs_fs_type);
610 if (err < 0) {
611 pr_err("Failed to register filesystem\n");
612 goto out_sysfs_cleanup; 612 goto out_sysfs_cleanup;
613 } 613 }
614 614
@@ -617,8 +617,8 @@ static int __init init_v9fs(void)
617out_sysfs_cleanup: 617out_sysfs_cleanup:
618 v9fs_sysfs_cleanup(); 618 v9fs_sysfs_cleanup();
619 619
620out_fs_unreg: 620out_cache:
621 unregister_filesystem(&v9fs_fs_type); 621 v9fs_cache_unregister();
622 622
623 return err; 623 return err;
624} 624}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7b0cd87b07c2..10b7d3c9dba8 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -155,9 +155,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
155 goto release_sb; 155 goto release_sb;
156 } 156 }
157 157
158 root = d_alloc_root(inode); 158 root = d_make_root(inode);
159 if (!root) { 159 if (!root) {
160 iput(inode);
161 retval = -ENOMEM; 160 retval = -ENOMEM;
162 goto release_sb; 161 goto release_sb;
163 } 162 }
diff --git a/fs/Kconfig b/fs/Kconfig
index d621f02a3f9e..f95ae3a027f3 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
4 4
5menu "File systems" 5menu "File systems"
6 6
7# Use unaligned word dcache accesses
8config DCACHE_WORD_ACCESS
9 bool
10
7if BLOCK 11if BLOCK
8 12
9source "fs/ext2/Kconfig" 13source "fs/ext2/Kconfig"
@@ -210,6 +214,7 @@ source "fs/minix/Kconfig"
210source "fs/omfs/Kconfig" 214source "fs/omfs/Kconfig"
211source "fs/hpfs/Kconfig" 215source "fs/hpfs/Kconfig"
212source "fs/qnx4/Kconfig" 216source "fs/qnx4/Kconfig"
217source "fs/qnx6/Kconfig"
213source "fs/romfs/Kconfig" 218source "fs/romfs/Kconfig"
214source "fs/pstore/Kconfig" 219source "fs/pstore/Kconfig"
215source "fs/sysv/Kconfig" 220source "fs/sysv/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 93804d4d66e1..2fb977934673 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
102obj-$(CONFIG_AFFS_FS) += affs/ 102obj-$(CONFIG_AFFS_FS) += affs/
103obj-$(CONFIG_ROMFS_FS) += romfs/ 103obj-$(CONFIG_ROMFS_FS) += romfs/
104obj-$(CONFIG_QNX4FS_FS) += qnx4/ 104obj-$(CONFIG_QNX4FS_FS) += qnx4/
105obj-$(CONFIG_QNX6FS_FS) += qnx6/
105obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 106obj-$(CONFIG_AUTOFS4_FS) += autofs4/
106obj-$(CONFIG_ADFS_FS) += adfs/ 107obj-$(CONFIG_ADFS_FS) += adfs/
107obj-$(CONFIG_FUSE_FS) += fuse/ 108obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 8e3b36ace305..06fdcc9382c4 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -483,10 +483,9 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
483 483
484 sb->s_d_op = &adfs_dentry_operations; 484 sb->s_d_op = &adfs_dentry_operations;
485 root = adfs_iget(sb, &root_obj); 485 root = adfs_iget(sb, &root_obj);
486 sb->s_root = d_alloc_root(root); 486 sb->s_root = d_make_root(root);
487 if (!sb->s_root) { 487 if (!sb->s_root) {
488 int i; 488 int i;
489 iput(root);
490 for (i = 0; i < asb->s_map_size; i++) 489 for (i = 0; i < asb->s_map_size; i++)
491 brelse(asb->s_map[i].dm_bh); 490 brelse(asb->s_map[i].dm_bh);
492 kfree(asb->s_map); 491 kfree(asb->s_map);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8ba73fed7964..0782653a05a2 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -473,7 +473,7 @@ got_root:
473 root_inode = affs_iget(sb, root_block); 473 root_inode = affs_iget(sb, root_block);
474 if (IS_ERR(root_inode)) { 474 if (IS_ERR(root_inode)) {
475 ret = PTR_ERR(root_inode); 475 ret = PTR_ERR(root_inode);
476 goto out_error_noinode; 476 goto out_error;
477 } 477 }
478 478
479 if (AFFS_SB(sb)->s_flags & SF_INTL) 479 if (AFFS_SB(sb)->s_flags & SF_INTL)
@@ -481,7 +481,7 @@ got_root:
481 else 481 else
482 sb->s_d_op = &affs_dentry_operations; 482 sb->s_d_op = &affs_dentry_operations;
483 483
484 sb->s_root = d_alloc_root(root_inode); 484 sb->s_root = d_make_root(root_inode);
485 if (!sb->s_root) { 485 if (!sb->s_root) {
486 printk(KERN_ERR "AFFS: Get root inode failed\n"); 486 printk(KERN_ERR "AFFS: Get root inode failed\n");
487 goto out_error; 487 goto out_error;
@@ -494,9 +494,6 @@ got_root:
494 * Begin the cascaded cleanup ... 494 * Begin the cascaded cleanup ...
495 */ 495 */
496out_error: 496out_error:
497 if (root_inode)
498 iput(root_inode);
499out_error_noinode:
500 kfree(sbi->s_bitmap); 497 kfree(sbi->s_bitmap);
501 affs_brelse(root_bh); 498 affs_brelse(root_bh);
502 kfree(sbi->s_prefix); 499 kfree(sbi->s_prefix);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 14d89fa58fee..8f6e9234d565 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
251 ASSERT(key != NULL); 251 ASSERT(key != NULL);
252 252
253 vnode = AFS_FS_I(mapping->host); 253 vnode = AFS_FS_I(mapping->host);
254 if (vnode->flags & AFS_VNODE_DELETED) { 254 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
255 _leave(" = -ESTALE"); 255 _leave(" = -ESTALE");
256 return -ESTALE; 256 return -ESTALE;
257 } 257 }
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f213d109c21..b960ff05ea0b 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -365,10 +365,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
365 _debug("extract data"); 365 _debug("extract data");
366 if (call->count > 0) { 366 if (call->count > 0) {
367 page = call->reply3; 367 page = call->reply3;
368 buffer = kmap_atomic(page, KM_USER0); 368 buffer = kmap_atomic(page);
369 ret = afs_extract_data(call, skb, last, buffer, 369 ret = afs_extract_data(call, skb, last, buffer,
370 call->count); 370 call->count);
371 kunmap_atomic(buffer, KM_USER0); 371 kunmap_atomic(buffer);
372 switch (ret) { 372 switch (ret) {
373 case 0: break; 373 case 0: break;
374 case -EAGAIN: return 0; 374 case -EAGAIN: return 0;
@@ -411,9 +411,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
411 if (call->count < PAGE_SIZE) { 411 if (call->count < PAGE_SIZE) {
412 _debug("clear"); 412 _debug("clear");
413 page = call->reply3; 413 page = call->reply3;
414 buffer = kmap_atomic(page, KM_USER0); 414 buffer = kmap_atomic(page);
415 memset(buffer + call->count, 0, PAGE_SIZE - call->count); 415 memset(buffer + call->count, 0, PAGE_SIZE - call->count);
416 kunmap_atomic(buffer, KM_USER0); 416 kunmap_atomic(buffer);
417 } 417 }
418 418
419 _leave(" = 0 [done]"); 419 _leave(" = 0 [done]");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d2b0888126d4..a306bb6d88d9 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,7 +109,7 @@ struct afs_call {
109 unsigned reply_size; /* current size of reply */ 109 unsigned reply_size; /* current size of reply */
110 unsigned first_offset; /* offset into mapping[first] */ 110 unsigned first_offset; /* offset into mapping[first] */
111 unsigned last_to; /* amount of mapping[last] */ 111 unsigned last_to; /* amount of mapping[last] */
112 unsigned short offset; /* offset into received data store */ 112 unsigned offset; /* offset into received data store */
113 unsigned char unmarshall; /* unmarshalling phase */ 113 unsigned char unmarshall; /* unmarshalling phase */
114 bool incoming; /* T if incoming call */ 114 bool incoming; /* T if incoming call */
115 bool send_pages; /* T if data from mapping should be sent */ 115 bool send_pages; /* T if data from mapping should be sent */
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 8f4ce2658b7d..298cf8919ec7 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -200,9 +200,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
200 if (PageError(page)) 200 if (PageError(page))
201 goto error; 201 goto error;
202 202
203 buf = kmap_atomic(page, KM_USER0); 203 buf = kmap_atomic(page);
204 memcpy(devname, buf, size); 204 memcpy(devname, buf, size);
205 kunmap_atomic(buf, KM_USER0); 205 kunmap_atomic(buf);
206 page_cache_release(page); 206 page_cache_release(page);
207 page = NULL; 207 page = NULL;
208 } 208 }
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e45a323aebb4..8ad8c2a0703a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
314 struct msghdr msg; 314 struct msghdr msg;
315 struct kvec iov[1]; 315 struct kvec iov[1];
316 int ret; 316 int ret;
317 struct sk_buff *skb;
317 318
318 _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); 319 _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
319 320
@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
380 381
381error_do_abort: 382error_do_abort:
382 rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); 383 rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
384 while ((skb = skb_dequeue(&call->rx_queue)))
385 afs_free_skb(skb);
383 rxrpc_kernel_end_call(rxcall); 386 rxrpc_kernel_end_call(rxcall);
384 call->rxcall = NULL; 387 call->rxcall = NULL;
385error_kill_call: 388error_kill_call:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 983ec59fc80d..f02b31e7e648 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -301,7 +301,6 @@ static int afs_fill_super(struct super_block *sb,
301{ 301{
302 struct afs_super_info *as = sb->s_fs_info; 302 struct afs_super_info *as = sb->s_fs_info;
303 struct afs_fid fid; 303 struct afs_fid fid;
304 struct dentry *root = NULL;
305 struct inode *inode = NULL; 304 struct inode *inode = NULL;
306 int ret; 305 int ret;
307 306
@@ -327,18 +326,16 @@ static int afs_fill_super(struct super_block *sb,
327 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); 326 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
328 327
329 ret = -ENOMEM; 328 ret = -ENOMEM;
330 root = d_alloc_root(inode); 329 sb->s_root = d_make_root(inode);
331 if (!root) 330 if (!sb->s_root)
332 goto error; 331 goto error;
333 332
334 sb->s_d_op = &afs_fs_dentry_operations; 333 sb->s_d_op = &afs_fs_dentry_operations;
335 sb->s_root = root;
336 334
337 _leave(" = 0"); 335 _leave(" = 0");
338 return 0; 336 return 0;
339 337
340error: 338error:
341 iput(inode);
342 _leave(" = %d", ret); 339 _leave(" = %d", ret);
343 return ret; 340 return ret;
344} 341}
diff --git a/fs/aio.c b/fs/aio.c
index 969beb0e2231..c7acaf3167aa 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -160,7 +160,7 @@ static int aio_setup_ring(struct kioctx *ctx)
160 160
161 info->nr = nr_events; /* trusted copy */ 161 info->nr = nr_events; /* trusted copy */
162 162
163 ring = kmap_atomic(info->ring_pages[0], KM_USER0); 163 ring = kmap_atomic(info->ring_pages[0]);
164 ring->nr = nr_events; /* user copy */ 164 ring->nr = nr_events; /* user copy */
165 ring->id = ctx->user_id; 165 ring->id = ctx->user_id;
166 ring->head = ring->tail = 0; 166 ring->head = ring->tail = 0;
@@ -168,47 +168,38 @@ static int aio_setup_ring(struct kioctx *ctx)
168 ring->compat_features = AIO_RING_COMPAT_FEATURES; 168 ring->compat_features = AIO_RING_COMPAT_FEATURES;
169 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; 169 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
170 ring->header_length = sizeof(struct aio_ring); 170 ring->header_length = sizeof(struct aio_ring);
171 kunmap_atomic(ring, KM_USER0); 171 kunmap_atomic(ring);
172 172
173 return 0; 173 return 0;
174} 174}
175 175
176 176
177/* aio_ring_event: returns a pointer to the event at the given index from 177/* aio_ring_event: returns a pointer to the event at the given index from
178 * kmap_atomic(, km). Release the pointer with put_aio_ring_event(); 178 * kmap_atomic(). Release the pointer with put_aio_ring_event();
179 */ 179 */
180#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event)) 180#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
181#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) 181#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
182#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) 182#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
183 183
184#define aio_ring_event(info, nr, km) ({ \ 184#define aio_ring_event(info, nr) ({ \
185 unsigned pos = (nr) + AIO_EVENTS_OFFSET; \ 185 unsigned pos = (nr) + AIO_EVENTS_OFFSET; \
186 struct io_event *__event; \ 186 struct io_event *__event; \
187 __event = kmap_atomic( \ 187 __event = kmap_atomic( \
188 (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \ 188 (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
189 __event += pos % AIO_EVENTS_PER_PAGE; \ 189 __event += pos % AIO_EVENTS_PER_PAGE; \
190 __event; \ 190 __event; \
191}) 191})
192 192
193#define put_aio_ring_event(event, km) do { \ 193#define put_aio_ring_event(event) do { \
194 struct io_event *__event = (event); \ 194 struct io_event *__event = (event); \
195 (void)__event; \ 195 (void)__event; \
196 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 196 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
197} while(0) 197} while(0)
198 198
199static void ctx_rcu_free(struct rcu_head *head) 199static void ctx_rcu_free(struct rcu_head *head)
200{ 200{
201 struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); 201 struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
202 unsigned nr_events = ctx->max_reqs;
203
204 kmem_cache_free(kioctx_cachep, ctx); 202 kmem_cache_free(kioctx_cachep, ctx);
205
206 if (nr_events) {
207 spin_lock(&aio_nr_lock);
208 BUG_ON(aio_nr - nr_events > aio_nr);
209 aio_nr -= nr_events;
210 spin_unlock(&aio_nr_lock);
211 }
212} 203}
213 204
214/* __put_ioctx 205/* __put_ioctx
@@ -217,23 +208,23 @@ static void ctx_rcu_free(struct rcu_head *head)
217 */ 208 */
218static void __put_ioctx(struct kioctx *ctx) 209static void __put_ioctx(struct kioctx *ctx)
219{ 210{
211 unsigned nr_events = ctx->max_reqs;
220 BUG_ON(ctx->reqs_active); 212 BUG_ON(ctx->reqs_active);
221 213
222 cancel_delayed_work(&ctx->wq); 214 cancel_delayed_work_sync(&ctx->wq);
223 cancel_work_sync(&ctx->wq.work);
224 aio_free_ring(ctx); 215 aio_free_ring(ctx);
225 mmdrop(ctx->mm); 216 mmdrop(ctx->mm);
226 ctx->mm = NULL; 217 ctx->mm = NULL;
218 if (nr_events) {
219 spin_lock(&aio_nr_lock);
220 BUG_ON(aio_nr - nr_events > aio_nr);
221 aio_nr -= nr_events;
222 spin_unlock(&aio_nr_lock);
223 }
227 pr_debug("__put_ioctx: freeing %p\n", ctx); 224 pr_debug("__put_ioctx: freeing %p\n", ctx);
228 call_rcu(&ctx->rcu_head, ctx_rcu_free); 225 call_rcu(&ctx->rcu_head, ctx_rcu_free);
229} 226}
230 227
231static inline void get_ioctx(struct kioctx *kioctx)
232{
233 BUG_ON(atomic_read(&kioctx->users) <= 0);
234 atomic_inc(&kioctx->users);
235}
236
237static inline int try_get_ioctx(struct kioctx *kioctx) 228static inline int try_get_ioctx(struct kioctx *kioctx)
238{ 229{
239 return atomic_inc_not_zero(&kioctx->users); 230 return atomic_inc_not_zero(&kioctx->users);
@@ -253,7 +244,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
253{ 244{
254 struct mm_struct *mm; 245 struct mm_struct *mm;
255 struct kioctx *ctx; 246 struct kioctx *ctx;
256 int did_sync = 0; 247 int err = -ENOMEM;
257 248
258 /* Prevent overflows */ 249 /* Prevent overflows */
259 if ((nr_events > (0x10000000U / sizeof(struct io_event))) || 250 if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -262,7 +253,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
262 return ERR_PTR(-EINVAL); 253 return ERR_PTR(-EINVAL);
263 } 254 }
264 255
265 if ((unsigned long)nr_events > aio_max_nr) 256 if (!nr_events || (unsigned long)nr_events > aio_max_nr)
266 return ERR_PTR(-EAGAIN); 257 return ERR_PTR(-EAGAIN);
267 258
268 ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); 259 ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -273,7 +264,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
273 mm = ctx->mm = current->mm; 264 mm = ctx->mm = current->mm;
274 atomic_inc(&mm->mm_count); 265 atomic_inc(&mm->mm_count);
275 266
276 atomic_set(&ctx->users, 1); 267 atomic_set(&ctx->users, 2);
277 spin_lock_init(&ctx->ctx_lock); 268 spin_lock_init(&ctx->ctx_lock);
278 spin_lock_init(&ctx->ring_info.ring_lock); 269 spin_lock_init(&ctx->ring_info.ring_lock);
279 init_waitqueue_head(&ctx->wait); 270 init_waitqueue_head(&ctx->wait);
@@ -286,25 +277,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
286 goto out_freectx; 277 goto out_freectx;
287 278
288 /* limit the number of system wide aios */ 279 /* limit the number of system wide aios */
289 do { 280 spin_lock(&aio_nr_lock);
290 spin_lock_bh(&aio_nr_lock); 281 if (aio_nr + nr_events > aio_max_nr ||
291 if (aio_nr + nr_events > aio_max_nr || 282 aio_nr + nr_events < aio_nr) {
292 aio_nr + nr_events < aio_nr) 283 spin_unlock(&aio_nr_lock);
293 ctx->max_reqs = 0;
294 else
295 aio_nr += ctx->max_reqs;
296 spin_unlock_bh(&aio_nr_lock);
297 if (ctx->max_reqs || did_sync)
298 break;
299
300 /* wait for rcu callbacks to have completed before giving up */
301 synchronize_rcu();
302 did_sync = 1;
303 ctx->max_reqs = nr_events;
304 } while (1);
305
306 if (ctx->max_reqs == 0)
307 goto out_cleanup; 284 goto out_cleanup;
285 }
286 aio_nr += ctx->max_reqs;
287 spin_unlock(&aio_nr_lock);
308 288
309 /* now link into global list. */ 289 /* now link into global list. */
310 spin_lock(&mm->ioctx_lock); 290 spin_lock(&mm->ioctx_lock);
@@ -316,16 +296,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
316 return ctx; 296 return ctx;
317 297
318out_cleanup: 298out_cleanup:
319 __put_ioctx(ctx); 299 err = -EAGAIN;
320 return ERR_PTR(-EAGAIN); 300 aio_free_ring(ctx);
321
322out_freectx: 301out_freectx:
323 mmdrop(mm); 302 mmdrop(mm);
324 kmem_cache_free(kioctx_cachep, ctx); 303 kmem_cache_free(kioctx_cachep, ctx);
325 ctx = ERR_PTR(-ENOMEM); 304 dprintk("aio: error allocating ioctx %d\n", err);
326 305 return ERR_PTR(err);
327 dprintk("aio: error allocating ioctx %p\n", ctx);
328 return ctx;
329} 306}
330 307
331/* aio_cancel_all 308/* aio_cancel_all
@@ -413,10 +390,6 @@ void exit_aio(struct mm_struct *mm)
413 aio_cancel_all(ctx); 390 aio_cancel_all(ctx);
414 391
415 wait_for_all_aios(ctx); 392 wait_for_all_aios(ctx);
416 /*
417 * Ensure we don't leave the ctx on the aio_wq
418 */
419 cancel_work_sync(&ctx->wq.work);
420 393
421 if (1 != atomic_read(&ctx->users)) 394 if (1 != atomic_read(&ctx->users))
422 printk(KERN_DEBUG 395 printk(KERN_DEBUG
@@ -490,6 +463,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
490 kmem_cache_free(kiocb_cachep, req); 463 kmem_cache_free(kiocb_cachep, req);
491 ctx->reqs_active--; 464 ctx->reqs_active--;
492 } 465 }
466 if (unlikely(!ctx->reqs_active && ctx->dead))
467 wake_up_all(&ctx->wait);
493 spin_unlock_irq(&ctx->ctx_lock); 468 spin_unlock_irq(&ctx->ctx_lock);
494} 469}
495 470
@@ -607,11 +582,16 @@ static void aio_fput_routine(struct work_struct *data)
607 fput(req->ki_filp); 582 fput(req->ki_filp);
608 583
609 /* Link the iocb into the context's free list */ 584 /* Link the iocb into the context's free list */
585 rcu_read_lock();
610 spin_lock_irq(&ctx->ctx_lock); 586 spin_lock_irq(&ctx->ctx_lock);
611 really_put_req(ctx, req); 587 really_put_req(ctx, req);
588 /*
589 * at that point ctx might've been killed, but actual
590 * freeing is RCU'd
591 */
612 spin_unlock_irq(&ctx->ctx_lock); 592 spin_unlock_irq(&ctx->ctx_lock);
593 rcu_read_unlock();
613 594
614 put_ioctx(ctx);
615 spin_lock_irq(&fput_lock); 595 spin_lock_irq(&fput_lock);
616 } 596 }
617 spin_unlock_irq(&fput_lock); 597 spin_unlock_irq(&fput_lock);
@@ -642,7 +622,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
642 * this function will be executed w/out any aio kthread wakeup. 622 * this function will be executed w/out any aio kthread wakeup.
643 */ 623 */
644 if (unlikely(!fput_atomic(req->ki_filp))) { 624 if (unlikely(!fput_atomic(req->ki_filp))) {
645 get_ioctx(ctx);
646 spin_lock(&fput_lock); 625 spin_lock(&fput_lock);
647 list_add(&req->ki_list, &fput_head); 626 list_add(&req->ki_list, &fput_head);
648 spin_unlock(&fput_lock); 627 spin_unlock(&fput_lock);
@@ -920,7 +899,7 @@ static void aio_kick_handler(struct work_struct *work)
920 unuse_mm(mm); 899 unuse_mm(mm);
921 set_fs(oldfs); 900 set_fs(oldfs);
922 /* 901 /*
923 * we're in a worker thread already, don't use queue_delayed_work, 902 * we're in a worker thread already; no point using non-zero delay
924 */ 903 */
925 if (requeue) 904 if (requeue)
926 queue_delayed_work(aio_wq, &ctx->wq, 0); 905 queue_delayed_work(aio_wq, &ctx->wq, 0);
@@ -1019,10 +998,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
1019 if (kiocbIsCancelled(iocb)) 998 if (kiocbIsCancelled(iocb))
1020 goto put_rq; 999 goto put_rq;
1021 1000
1022 ring = kmap_atomic(info->ring_pages[0], KM_IRQ1); 1001 ring = kmap_atomic(info->ring_pages[0]);
1023 1002
1024 tail = info->tail; 1003 tail = info->tail;
1025 event = aio_ring_event(info, tail, KM_IRQ0); 1004 event = aio_ring_event(info, tail);
1026 if (++tail >= info->nr) 1005 if (++tail >= info->nr)
1027 tail = 0; 1006 tail = 0;
1028 1007
@@ -1043,8 +1022,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
1043 info->tail = tail; 1022 info->tail = tail;
1044 ring->tail = tail; 1023 ring->tail = tail;
1045 1024
1046 put_aio_ring_event(event, KM_IRQ0); 1025 put_aio_ring_event(event);
1047 kunmap_atomic(ring, KM_IRQ1); 1026 kunmap_atomic(ring);
1048 1027
1049 pr_debug("added to ring %p at [%lu]\n", iocb, tail); 1028 pr_debug("added to ring %p at [%lu]\n", iocb, tail);
1050 1029
@@ -1089,7 +1068,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
1089 unsigned long head; 1068 unsigned long head;
1090 int ret = 0; 1069 int ret = 0;
1091 1070
1092 ring = kmap_atomic(info->ring_pages[0], KM_USER0); 1071 ring = kmap_atomic(info->ring_pages[0]);
1093 dprintk("in aio_read_evt h%lu t%lu m%lu\n", 1072 dprintk("in aio_read_evt h%lu t%lu m%lu\n",
1094 (unsigned long)ring->head, (unsigned long)ring->tail, 1073 (unsigned long)ring->head, (unsigned long)ring->tail,
1095 (unsigned long)ring->nr); 1074 (unsigned long)ring->nr);
@@ -1101,18 +1080,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
1101 1080
1102 head = ring->head % info->nr; 1081 head = ring->head % info->nr;
1103 if (head != ring->tail) { 1082 if (head != ring->tail) {
1104 struct io_event *evp = aio_ring_event(info, head, KM_USER1); 1083 struct io_event *evp = aio_ring_event(info, head);
1105 *ent = *evp; 1084 *ent = *evp;
1106 head = (head + 1) % info->nr; 1085 head = (head + 1) % info->nr;
1107 smp_mb(); /* finish reading the event before updatng the head */ 1086 smp_mb(); /* finish reading the event before updatng the head */
1108 ring->head = head; 1087 ring->head = head;
1109 ret = 1; 1088 ret = 1;
1110 put_aio_ring_event(evp, KM_USER1); 1089 put_aio_ring_event(evp);
1111 } 1090 }
1112 spin_unlock(&info->ring_lock); 1091 spin_unlock(&info->ring_lock);
1113 1092
1114out: 1093out:
1115 kunmap_atomic(ring, KM_USER0); 1094 kunmap_atomic(ring);
1116 dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, 1095 dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
1117 (unsigned long)ring->head, (unsigned long)ring->tail); 1096 (unsigned long)ring->head, (unsigned long)ring->tail);
1118 return ret; 1097 return ret;
@@ -1336,10 +1315,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
1336 ret = PTR_ERR(ioctx); 1315 ret = PTR_ERR(ioctx);
1337 if (!IS_ERR(ioctx)) { 1316 if (!IS_ERR(ioctx)) {
1338 ret = put_user(ioctx->user_id, ctxp); 1317 ret = put_user(ioctx->user_id, ctxp);
1339 if (!ret) 1318 if (!ret) {
1319 put_ioctx(ioctx);
1340 return 0; 1320 return 0;
1341 1321 }
1342 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
1343 io_destroy(ioctx); 1322 io_destroy(ioctx);
1344 } 1323 }
1345 1324
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f11e43ed907d..28d39fb84ae3 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -39,19 +39,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
39 .d_dname = anon_inodefs_dname, 39 .d_dname = anon_inodefs_dname,
40}; 40};
41 41
42static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
43 int flags, const char *dev_name, void *data)
44{
45 return mount_pseudo(fs_type, "anon_inode:", NULL,
46 &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
47}
48
49static struct file_system_type anon_inode_fs_type = {
50 .name = "anon_inodefs",
51 .mount = anon_inodefs_mount,
52 .kill_sb = kill_anon_super,
53};
54
55/* 42/*
56 * nop .set_page_dirty method so that people can use .page_mkwrite on 43 * nop .set_page_dirty method so that people can use .page_mkwrite on
57 * anon inodes. 44 * anon inodes.
@@ -65,6 +52,62 @@ static const struct address_space_operations anon_aops = {
65 .set_page_dirty = anon_set_page_dirty, 52 .set_page_dirty = anon_set_page_dirty,
66}; 53};
67 54
55/*
56 * A single inode exists for all anon_inode files. Contrary to pipes,
57 * anon_inode inodes have no associated per-instance data, so we need
58 * only allocate one of them.
59 */
60static struct inode *anon_inode_mkinode(struct super_block *s)
61{
62 struct inode *inode = new_inode_pseudo(s);
63
64 if (!inode)
65 return ERR_PTR(-ENOMEM);
66
67 inode->i_ino = get_next_ino();
68 inode->i_fop = &anon_inode_fops;
69
70 inode->i_mapping->a_ops = &anon_aops;
71
72 /*
73 * Mark the inode dirty from the very beginning,
74 * that way it will never be moved to the dirty
75 * list because mark_inode_dirty() will think
76 * that it already _is_ on the dirty list.
77 */
78 inode->i_state = I_DIRTY;
79 inode->i_mode = S_IRUSR | S_IWUSR;
80 inode->i_uid = current_fsuid();
81 inode->i_gid = current_fsgid();
82 inode->i_flags |= S_PRIVATE;
83 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
84 return inode;
85}
86
87static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
88 int flags, const char *dev_name, void *data)
89{
90 struct dentry *root;
91 root = mount_pseudo(fs_type, "anon_inode:", NULL,
92 &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
93 if (!IS_ERR(root)) {
94 struct super_block *s = root->d_sb;
95 anon_inode_inode = anon_inode_mkinode(s);
96 if (IS_ERR(anon_inode_inode)) {
97 dput(root);
98 deactivate_locked_super(s);
99 root = ERR_CAST(anon_inode_inode);
100 }
101 }
102 return root;
103}
104
105static struct file_system_type anon_inode_fs_type = {
106 .name = "anon_inodefs",
107 .mount = anon_inodefs_mount,
108 .kill_sb = kill_anon_super,
109};
110
68/** 111/**
69 * anon_inode_getfile - creates a new file instance by hooking it up to an 112 * anon_inode_getfile - creates a new file instance by hooking it up to an
70 * anonymous inode, and a dentry that describe the "class" 113 * anonymous inode, and a dentry that describe the "class"
@@ -180,38 +223,6 @@ err_put_unused_fd:
180} 223}
181EXPORT_SYMBOL_GPL(anon_inode_getfd); 224EXPORT_SYMBOL_GPL(anon_inode_getfd);
182 225
183/*
184 * A single inode exists for all anon_inode files. Contrary to pipes,
185 * anon_inode inodes have no associated per-instance data, so we need
186 * only allocate one of them.
187 */
188static struct inode *anon_inode_mkinode(void)
189{
190 struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
191
192 if (!inode)
193 return ERR_PTR(-ENOMEM);
194
195 inode->i_ino = get_next_ino();
196 inode->i_fop = &anon_inode_fops;
197
198 inode->i_mapping->a_ops = &anon_aops;
199
200 /*
201 * Mark the inode dirty from the very beginning,
202 * that way it will never be moved to the dirty
203 * list because mark_inode_dirty() will think
204 * that it already _is_ on the dirty list.
205 */
206 inode->i_state = I_DIRTY;
207 inode->i_mode = S_IRUSR | S_IWUSR;
208 inode->i_uid = current_fsuid();
209 inode->i_gid = current_fsgid();
210 inode->i_flags |= S_PRIVATE;
211 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
212 return inode;
213}
214
215static int __init anon_inode_init(void) 226static int __init anon_inode_init(void)
216{ 227{
217 int error; 228 int error;
@@ -224,16 +235,8 @@ static int __init anon_inode_init(void)
224 error = PTR_ERR(anon_inode_mnt); 235 error = PTR_ERR(anon_inode_mnt);
225 goto err_unregister_filesystem; 236 goto err_unregister_filesystem;
226 } 237 }
227 anon_inode_inode = anon_inode_mkinode();
228 if (IS_ERR(anon_inode_inode)) {
229 error = PTR_ERR(anon_inode_inode);
230 goto err_mntput;
231 }
232
233 return 0; 238 return 0;
234 239
235err_mntput:
236 kern_unmount(anon_inode_mnt);
237err_unregister_filesystem: 240err_unregister_filesystem:
238 unregister_filesystem(&anon_inode_fs_type); 241 unregister_filesystem(&anon_inode_fs_type);
239err_exit: 242err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d8d8e7ba6a1e..eb1cc92cd67d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -110,6 +110,7 @@ struct autofs_sb_info {
110 int sub_version; 110 int sub_version;
111 int min_proto; 111 int min_proto;
112 int max_proto; 112 int max_proto;
113 int compat_daemon;
113 unsigned long exp_timeout; 114 unsigned long exp_timeout;
114 unsigned int type; 115 unsigned int type;
115 int reghost_enabled; 116 int reghost_enabled;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 76741d8d7786..85f1fcdb30e7 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -385,6 +385,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
385 sbi->pipefd = pipefd; 385 sbi->pipefd = pipefd;
386 sbi->pipe = pipe; 386 sbi->pipe = pipe;
387 sbi->catatonic = 0; 387 sbi->catatonic = 0;
388 sbi->compat_daemon = is_compat_task();
388 } 389 }
389out: 390out:
390 mutex_unlock(&sbi->wq_mutex); 391 mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 450f529a4eae..1feb68ecef95 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -124,6 +124,7 @@ start:
124 /* Negative dentry - try next */ 124 /* Negative dentry - try next */
125 if (!simple_positive(q)) { 125 if (!simple_positive(q)) {
126 spin_unlock(&p->d_lock); 126 spin_unlock(&p->d_lock);
127 lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_);
127 p = q; 128 p = q;
128 goto again; 129 goto again;
129 } 130 }
@@ -186,6 +187,7 @@ again:
186 /* Negative dentry - try next */ 187 /* Negative dentry - try next */
187 if (!simple_positive(ret)) { 188 if (!simple_positive(ret)) {
188 spin_unlock(&p->d_lock); 189 spin_unlock(&p->d_lock);
190 lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_);
189 p = ret; 191 p = ret;
190 goto again; 192 goto again;
191 } 193 }
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index c038727b4050..cddc74b9cdb2 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -31,11 +31,11 @@ static int __init init_autofs4_fs(void)
31{ 31{
32 int err; 32 int err;
33 33
34 autofs_dev_ioctl_init();
35
34 err = register_filesystem(&autofs_fs_type); 36 err = register_filesystem(&autofs_fs_type);
35 if (err) 37 if (err)
36 return err; 38 autofs_dev_ioctl_exit();
37
38 autofs_dev_ioctl_init();
39 39
40 return err; 40 return err;
41} 41}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e16980b00b8d..d8dc002e9cc3 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
19#include <linux/parser.h> 19#include <linux/parser.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/magic.h> 21#include <linux/magic.h>
22#include <linux/compat.h>
22#include "autofs_i.h" 23#include "autofs_i.h"
23#include <linux/module.h> 24#include <linux/module.h>
24 25
@@ -224,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
224 set_autofs_type_indirect(&sbi->type); 225 set_autofs_type_indirect(&sbi->type);
225 sbi->min_proto = 0; 226 sbi->min_proto = 0;
226 sbi->max_proto = 0; 227 sbi->max_proto = 0;
228 sbi->compat_daemon = is_compat_task();
227 mutex_init(&sbi->wq_mutex); 229 mutex_init(&sbi->wq_mutex);
228 mutex_init(&sbi->pipe_mutex); 230 mutex_init(&sbi->pipe_mutex);
229 spin_lock_init(&sbi->fs_lock); 231 spin_lock_init(&sbi->fs_lock);
@@ -245,12 +247,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
245 if (!ino) 247 if (!ino)
246 goto fail_free; 248 goto fail_free;
247 root_inode = autofs4_get_inode(s, S_IFDIR | 0755); 249 root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
248 if (!root_inode) 250 root = d_make_root(root_inode);
249 goto fail_ino;
250
251 root = d_alloc_root(root_inode);
252 if (!root) 251 if (!root)
253 goto fail_iput; 252 goto fail_ino;
254 pipe = NULL; 253 pipe = NULL;
255 254
256 root->d_fsdata = ino; 255 root->d_fsdata = ino;
@@ -315,9 +314,6 @@ fail_fput:
315fail_dput: 314fail_dput:
316 dput(root); 315 dput(root);
317 goto fail_free; 316 goto fail_free;
318fail_iput:
319 printk("autofs: get root dentry failed\n");
320 iput(root_inode);
321fail_ino: 317fail_ino:
322 kfree(ino); 318 kfree(ino);
323fail_free: 319fail_free:
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7b..9c098db43344 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,7 +91,24 @@ static int autofs4_write(struct autofs_sb_info *sbi,
91 91
92 return (bytes > 0); 92 return (bytes > 0);
93} 93}
94 94
95/*
96 * The autofs_v5 packet was misdesigned.
97 *
98 * The packets are identical on x86-32 and x86-64, but have different
99 * alignment. Which means that 'sizeof()' will give different results.
100 * Fix it up for the case of running 32-bit user mode on a 64-bit kernel.
101 */
102static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi)
103{
104 size_t pktsz = sizeof(struct autofs_v5_packet);
105#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
106 if (sbi->compat_daemon > 0)
107 pktsz -= 4;
108#endif
109 return pktsz;
110}
111
95static void autofs4_notify_daemon(struct autofs_sb_info *sbi, 112static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
96 struct autofs_wait_queue *wq, 113 struct autofs_wait_queue *wq,
97 int type) 114 int type)
@@ -155,8 +172,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
155 { 172 {
156 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; 173 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
157 174
158 pktsz = sizeof(*packet); 175 pktsz = autofs_v5_packet_size(sbi);
159
160 packet->wait_queue_token = wq->wait_queue_token; 176 packet->wait_queue_token = wq->wait_queue_token;
161 packet->len = wq->name.len; 177 packet->len = wq->name.len;
162 memcpy(packet->name, wq->name.name, wq->name.len); 178 memcpy(packet->name, wq->name.name, wq->name.len);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 6e6d536767fe..e18da23d42b5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -852,9 +852,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
852 ret = PTR_ERR(root); 852 ret = PTR_ERR(root);
853 goto unacquire_priv_sbp; 853 goto unacquire_priv_sbp;
854 } 854 }
855 sb->s_root = d_alloc_root(root); 855 sb->s_root = d_make_root(root);
856 if (!sb->s_root) { 856 if (!sb->s_root) {
857 iput(root);
858 befs_error(sb, "get root inode failed"); 857 befs_error(sb, "get root inode failed");
859 goto unacquire_priv_sbp; 858 goto unacquire_priv_sbp;
860 } 859 }
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b0391bc402b1..e23dc7c8b884 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -367,9 +367,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
367 ret = PTR_ERR(inode); 367 ret = PTR_ERR(inode);
368 goto out2; 368 goto out2;
369 } 369 }
370 s->s_root = d_alloc_root(inode); 370 s->s_root = d_make_root(inode);
371 if (!s->s_root) { 371 if (!s->s_root) {
372 iput(inode);
373 ret = -ENOMEM; 372 ret = -ENOMEM;
374 goto out2; 373 goto out2;
375 } 374 }
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a6395bdb26ae..4d5e6d26578c 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -259,8 +259,14 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
259 current->mm->free_area_cache = current->mm->mmap_base; 259 current->mm->free_area_cache = current->mm->mmap_base;
260 current->mm->cached_hole_size = 0; 260 current->mm->cached_hole_size = 0;
261 261
262 retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
263 if (retval < 0) {
264 /* Someone check-me: is this error path enough? */
265 send_sig(SIGKILL, current, 0);
266 return retval;
267 }
268
262 install_exec_creds(bprm); 269 install_exec_creds(bprm);
263 current->flags &= ~PF_FORKNOEXEC;
264 270
265 if (N_MAGIC(ex) == OMAGIC) { 271 if (N_MAGIC(ex) == OMAGIC) {
266 unsigned long text_addr, map_size; 272 unsigned long text_addr, map_size;
@@ -352,13 +358,6 @@ beyond_if:
352 return retval; 358 return retval;
353 } 359 }
354 360
355 retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
356 if (retval < 0) {
357 /* Someone check-me: is this error path enough? */
358 send_sig(SIGKILL, current, 0);
359 return retval;
360 }
361
362 current->mm->start_stack = 361 current->mm->start_stack =
363 (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); 362 (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
364#ifdef __alpha__ 363#ifdef __alpha__
@@ -454,7 +453,8 @@ out:
454 453
455static int __init init_aout_binfmt(void) 454static int __init init_aout_binfmt(void)
456{ 455{
457 return register_binfmt(&aout_format); 456 register_binfmt(&aout_format);
457 return 0;
458} 458}
459 459
460static void __exit exit_aout_binfmt(void) 460static void __exit exit_aout_binfmt(void)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bcb884e2d613..81878b78c9d4 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -712,7 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
712 goto out_free_dentry; 712 goto out_free_dentry;
713 713
714 /* OK, This is the point of no return */ 714 /* OK, This is the point of no return */
715 current->flags &= ~PF_FORKNOEXEC;
716 current->mm->def_flags = def_flags; 715 current->mm->def_flags = def_flags;
717 716
718 /* Do this immediately, since STACK_TOP as used in setup_arg_pages 717 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
@@ -934,7 +933,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
934#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ 933#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
935 934
936 install_exec_creds(bprm); 935 install_exec_creds(bprm);
937 current->flags &= ~PF_FORKNOEXEC;
938 retval = create_elf_tables(bprm, &loc->elf_ex, 936 retval = create_elf_tables(bprm, &loc->elf_ex,
939 load_addr, interp_load_addr); 937 load_addr, interp_load_addr);
940 if (retval < 0) { 938 if (retval < 0) {
@@ -1421,7 +1419,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
1421 for (i = 1; i < view->n; ++i) { 1419 for (i = 1; i < view->n; ++i) {
1422 const struct user_regset *regset = &view->regsets[i]; 1420 const struct user_regset *regset = &view->regsets[i];
1423 do_thread_regset_writeback(t->task, regset); 1421 do_thread_regset_writeback(t->task, regset);
1424 if (regset->core_note_type && 1422 if (regset->core_note_type && regset->get &&
1425 (!regset->active || regset->active(t->task, regset))) { 1423 (!regset->active || regset->active(t->task, regset))) {
1426 int ret; 1424 int ret;
1427 size_t size = regset->n * regset->size; 1425 size_t size = regset->n * regset->size;
@@ -2077,7 +2075,8 @@ out:
2077 2075
2078static int __init init_elf_binfmt(void) 2076static int __init init_elf_binfmt(void)
2079{ 2077{
2080 return register_binfmt(&elf_format); 2078 register_binfmt(&elf_format);
2079 return 0;
2081} 2080}
2082 2081
2083static void __exit exit_elf_binfmt(void) 2082static void __exit exit_elf_binfmt(void)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 30745f459faf..c64bf5ee2df4 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -91,7 +91,8 @@ static struct linux_binfmt elf_fdpic_format = {
91 91
92static int __init init_elf_fdpic_binfmt(void) 92static int __init init_elf_fdpic_binfmt(void)
93{ 93{
94 return register_binfmt(&elf_fdpic_format); 94 register_binfmt(&elf_fdpic_format);
95 return 0;
95} 96}
96 97
97static void __exit exit_elf_fdpic_binfmt(void) 98static void __exit exit_elf_fdpic_binfmt(void)
@@ -334,8 +335,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
334 current->mm->context.exec_fdpic_loadmap = 0; 335 current->mm->context.exec_fdpic_loadmap = 0;
335 current->mm->context.interp_fdpic_loadmap = 0; 336 current->mm->context.interp_fdpic_loadmap = 0;
336 337
337 current->flags &= ~PF_FORKNOEXEC;
338
339#ifdef CONFIG_MMU 338#ifdef CONFIG_MMU
340 elf_fdpic_arch_lay_out_mm(&exec_params, 339 elf_fdpic_arch_lay_out_mm(&exec_params,
341 &interp_params, 340 &interp_params,
@@ -413,7 +412,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
413#endif 412#endif
414 413
415 install_exec_creds(bprm); 414 install_exec_creds(bprm);
416 current->flags &= ~PF_FORKNOEXEC;
417 if (create_elf_fdpic_tables(bprm, current->mm, 415 if (create_elf_fdpic_tables(bprm, current->mm,
418 &exec_params, &interp_params) < 0) 416 &exec_params, &interp_params) < 0)
419 goto error_kill; 417 goto error_kill;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index b8e8b0acf9bd..2790c7e1912e 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = {
100 100
101static int __init init_em86_binfmt(void) 101static int __init init_em86_binfmt(void)
102{ 102{
103 return register_binfmt(&em86_format); 103 register_binfmt(&em86_format);
104 return 0;
104} 105}
105 106
106static void __exit exit_em86_binfmt(void) 107static void __exit exit_em86_binfmt(void)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 1bffbe0ed778..04f61f0bdfde 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -902,7 +902,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
902 libinfo.lib_list[j].start_data:UNLOADED_LIB; 902 libinfo.lib_list[j].start_data:UNLOADED_LIB;
903 903
904 install_exec_creds(bprm); 904 install_exec_creds(bprm);
905 current->flags &= ~PF_FORKNOEXEC;
906 905
907 set_binfmt(&flat_format); 906 set_binfmt(&flat_format);
908 907
@@ -950,7 +949,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
950 949
951static int __init init_flat_binfmt(void) 950static int __init init_flat_binfmt(void)
952{ 951{
953 return register_binfmt(&flat_format); 952 register_binfmt(&flat_format);
953 return 0;
954} 954}
955 955
956/****************************************************************************/ 956/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a9198dfd5f85..1ffb60355cae 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -726,11 +726,8 @@ static struct file_system_type bm_fs_type = {
726static int __init init_misc_binfmt(void) 726static int __init init_misc_binfmt(void)
727{ 727{
728 int err = register_filesystem(&bm_fs_type); 728 int err = register_filesystem(&bm_fs_type);
729 if (!err) { 729 if (!err)
730 err = insert_binfmt(&misc_format); 730 insert_binfmt(&misc_format);
731 if (err)
732 unregister_filesystem(&bm_fs_type);
733 }
734 return err; 731 return err;
735} 732}
736 733
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 396a9884591f..d3b8c1f63155 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -105,7 +105,8 @@ static struct linux_binfmt script_format = {
105 105
106static int __init init_script_binfmt(void) 106static int __init init_script_binfmt(void)
107{ 107{
108 return register_binfmt(&script_format); 108 register_binfmt(&script_format);
109 return 0;
109} 110}
110 111
111static void __exit exit_script_binfmt(void) 112static void __exit exit_script_binfmt(void)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index cc8560f6c9b0..e4fc746629a7 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -225,7 +225,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
225 goto out_free; 225 goto out_free;
226 226
227 /* OK, This is the point of no return */ 227 /* OK, This is the point of no return */
228 current->flags &= ~PF_FORKNOEXEC;
229 current->personality = PER_HPUX; 228 current->personality = PER_HPUX;
230 setup_new_exec(bprm); 229 setup_new_exec(bprm);
231 230
@@ -289,7 +288,8 @@ static int load_som_library(struct file *f)
289 288
290static int __init init_som_binfmt(void) 289static int __init init_som_binfmt(void)
291{ 290{
292 return register_binfmt(&som_format); 291 register_binfmt(&som_format);
292 return 0;
293} 293}
294 294
295static void __exit exit_som_binfmt(void) 295static void __exit exit_som_binfmt(void)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c2183f3917cd..e85c04b9f61c 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio)
357 bix.sector_size = bi->sector_size; 357 bix.sector_size = bi->sector_size;
358 358
359 bio_for_each_segment(bv, bio, i) { 359 bio_for_each_segment(bv, bio, i) {
360 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); 360 void *kaddr = kmap_atomic(bv->bv_page);
361 bix.data_buf = kaddr + bv->bv_offset; 361 bix.data_buf = kaddr + bv->bv_offset;
362 bix.data_size = bv->bv_len; 362 bix.data_size = bv->bv_len;
363 bix.prot_buf = prot_buf; 363 bix.prot_buf = prot_buf;
@@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio)
371 total += sectors * bi->tuple_size; 371 total += sectors * bi->tuple_size;
372 BUG_ON(total > bio->bi_integrity->bip_size); 372 BUG_ON(total > bio->bi_integrity->bip_size);
373 373
374 kunmap_atomic(kaddr, KM_USER0); 374 kunmap_atomic(kaddr);
375 } 375 }
376} 376}
377 377
@@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio)
498 bix.sector_size = bi->sector_size; 498 bix.sector_size = bi->sector_size;
499 499
500 bio_for_each_segment(bv, bio, i) { 500 bio_for_each_segment(bv, bio, i) {
501 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); 501 void *kaddr = kmap_atomic(bv->bv_page);
502 bix.data_buf = kaddr + bv->bv_offset; 502 bix.data_buf = kaddr + bv->bv_offset;
503 bix.data_size = bv->bv_len; 503 bix.data_size = bv->bv_len;
504 bix.prot_buf = prot_buf; 504 bix.prot_buf = prot_buf;
@@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio)
507 ret = bi->verify_fn(&bix); 507 ret = bi->verify_fn(&bix);
508 508
509 if (ret) { 509 if (ret) {
510 kunmap_atomic(kaddr, KM_USER0); 510 kunmap_atomic(kaddr);
511 return ret; 511 return ret;
512 } 512 }
513 513
@@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio)
517 total += sectors * bi->tuple_size; 517 total += sectors * bi->tuple_size;
518 BUG_ON(total > bio->bi_integrity->bip_size); 518 BUG_ON(total > bio->bi_integrity->bip_size);
519 519
520 kunmap_atomic(kaddr, KM_USER0); 520 kunmap_atomic(kaddr);
521 } 521 }
522 522
523 return ret; 523 return ret;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0e575d1304b4..5e9f198f7712 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1183 * The latter is necessary to prevent ghost 1183 * The latter is necessary to prevent ghost
1184 * partitions on a removed medium. 1184 * partitions on a removed medium.
1185 */ 1185 */
1186 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) 1186 if (bdev->bd_invalidated) {
1187 rescan_partitions(disk, bdev); 1187 if (!ret)
1188 rescan_partitions(disk, bdev);
1189 else if (ret == -ENOMEDIUM)
1190 invalidate_partitions(disk, bdev);
1191 }
1188 if (ret) 1192 if (ret)
1189 goto out_clear; 1193 goto out_clear;
1190 } else { 1194 } else {
@@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1214 if (bdev->bd_disk->fops->open) 1218 if (bdev->bd_disk->fops->open)
1215 ret = bdev->bd_disk->fops->open(bdev, mode); 1219 ret = bdev->bd_disk->fops->open(bdev, mode);
1216 /* the same as first opener case, read comment there */ 1220 /* the same as first opener case, read comment there */
1217 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) 1221 if (bdev->bd_invalidated) {
1218 rescan_partitions(bdev->bd_disk, bdev); 1222 if (!ret)
1223 rescan_partitions(bdev->bd_disk, bdev);
1224 else if (ret == -ENOMEDIUM)
1225 invalidate_partitions(bdev->bd_disk, bdev);
1226 }
1219 if (ret) 1227 if (ret)
1220 goto out_unlock_bdev; 1228 goto out_unlock_bdev;
1221 } 1229 }
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 633c701a287d..0436c12da8c2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -583,7 +583,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
583 struct btrfs_path *path; 583 struct btrfs_path *path;
584 struct btrfs_key info_key = { 0 }; 584 struct btrfs_key info_key = { 0 };
585 struct btrfs_delayed_ref_root *delayed_refs = NULL; 585 struct btrfs_delayed_ref_root *delayed_refs = NULL;
586 struct btrfs_delayed_ref_head *head = NULL; 586 struct btrfs_delayed_ref_head *head;
587 int info_level = 0; 587 int info_level = 0;
588 int ret; 588 int ret;
589 struct list_head prefs_delayed; 589 struct list_head prefs_delayed;
@@ -607,6 +607,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
607 * at a specified point in time 607 * at a specified point in time
608 */ 608 */
609again: 609again:
610 head = NULL;
611
610 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); 612 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
611 if (ret < 0) 613 if (ret < 0)
612 goto out; 614 goto out;
@@ -635,8 +637,10 @@ again:
635 goto again; 637 goto again;
636 } 638 }
637 ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed); 639 ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
638 if (ret) 640 if (ret) {
641 spin_unlock(&delayed_refs->lock);
639 goto out; 642 goto out;
643 }
640 } 644 }
641 spin_unlock(&delayed_refs->lock); 645 spin_unlock(&delayed_refs->lock);
642 646
@@ -892,6 +896,8 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
892 if (eb != eb_in) 896 if (eb != eb_in)
893 free_extent_buffer(eb); 897 free_extent_buffer(eb);
894 ret = inode_ref_info(parent, 0, fs_root, path, &found_key); 898 ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
899 if (ret > 0)
900 ret = -ENOENT;
895 if (ret) 901 if (ret)
896 break; 902 break;
897 next_inum = found_key.offset; 903 next_inum = found_key.offset;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b669a7d8e499..c053e90f2006 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -89,7 +89,6 @@
89#include "disk-io.h" 89#include "disk-io.h"
90#include "transaction.h" 90#include "transaction.h"
91#include "extent_io.h" 91#include "extent_io.h"
92#include "disk-io.h"
93#include "volumes.h" 92#include "volumes.h"
94#include "print-tree.h" 93#include "print-tree.h"
95#include "locking.h" 94#include "locking.h"
@@ -644,7 +643,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
644static int btrfsic_process_superblock(struct btrfsic_state *state, 643static int btrfsic_process_superblock(struct btrfsic_state *state,
645 struct btrfs_fs_devices *fs_devices) 644 struct btrfs_fs_devices *fs_devices)
646{ 645{
647 int ret; 646 int ret = 0;
648 struct btrfs_super_block *selected_super; 647 struct btrfs_super_block *selected_super;
649 struct list_head *dev_head = &fs_devices->devices; 648 struct list_head *dev_head = &fs_devices->devices;
650 struct btrfs_device *device; 649 struct btrfs_device *device;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a0b2d2..b805afb37fa8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode,
120 page = cb->compressed_pages[i]; 120 page = cb->compressed_pages[i];
121 csum = ~(u32)0; 121 csum = ~(u32)0;
122 122
123 kaddr = kmap_atomic(page, KM_USER0); 123 kaddr = kmap_atomic(page);
124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); 124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
125 btrfs_csum_final(csum, (char *)&csum); 125 btrfs_csum_final(csum, (char *)&csum);
126 kunmap_atomic(kaddr, KM_USER0); 126 kunmap_atomic(kaddr);
127 127
128 if (csum != *cb_sum) { 128 if (csum != *cb_sum) {
129 printk(KERN_INFO "btrfs csum failed ino %llu " 129 printk(KERN_INFO "btrfs csum failed ino %llu "
@@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
521 if (zero_offset) { 521 if (zero_offset) {
522 int zeros; 522 int zeros;
523 zeros = PAGE_CACHE_SIZE - zero_offset; 523 zeros = PAGE_CACHE_SIZE - zero_offset;
524 userpage = kmap_atomic(page, KM_USER0); 524 userpage = kmap_atomic(page);
525 memset(userpage + zero_offset, 0, zeros); 525 memset(userpage + zero_offset, 0, zeros);
526 flush_dcache_page(page); 526 flush_dcache_page(page);
527 kunmap_atomic(userpage, KM_USER0); 527 kunmap_atomic(userpage);
528 } 528 }
529 } 529 }
530 530
@@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
588 page_offset(bio->bi_io_vec->bv_page), 588 page_offset(bio->bi_io_vec->bv_page),
589 PAGE_CACHE_SIZE); 589 PAGE_CACHE_SIZE);
590 read_unlock(&em_tree->lock); 590 read_unlock(&em_tree->lock);
591 if (!em)
592 return -EIO;
591 593
592 compressed_len = em->block_len; 594 compressed_len = em->block_len;
593 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 595 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -991,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
991 bytes = min(PAGE_CACHE_SIZE - *pg_offset, 993 bytes = min(PAGE_CACHE_SIZE - *pg_offset,
992 PAGE_CACHE_SIZE - buf_offset); 994 PAGE_CACHE_SIZE - buf_offset);
993 bytes = min(bytes, working_bytes); 995 bytes = min(bytes, working_bytes);
994 kaddr = kmap_atomic(page_out, KM_USER0); 996 kaddr = kmap_atomic(page_out);
995 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); 997 memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
996 kunmap_atomic(kaddr, KM_USER0); 998 kunmap_atomic(kaddr);
997 flush_dcache_page(page_out); 999 flush_dcache_page(page_out);
998 1000
999 *pg_offset += bytes; 1001 *pg_offset += bytes;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 27ebe61d3ccc..80b6486fd5e6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -886,7 +886,7 @@ struct btrfs_block_rsv {
886 u64 reserved; 886 u64 reserved;
887 struct btrfs_space_info *space_info; 887 struct btrfs_space_info *space_info;
888 spinlock_t lock; 888 spinlock_t lock;
889 unsigned int full:1; 889 unsigned int full;
890}; 890};
891 891
892/* 892/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 811d9f918b1c..534266fe505f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2260,6 +2260,12 @@ int open_ctree(struct super_block *sb,
2260 goto fail_sb_buffer; 2260 goto fail_sb_buffer;
2261 } 2261 }
2262 2262
2263 if (sectorsize < PAGE_SIZE) {
2264 printk(KERN_WARNING "btrfs: Incompatible sector size "
2265 "found on %s\n", sb->s_id);
2266 goto fail_sb_buffer;
2267 }
2268
2263 mutex_lock(&fs_info->chunk_mutex); 2269 mutex_lock(&fs_info->chunk_mutex);
2264 ret = btrfs_read_sys_array(tree_root); 2270 ret = btrfs_read_sys_array(tree_root);
2265 mutex_unlock(&fs_info->chunk_mutex); 2271 mutex_unlock(&fs_info->chunk_mutex);
@@ -2301,6 +2307,12 @@ int open_ctree(struct super_block *sb,
2301 2307
2302 btrfs_close_extra_devices(fs_devices); 2308 btrfs_close_extra_devices(fs_devices);
2303 2309
2310 if (!fs_devices->latest_bdev) {
2311 printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
2312 sb->s_id);
2313 goto fail_tree_roots;
2314 }
2315
2304retry_root_backup: 2316retry_root_backup:
2305 blocksize = btrfs_level_size(tree_root, 2317 blocksize = btrfs_level_size(tree_root,
2306 btrfs_super_root_level(disk_super)); 2318 btrfs_super_root_level(disk_super));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 283af7a676a3..37e0a800d34e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3312,7 +3312,8 @@ commit_trans:
3312 } 3312 }
3313 data_sinfo->bytes_may_use += bytes; 3313 data_sinfo->bytes_may_use += bytes;
3314 trace_btrfs_space_reservation(root->fs_info, "space_info", 3314 trace_btrfs_space_reservation(root->fs_info, "space_info",
3315 (u64)data_sinfo, bytes, 1); 3315 (u64)(unsigned long)data_sinfo,
3316 bytes, 1);
3316 spin_unlock(&data_sinfo->lock); 3317 spin_unlock(&data_sinfo->lock);
3317 3318
3318 return 0; 3319 return 0;
@@ -3333,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3333 spin_lock(&data_sinfo->lock); 3334 spin_lock(&data_sinfo->lock);
3334 data_sinfo->bytes_may_use -= bytes; 3335 data_sinfo->bytes_may_use -= bytes;
3335 trace_btrfs_space_reservation(root->fs_info, "space_info", 3336 trace_btrfs_space_reservation(root->fs_info, "space_info",
3336 (u64)data_sinfo, bytes, 0); 3337 (u64)(unsigned long)data_sinfo,
3338 bytes, 0);
3337 spin_unlock(&data_sinfo->lock); 3339 spin_unlock(&data_sinfo->lock);
3338} 3340}
3339 3341
@@ -3611,12 +3613,15 @@ static int may_commit_transaction(struct btrfs_root *root,
3611 if (space_info != delayed_rsv->space_info) 3613 if (space_info != delayed_rsv->space_info)
3612 return -ENOSPC; 3614 return -ENOSPC;
3613 3615
3616 spin_lock(&space_info->lock);
3614 spin_lock(&delayed_rsv->lock); 3617 spin_lock(&delayed_rsv->lock);
3615 if (delayed_rsv->size < bytes) { 3618 if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
3616 spin_unlock(&delayed_rsv->lock); 3619 spin_unlock(&delayed_rsv->lock);
3620 spin_unlock(&space_info->lock);
3617 return -ENOSPC; 3621 return -ENOSPC;
3618 } 3622 }
3619 spin_unlock(&delayed_rsv->lock); 3623 spin_unlock(&delayed_rsv->lock);
3624 spin_unlock(&space_info->lock);
3620 3625
3621commit: 3626commit:
3622 trans = btrfs_join_transaction(root); 3627 trans = btrfs_join_transaction(root);
@@ -3695,9 +3700,9 @@ again:
3695 if (used + orig_bytes <= space_info->total_bytes) { 3700 if (used + orig_bytes <= space_info->total_bytes) {
3696 space_info->bytes_may_use += orig_bytes; 3701 space_info->bytes_may_use += orig_bytes;
3697 trace_btrfs_space_reservation(root->fs_info, 3702 trace_btrfs_space_reservation(root->fs_info,
3698 "space_info", 3703 "space_info",
3699 (u64)space_info, 3704 (u64)(unsigned long)space_info,
3700 orig_bytes, 1); 3705 orig_bytes, 1);
3701 ret = 0; 3706 ret = 0;
3702 } else { 3707 } else {
3703 /* 3708 /*
@@ -3766,9 +3771,9 @@ again:
3766 if (used + num_bytes < space_info->total_bytes + avail) { 3771 if (used + num_bytes < space_info->total_bytes + avail) {
3767 space_info->bytes_may_use += orig_bytes; 3772 space_info->bytes_may_use += orig_bytes;
3768 trace_btrfs_space_reservation(root->fs_info, 3773 trace_btrfs_space_reservation(root->fs_info,
3769 "space_info", 3774 "space_info",
3770 (u64)space_info, 3775 (u64)(unsigned long)space_info,
3771 orig_bytes, 1); 3776 orig_bytes, 1);
3772 ret = 0; 3777 ret = 0;
3773 } else { 3778 } else {
3774 wait_ordered = true; 3779 wait_ordered = true;
@@ -3913,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
3913 spin_lock(&space_info->lock); 3918 spin_lock(&space_info->lock);
3914 space_info->bytes_may_use -= num_bytes; 3919 space_info->bytes_may_use -= num_bytes;
3915 trace_btrfs_space_reservation(fs_info, "space_info", 3920 trace_btrfs_space_reservation(fs_info, "space_info",
3916 (u64)space_info, 3921 (u64)(unsigned long)space_info,
3917 num_bytes, 0); 3922 num_bytes, 0);
3918 space_info->reservation_progress++; 3923 space_info->reservation_progress++;
3919 spin_unlock(&space_info->lock); 3924 spin_unlock(&space_info->lock);
3920 } 3925 }
@@ -4105,7 +4110,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
4105 num_bytes += div64_u64(data_used + meta_used, 50); 4110 num_bytes += div64_u64(data_used + meta_used, 50);
4106 4111
4107 if (num_bytes * 3 > meta_used) 4112 if (num_bytes * 3 > meta_used)
4108 num_bytes = div64_u64(meta_used, 3); 4113 num_bytes = div64_u64(meta_used, 3) * 2;
4109 4114
4110 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); 4115 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
4111} 4116}
@@ -4132,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4132 block_rsv->reserved += num_bytes; 4137 block_rsv->reserved += num_bytes;
4133 sinfo->bytes_may_use += num_bytes; 4138 sinfo->bytes_may_use += num_bytes;
4134 trace_btrfs_space_reservation(fs_info, "space_info", 4139 trace_btrfs_space_reservation(fs_info, "space_info",
4135 (u64)sinfo, num_bytes, 1); 4140 (u64)(unsigned long)sinfo, num_bytes, 1);
4136 } 4141 }
4137 4142
4138 if (block_rsv->reserved >= block_rsv->size) { 4143 if (block_rsv->reserved >= block_rsv->size) {
4139 num_bytes = block_rsv->reserved - block_rsv->size; 4144 num_bytes = block_rsv->reserved - block_rsv->size;
4140 sinfo->bytes_may_use -= num_bytes; 4145 sinfo->bytes_may_use -= num_bytes;
4141 trace_btrfs_space_reservation(fs_info, "space_info", 4146 trace_btrfs_space_reservation(fs_info, "space_info",
4142 (u64)sinfo, num_bytes, 0); 4147 (u64)(unsigned long)sinfo, num_bytes, 0);
4143 sinfo->reservation_progress++; 4148 sinfo->reservation_progress++;
4144 block_rsv->reserved = block_rsv->size; 4149 block_rsv->reserved = block_rsv->size;
4145 block_rsv->full = 1; 4150 block_rsv->full = 1;
@@ -4192,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
4192 if (!trans->bytes_reserved) 4197 if (!trans->bytes_reserved)
4193 return; 4198 return;
4194 4199
4195 trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans, 4200 trace_btrfs_space_reservation(root->fs_info, "transaction",
4201 (u64)(unsigned long)trans,
4196 trans->bytes_reserved, 0); 4202 trans->bytes_reserved, 0);
4197 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); 4203 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
4198 trans->bytes_reserved = 0; 4204 trans->bytes_reserved = 0;
@@ -4710,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4710 space_info->bytes_reserved += num_bytes; 4716 space_info->bytes_reserved += num_bytes;
4711 if (reserve == RESERVE_ALLOC) { 4717 if (reserve == RESERVE_ALLOC) {
4712 trace_btrfs_space_reservation(cache->fs_info, 4718 trace_btrfs_space_reservation(cache->fs_info,
4713 "space_info", 4719 "space_info",
4714 (u64)space_info, 4720 (u64)(unsigned long)space_info,
4715 num_bytes, 0); 4721 num_bytes, 0);
4716 space_info->bytes_may_use -= num_bytes; 4722 space_info->bytes_may_use -= num_bytes;
4717 } 4723 }
4718 } 4724 }
@@ -7886,9 +7892,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
7886 u64 start; 7892 u64 start;
7887 u64 end; 7893 u64 end;
7888 u64 trimmed = 0; 7894 u64 trimmed = 0;
7895 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
7889 int ret = 0; 7896 int ret = 0;
7890 7897
7891 cache = btrfs_lookup_block_group(fs_info, range->start); 7898 /*
7899 * try to trim all FS space, our block group may start from non-zero.
7900 */
7901 if (range->len == total_bytes)
7902 cache = btrfs_lookup_first_block_group(fs_info, range->start);
7903 else
7904 cache = btrfs_lookup_block_group(fs_info, range->start);
7892 7905
7893 while (cache) { 7906 while (cache) {
7894 if (cache->key.objectid >= (range->start + range->len)) { 7907 if (cache->key.objectid >= (range->start + range->len)) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fcf77e1ded40..2862454bcdb3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -513,6 +513,15 @@ hit_next:
513 WARN_ON(state->end < start); 513 WARN_ON(state->end < start);
514 last_end = state->end; 514 last_end = state->end;
515 515
516 if (state->end < end && !need_resched())
517 next_node = rb_next(&state->rb_node);
518 else
519 next_node = NULL;
520
521 /* the state doesn't have the wanted bits, go ahead */
522 if (!(state->state & bits))
523 goto next;
524
516 /* 525 /*
517 * | ---- desired range ---- | 526 * | ---- desired range ---- |
518 * | state | or 527 * | state | or
@@ -565,20 +574,15 @@ hit_next:
565 goto out; 574 goto out;
566 } 575 }
567 576
568 if (state->end < end && prealloc && !need_resched())
569 next_node = rb_next(&state->rb_node);
570 else
571 next_node = NULL;
572
573 set |= clear_state_bit(tree, state, &bits, wake); 577 set |= clear_state_bit(tree, state, &bits, wake);
578next:
574 if (last_end == (u64)-1) 579 if (last_end == (u64)-1)
575 goto out; 580 goto out;
576 start = last_end + 1; 581 start = last_end + 1;
577 if (start <= end && next_node) { 582 if (start <= end && next_node) {
578 state = rb_entry(next_node, struct extent_state, 583 state = rb_entry(next_node, struct extent_state,
579 rb_node); 584 rb_node);
580 if (state->start == start) 585 goto hit_next;
581 goto hit_next;
582 } 586 }
583 goto search_again; 587 goto search_again;
584 588
@@ -961,8 +965,6 @@ hit_next:
961 965
962 set_state_bits(tree, state, &bits); 966 set_state_bits(tree, state, &bits);
963 clear_state_bit(tree, state, &clear_bits, 0); 967 clear_state_bit(tree, state, &clear_bits, 0);
964
965 merge_state(tree, state);
966 if (last_end == (u64)-1) 968 if (last_end == (u64)-1)
967 goto out; 969 goto out;
968 970
@@ -1007,7 +1009,6 @@ hit_next:
1007 if (state->end <= end) { 1009 if (state->end <= end) {
1008 set_state_bits(tree, state, &bits); 1010 set_state_bits(tree, state, &bits);
1009 clear_state_bit(tree, state, &clear_bits, 0); 1011 clear_state_bit(tree, state, &clear_bits, 0);
1010 merge_state(tree, state);
1011 if (last_end == (u64)-1) 1012 if (last_end == (u64)-1)
1012 goto out; 1013 goto out;
1013 start = last_end + 1; 1014 start = last_end + 1;
@@ -1068,8 +1069,6 @@ hit_next:
1068 1069
1069 set_state_bits(tree, prealloc, &bits); 1070 set_state_bits(tree, prealloc, &bits);
1070 clear_state_bit(tree, prealloc, &clear_bits, 0); 1071 clear_state_bit(tree, prealloc, &clear_bits, 0);
1071
1072 merge_state(tree, prealloc);
1073 prealloc = NULL; 1072 prealloc = NULL;
1074 goto out; 1073 goto out;
1075 } 1074 }
@@ -2154,13 +2153,46 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2154 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, 2153 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
2155 failrec->this_mirror, num_copies, failrec->in_validation); 2154 failrec->this_mirror, num_copies, failrec->in_validation);
2156 2155
2157 tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, 2156 ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
2158 failrec->bio_flags, 0); 2157 failrec->this_mirror,
2159 return 0; 2158 failrec->bio_flags, 0);
2159 return ret;
2160} 2160}
2161 2161
2162/* lots and lots of room for performance fixes in the end_bio funcs */ 2162/* lots and lots of room for performance fixes in the end_bio funcs */
2163 2163
2164int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2165{
2166 int uptodate = (err == 0);
2167 struct extent_io_tree *tree;
2168 int ret;
2169
2170 tree = &BTRFS_I(page->mapping->host)->io_tree;
2171
2172 if (tree->ops && tree->ops->writepage_end_io_hook) {
2173 ret = tree->ops->writepage_end_io_hook(page, start,
2174 end, NULL, uptodate);
2175 if (ret)
2176 uptodate = 0;
2177 }
2178
2179 if (!uptodate && tree->ops &&
2180 tree->ops->writepage_io_failed_hook) {
2181 ret = tree->ops->writepage_io_failed_hook(NULL, page,
2182 start, end, NULL);
2183 /* Writeback already completed */
2184 if (ret == 0)
2185 return 1;
2186 }
2187
2188 if (!uptodate) {
2189 clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
2190 ClearPageUptodate(page);
2191 SetPageError(page);
2192 }
2193 return 0;
2194}
2195
2164/* 2196/*
2165 * after a writepage IO is done, we need to: 2197 * after a writepage IO is done, we need to:
2166 * clear the uptodate bits on error 2198 * clear the uptodate bits on error
@@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2172 */ 2204 */
2173static void end_bio_extent_writepage(struct bio *bio, int err) 2205static void end_bio_extent_writepage(struct bio *bio, int err)
2174{ 2206{
2175 int uptodate = err == 0;
2176 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 2207 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2177 struct extent_io_tree *tree; 2208 struct extent_io_tree *tree;
2178 u64 start; 2209 u64 start;
2179 u64 end; 2210 u64 end;
2180 int whole_page; 2211 int whole_page;
2181 int ret;
2182 2212
2183 do { 2213 do {
2184 struct page *page = bvec->bv_page; 2214 struct page *page = bvec->bv_page;
@@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2195 2225
2196 if (--bvec >= bio->bi_io_vec) 2226 if (--bvec >= bio->bi_io_vec)
2197 prefetchw(&bvec->bv_page->flags); 2227 prefetchw(&bvec->bv_page->flags);
2198 if (tree->ops && tree->ops->writepage_end_io_hook) {
2199 ret = tree->ops->writepage_end_io_hook(page, start,
2200 end, NULL, uptodate);
2201 if (ret)
2202 uptodate = 0;
2203 }
2204
2205 if (!uptodate && tree->ops &&
2206 tree->ops->writepage_io_failed_hook) {
2207 ret = tree->ops->writepage_io_failed_hook(bio, page,
2208 start, end, NULL);
2209 if (ret == 0) {
2210 uptodate = (err == 0);
2211 continue;
2212 }
2213 }
2214 2228
2215 if (!uptodate) { 2229 if (end_extent_writepage(page, err, start, end))
2216 clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); 2230 continue;
2217 ClearPageUptodate(page);
2218 SetPageError(page);
2219 }
2220 2231
2221 if (whole_page) 2232 if (whole_page)
2222 end_page_writeback(page); 2233 end_page_writeback(page);
@@ -2535,10 +2546,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2535 2546
2536 if (zero_offset) { 2547 if (zero_offset) {
2537 iosize = PAGE_CACHE_SIZE - zero_offset; 2548 iosize = PAGE_CACHE_SIZE - zero_offset;
2538 userpage = kmap_atomic(page, KM_USER0); 2549 userpage = kmap_atomic(page);
2539 memset(userpage + zero_offset, 0, iosize); 2550 memset(userpage + zero_offset, 0, iosize);
2540 flush_dcache_page(page); 2551 flush_dcache_page(page);
2541 kunmap_atomic(userpage, KM_USER0); 2552 kunmap_atomic(userpage);
2542 } 2553 }
2543 } 2554 }
2544 while (cur <= end) { 2555 while (cur <= end) {
@@ -2547,10 +2558,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2547 struct extent_state *cached = NULL; 2558 struct extent_state *cached = NULL;
2548 2559
2549 iosize = PAGE_CACHE_SIZE - pg_offset; 2560 iosize = PAGE_CACHE_SIZE - pg_offset;
2550 userpage = kmap_atomic(page, KM_USER0); 2561 userpage = kmap_atomic(page);
2551 memset(userpage + pg_offset, 0, iosize); 2562 memset(userpage + pg_offset, 0, iosize);
2552 flush_dcache_page(page); 2563 flush_dcache_page(page);
2553 kunmap_atomic(userpage, KM_USER0); 2564 kunmap_atomic(userpage);
2554 set_extent_uptodate(tree, cur, cur + iosize - 1, 2565 set_extent_uptodate(tree, cur, cur + iosize - 1,
2555 &cached, GFP_NOFS); 2566 &cached, GFP_NOFS);
2556 unlock_extent_cached(tree, cur, cur + iosize - 1, 2567 unlock_extent_cached(tree, cur, cur + iosize - 1,
@@ -2596,10 +2607,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2596 char *userpage; 2607 char *userpage;
2597 struct extent_state *cached = NULL; 2608 struct extent_state *cached = NULL;
2598 2609
2599 userpage = kmap_atomic(page, KM_USER0); 2610 userpage = kmap_atomic(page);
2600 memset(userpage + pg_offset, 0, iosize); 2611 memset(userpage + pg_offset, 0, iosize);
2601 flush_dcache_page(page); 2612 flush_dcache_page(page);
2602 kunmap_atomic(userpage, KM_USER0); 2613 kunmap_atomic(userpage);
2603 2614
2604 set_extent_uptodate(tree, cur, cur + iosize - 1, 2615 set_extent_uptodate(tree, cur, cur + iosize - 1,
2605 &cached, GFP_NOFS); 2616 &cached, GFP_NOFS);
@@ -2745,10 +2756,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2745 if (page->index == end_index) { 2756 if (page->index == end_index) {
2746 char *userpage; 2757 char *userpage;
2747 2758
2748 userpage = kmap_atomic(page, KM_USER0); 2759 userpage = kmap_atomic(page);
2749 memset(userpage + pg_offset, 0, 2760 memset(userpage + pg_offset, 0,
2750 PAGE_CACHE_SIZE - pg_offset); 2761 PAGE_CACHE_SIZE - pg_offset);
2751 kunmap_atomic(userpage, KM_USER0); 2762 kunmap_atomic(userpage);
2752 flush_dcache_page(page); 2763 flush_dcache_page(page);
2753 } 2764 }
2754 pg_offset = 0; 2765 pg_offset = 0;
@@ -2779,9 +2790,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2779 delalloc_start = delalloc_end + 1; 2790 delalloc_start = delalloc_end + 1;
2780 continue; 2791 continue;
2781 } 2792 }
2782 tree->ops->fill_delalloc(inode, page, delalloc_start, 2793 ret = tree->ops->fill_delalloc(inode, page,
2783 delalloc_end, &page_started, 2794 delalloc_start,
2784 &nr_written); 2795 delalloc_end,
2796 &page_started,
2797 &nr_written);
2798 BUG_ON(ret);
2785 /* 2799 /*
2786 * delalloc_end is already one less than the total 2800 * delalloc_end is already one less than the total
2787 * length, so we don't subtract one from 2801 * length, so we don't subtract one from
@@ -2818,8 +2832,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2818 if (tree->ops && tree->ops->writepage_start_hook) { 2832 if (tree->ops && tree->ops->writepage_start_hook) {
2819 ret = tree->ops->writepage_start_hook(page, start, 2833 ret = tree->ops->writepage_start_hook(page, start,
2820 page_end); 2834 page_end);
2821 if (ret == -EAGAIN) { 2835 if (ret) {
2822 redirty_page_for_writepage(wbc, page); 2836 /* Fixup worker will requeue */
2837 if (ret == -EBUSY)
2838 wbc->pages_skipped++;
2839 else
2840 redirty_page_for_writepage(wbc, page);
2823 update_nr_written(page, wbc, nr_written); 2841 update_nr_written(page, wbc, nr_written);
2824 unlock_page(page); 2842 unlock_page(page);
2825 ret = 0; 2843 ret = 0;
@@ -3289,7 +3307,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
3289 len = end - start + 1; 3307 len = end - start + 1;
3290 write_lock(&map->lock); 3308 write_lock(&map->lock);
3291 em = lookup_extent_mapping(map, start, len); 3309 em = lookup_extent_mapping(map, start, len);
3292 if (IS_ERR_OR_NULL(em)) { 3310 if (!em) {
3293 write_unlock(&map->lock); 3311 write_unlock(&map->lock);
3294 break; 3312 break;
3295 } 3313 }
@@ -3853,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3853 num_pages = num_extent_pages(eb->start, eb->len); 3871 num_pages = num_extent_pages(eb->start, eb->len);
3854 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3872 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3855 3873
3856 if (eb_straddles_pages(eb)) { 3874 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3857 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3875 cached_state, GFP_NOFS);
3858 cached_state, GFP_NOFS); 3876
3859 }
3860 for (i = 0; i < num_pages; i++) { 3877 for (i = 0; i < num_pages; i++) {
3861 page = extent_buffer_page(eb, i); 3878 page = extent_buffer_page(eb, i);
3862 if (page) 3879 if (page)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc6a042cb6fc..cecc3518c121 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -319,4 +319,5 @@ struct btrfs_mapping_tree;
319int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, 319int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
320 u64 length, u64 logical, struct page *page, 320 u64 length, u64 logical, struct page *page,
321 int mirror_num); 321 int mirror_num);
322int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
322#endif 323#endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 33a7890b1f40..1195f09761fe 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,8 +26,8 @@ struct extent_map {
26 unsigned long flags; 26 unsigned long flags;
27 struct block_device *bdev; 27 struct block_device *bdev;
28 atomic_t refs; 28 atomic_t refs;
29 unsigned int in_tree:1; 29 unsigned int in_tree;
30 unsigned int compress_type:4; 30 unsigned int compress_type;
31}; 31};
32 32
33struct extent_map_tree { 33struct extent_map_tree {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c7fb3a4247d3..078b4fd54500 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -447,13 +447,13 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
447 sums->bytenr = ordered->start; 447 sums->bytenr = ordered->start;
448 } 448 }
449 449
450 data = kmap_atomic(bvec->bv_page, KM_USER0); 450 data = kmap_atomic(bvec->bv_page);
451 sector_sum->sum = ~(u32)0; 451 sector_sum->sum = ~(u32)0;
452 sector_sum->sum = btrfs_csum_data(root, 452 sector_sum->sum = btrfs_csum_data(root,
453 data + bvec->bv_offset, 453 data + bvec->bv_offset,
454 sector_sum->sum, 454 sector_sum->sum,
455 bvec->bv_len); 455 bvec->bv_len);
456 kunmap_atomic(data, KM_USER0); 456 kunmap_atomic(data);
457 btrfs_csum_final(sector_sum->sum, 457 btrfs_csum_final(sector_sum->sum,
458 (char *)&sector_sum->sum); 458 (char *)&sector_sum->sum);
459 sector_sum->bytenr = disk_bytenr; 459 sector_sum->bytenr = disk_bytenr;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 859ba2dd8890..e8d06b6b9194 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1605,6 +1605,14 @@ static long btrfs_fallocate(struct file *file, int mode,
1605 return -EOPNOTSUPP; 1605 return -EOPNOTSUPP;
1606 1606
1607 /* 1607 /*
1608 * Make sure we have enough space before we do the
1609 * allocation.
1610 */
1611 ret = btrfs_check_data_free_space(inode, len);
1612 if (ret)
1613 return ret;
1614
1615 /*
1608 * wait for ordered IO before we have any locks. We'll loop again 1616 * wait for ordered IO before we have any locks. We'll loop again
1609 * below with the locks held. 1617 * below with the locks held.
1610 */ 1618 */
@@ -1667,27 +1675,12 @@ static long btrfs_fallocate(struct file *file, int mode,
1667 if (em->block_start == EXTENT_MAP_HOLE || 1675 if (em->block_start == EXTENT_MAP_HOLE ||
1668 (cur_offset >= inode->i_size && 1676 (cur_offset >= inode->i_size &&
1669 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 1677 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
1670
1671 /*
1672 * Make sure we have enough space before we do the
1673 * allocation.
1674 */
1675 ret = btrfs_check_data_free_space(inode, last_byte -
1676 cur_offset);
1677 if (ret) {
1678 free_extent_map(em);
1679 break;
1680 }
1681
1682 ret = btrfs_prealloc_file_range(inode, mode, cur_offset, 1678 ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
1683 last_byte - cur_offset, 1679 last_byte - cur_offset,
1684 1 << inode->i_blkbits, 1680 1 << inode->i_blkbits,
1685 offset + len, 1681 offset + len,
1686 &alloc_hint); 1682 &alloc_hint);
1687 1683
1688 /* Let go of our reservation. */
1689 btrfs_free_reserved_data_space(inode, last_byte -
1690 cur_offset);
1691 if (ret < 0) { 1684 if (ret < 0) {
1692 free_extent_map(em); 1685 free_extent_map(em);
1693 break; 1686 break;
@@ -1715,6 +1708,8 @@ static long btrfs_fallocate(struct file *file, int mode,
1715 &cached_state, GFP_NOFS); 1708 &cached_state, GFP_NOFS);
1716out: 1709out:
1717 mutex_unlock(&inode->i_mutex); 1710 mutex_unlock(&inode->i_mutex);
1711 /* Let go of our reservation. */
1712 btrfs_free_reserved_data_space(inode, len);
1718 return ret; 1713 return ret;
1719} 1714}
1720 1715
@@ -1761,7 +1756,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
1761 start - root->sectorsize, 1756 start - root->sectorsize,
1762 root->sectorsize, 0); 1757 root->sectorsize, 0);
1763 if (IS_ERR(em)) { 1758 if (IS_ERR(em)) {
1764 ret = -ENXIO; 1759 ret = PTR_ERR(em);
1765 goto out; 1760 goto out;
1766 } 1761 }
1767 last_end = em->start + em->len; 1762 last_end = em->start + em->len;
@@ -1773,7 +1768,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
1773 while (1) { 1768 while (1) {
1774 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); 1769 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
1775 if (IS_ERR(em)) { 1770 if (IS_ERR(em)) {
1776 ret = -ENXIO; 1771 ret = PTR_ERR(em);
1777 break; 1772 break;
1778 } 1773 }
1779 1774
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index c2f20594c9f7..b02e379b14c7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -777,6 +777,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
777 spin_lock(&block_group->lock); 777 spin_lock(&block_group->lock);
778 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 778 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
779 spin_unlock(&block_group->lock); 779 spin_unlock(&block_group->lock);
780 btrfs_free_path(path);
780 goto out; 781 goto out;
781 } 782 }
782 spin_unlock(&block_group->lock); 783 spin_unlock(&block_group->lock);
@@ -1067,7 +1068,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1067 spin_unlock(&block_group->lock); 1068 spin_unlock(&block_group->lock);
1068 ret = 0; 1069 ret = 0;
1069#ifdef DEBUG 1070#ifdef DEBUG
1070 printk(KERN_ERR "btrfs: failed to write free space cace " 1071 printk(KERN_ERR "btrfs: failed to write free space cache "
1071 "for block group %llu\n", block_group->key.objectid); 1072 "for block group %llu\n", block_group->key.objectid);
1072#endif 1073#endif
1073 } 1074 }
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 213ffa86ce1b..ee15d88b33d2 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -438,7 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
438 trans->bytes_reserved); 438 trans->bytes_reserved);
439 if (ret) 439 if (ret)
440 goto out; 440 goto out;
441 trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, 441 trace_btrfs_space_reservation(root->fs_info, "ino_cache",
442 (u64)(unsigned long)trans,
442 trans->bytes_reserved, 1); 443 trans->bytes_reserved, 1);
443again: 444again:
444 inode = lookup_free_ino_inode(root, path); 445 inode = lookup_free_ino_inode(root, path);
@@ -500,7 +501,8 @@ again:
500out_put: 501out_put:
501 iput(inode); 502 iput(inode);
502out_release: 503out_release:
503 trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, 504 trace_btrfs_space_reservation(root->fs_info, "ino_cache",
505 (u64)(unsigned long)trans,
504 trans->bytes_reserved, 0); 506 trans->bytes_reserved, 0);
505 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); 507 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
506out: 508out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 32214fe0f7e3..3a0b5c1f9d31 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -173,9 +173,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
173 cur_size = min_t(unsigned long, compressed_size, 173 cur_size = min_t(unsigned long, compressed_size,
174 PAGE_CACHE_SIZE); 174 PAGE_CACHE_SIZE);
175 175
176 kaddr = kmap_atomic(cpage, KM_USER0); 176 kaddr = kmap_atomic(cpage);
177 write_extent_buffer(leaf, kaddr, ptr, cur_size); 177 write_extent_buffer(leaf, kaddr, ptr, cur_size);
178 kunmap_atomic(kaddr, KM_USER0); 178 kunmap_atomic(kaddr);
179 179
180 i++; 180 i++;
181 ptr += cur_size; 181 ptr += cur_size;
@@ -187,10 +187,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
187 page = find_get_page(inode->i_mapping, 187 page = find_get_page(inode->i_mapping,
188 start >> PAGE_CACHE_SHIFT); 188 start >> PAGE_CACHE_SHIFT);
189 btrfs_set_file_extent_compression(leaf, ei, 0); 189 btrfs_set_file_extent_compression(leaf, ei, 0);
190 kaddr = kmap_atomic(page, KM_USER0); 190 kaddr = kmap_atomic(page);
191 offset = start & (PAGE_CACHE_SIZE - 1); 191 offset = start & (PAGE_CACHE_SIZE - 1);
192 write_extent_buffer(leaf, kaddr + offset, ptr, size); 192 write_extent_buffer(leaf, kaddr + offset, ptr, size);
193 kunmap_atomic(kaddr, KM_USER0); 193 kunmap_atomic(kaddr);
194 page_cache_release(page); 194 page_cache_release(page);
195 } 195 }
196 btrfs_mark_buffer_dirty(leaf); 196 btrfs_mark_buffer_dirty(leaf);
@@ -422,10 +422,10 @@ again:
422 * sending it down to disk 422 * sending it down to disk
423 */ 423 */
424 if (offset) { 424 if (offset) {
425 kaddr = kmap_atomic(page, KM_USER0); 425 kaddr = kmap_atomic(page);
426 memset(kaddr + offset, 0, 426 memset(kaddr + offset, 0,
427 PAGE_CACHE_SIZE - offset); 427 PAGE_CACHE_SIZE - offset);
428 kunmap_atomic(kaddr, KM_USER0); 428 kunmap_atomic(kaddr);
429 } 429 }
430 will_compress = 1; 430 will_compress = 1;
431 } 431 }
@@ -1555,6 +1555,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1555 struct inode *inode; 1555 struct inode *inode;
1556 u64 page_start; 1556 u64 page_start;
1557 u64 page_end; 1557 u64 page_end;
1558 int ret;
1558 1559
1559 fixup = container_of(work, struct btrfs_writepage_fixup, work); 1560 fixup = container_of(work, struct btrfs_writepage_fixup, work);
1560 page = fixup->page; 1561 page = fixup->page;
@@ -1582,12 +1583,21 @@ again:
1582 page_end, &cached_state, GFP_NOFS); 1583 page_end, &cached_state, GFP_NOFS);
1583 unlock_page(page); 1584 unlock_page(page);
1584 btrfs_start_ordered_extent(inode, ordered, 1); 1585 btrfs_start_ordered_extent(inode, ordered, 1);
1586 btrfs_put_ordered_extent(ordered);
1585 goto again; 1587 goto again;
1586 } 1588 }
1587 1589
1588 BUG(); 1590 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
1591 if (ret) {
1592 mapping_set_error(page->mapping, ret);
1593 end_extent_writepage(page, ret, page_start, page_end);
1594 ClearPageChecked(page);
1595 goto out;
1596 }
1597
1589 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); 1598 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1590 ClearPageChecked(page); 1599 ClearPageChecked(page);
1600 set_page_dirty(page);
1591out: 1601out:
1592 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, 1602 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
1593 &cached_state, GFP_NOFS); 1603 &cached_state, GFP_NOFS);
@@ -1630,7 +1640,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1630 fixup->work.func = btrfs_writepage_fixup_worker; 1640 fixup->work.func = btrfs_writepage_fixup_worker;
1631 fixup->page = page; 1641 fixup->page = page;
1632 btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); 1642 btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
1633 return -EAGAIN; 1643 return -EBUSY;
1634} 1644}
1635 1645
1636static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, 1646static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -1863,7 +1873,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1863 } else { 1873 } else {
1864 ret = get_state_private(io_tree, start, &private); 1874 ret = get_state_private(io_tree, start, &private);
1865 } 1875 }
1866 kaddr = kmap_atomic(page, KM_USER0); 1876 kaddr = kmap_atomic(page);
1867 if (ret) 1877 if (ret)
1868 goto zeroit; 1878 goto zeroit;
1869 1879
@@ -1872,7 +1882,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1872 if (csum != private) 1882 if (csum != private)
1873 goto zeroit; 1883 goto zeroit;
1874 1884
1875 kunmap_atomic(kaddr, KM_USER0); 1885 kunmap_atomic(kaddr);
1876good: 1886good:
1877 return 0; 1887 return 0;
1878 1888
@@ -1884,7 +1894,7 @@ zeroit:
1884 (unsigned long long)private); 1894 (unsigned long long)private);
1885 memset(kaddr + offset, 1, end - start + 1); 1895 memset(kaddr + offset, 1, end - start + 1);
1886 flush_dcache_page(page); 1896 flush_dcache_page(page);
1887 kunmap_atomic(kaddr, KM_USER0); 1897 kunmap_atomic(kaddr);
1888 if (private == 0) 1898 if (private == 0)
1889 return 0; 1899 return 0;
1890 return -EIO; 1900 return -EIO;
@@ -4575,7 +4585,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4575 ret = btrfs_insert_dir_item(trans, root, name, name_len, 4585 ret = btrfs_insert_dir_item(trans, root, name, name_len,
4576 parent_inode, &key, 4586 parent_inode, &key,
4577 btrfs_inode_type(inode), index); 4587 btrfs_inode_type(inode), index);
4578 BUG_ON(ret); 4588 if (ret)
4589 goto fail_dir_item;
4579 4590
4580 btrfs_i_size_write(parent_inode, parent_inode->i_size + 4591 btrfs_i_size_write(parent_inode, parent_inode->i_size +
4581 name_len * 2); 4592 name_len * 2);
@@ -4583,6 +4594,23 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4583 ret = btrfs_update_inode(trans, root, parent_inode); 4594 ret = btrfs_update_inode(trans, root, parent_inode);
4584 } 4595 }
4585 return ret; 4596 return ret;
4597
4598fail_dir_item:
4599 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4600 u64 local_index;
4601 int err;
4602 err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
4603 key.objectid, root->root_key.objectid,
4604 parent_ino, &local_index, name, name_len);
4605
4606 } else if (add_backref) {
4607 u64 local_index;
4608 int err;
4609
4610 err = btrfs_del_inode_ref(trans, root, name, name_len,
4611 ino, parent_ino, &local_index);
4612 }
4613 return ret;
4586} 4614}
4587 4615
4588static int btrfs_add_nondir(struct btrfs_trans_handle *trans, 4616static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
@@ -4909,12 +4937,12 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4909 ret = btrfs_decompress(compress_type, tmp, page, 4937 ret = btrfs_decompress(compress_type, tmp, page,
4910 extent_offset, inline_size, max_size); 4938 extent_offset, inline_size, max_size);
4911 if (ret) { 4939 if (ret) {
4912 char *kaddr = kmap_atomic(page, KM_USER0); 4940 char *kaddr = kmap_atomic(page);
4913 unsigned long copy_size = min_t(u64, 4941 unsigned long copy_size = min_t(u64,
4914 PAGE_CACHE_SIZE - pg_offset, 4942 PAGE_CACHE_SIZE - pg_offset,
4915 max_size - extent_offset); 4943 max_size - extent_offset);
4916 memset(kaddr + pg_offset, 0, copy_size); 4944 memset(kaddr + pg_offset, 0, copy_size);
4917 kunmap_atomic(kaddr, KM_USER0); 4945 kunmap_atomic(kaddr);
4918 } 4946 }
4919 kfree(tmp); 4947 kfree(tmp);
4920 return 0; 4948 return 0;
@@ -5691,11 +5719,11 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5691 unsigned long flags; 5719 unsigned long flags;
5692 5720
5693 local_irq_save(flags); 5721 local_irq_save(flags);
5694 kaddr = kmap_atomic(page, KM_IRQ0); 5722 kaddr = kmap_atomic(page);
5695 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 5723 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
5696 csum, bvec->bv_len); 5724 csum, bvec->bv_len);
5697 btrfs_csum_final(csum, (char *)&csum); 5725 btrfs_csum_final(csum, (char *)&csum);
5698 kunmap_atomic(kaddr, KM_IRQ0); 5726 kunmap_atomic(kaddr);
5699 local_irq_restore(flags); 5727 local_irq_restore(flags);
5700 5728
5701 flush_dcache_page(bvec->bv_page); 5729 flush_dcache_page(bvec->bv_page);
@@ -6696,8 +6724,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6696 int err; 6724 int err;
6697 u64 index = 0; 6725 u64 index = 0;
6698 6726
6699 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, 6727 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
6700 new_dirid, S_IFDIR | 0700, &index); 6728 new_dirid, new_dirid,
6729 S_IFDIR | (~current_umask() & S_IRWXUGO),
6730 &index);
6701 if (IS_ERR(inode)) 6731 if (IS_ERR(inode))
6702 return PTR_ERR(inode); 6732 return PTR_ERR(inode);
6703 inode->i_op = &btrfs_dir_inode_operations; 6733 inode->i_op = &btrfs_dir_inode_operations;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 03bb62a9ee24..d8b54715c2de 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -861,6 +861,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
861 int i_done; 861 int i_done;
862 struct btrfs_ordered_extent *ordered; 862 struct btrfs_ordered_extent *ordered;
863 struct extent_state *cached_state = NULL; 863 struct extent_state *cached_state = NULL;
864 struct extent_io_tree *tree;
864 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 865 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
865 866
866 if (isize == 0) 867 if (isize == 0)
@@ -871,18 +872,34 @@ static int cluster_pages_for_defrag(struct inode *inode,
871 num_pages << PAGE_CACHE_SHIFT); 872 num_pages << PAGE_CACHE_SHIFT);
872 if (ret) 873 if (ret)
873 return ret; 874 return ret;
874again:
875 ret = 0;
876 i_done = 0; 875 i_done = 0;
876 tree = &BTRFS_I(inode)->io_tree;
877 877
878 /* step one, lock all the pages */ 878 /* step one, lock all the pages */
879 for (i = 0; i < num_pages; i++) { 879 for (i = 0; i < num_pages; i++) {
880 struct page *page; 880 struct page *page;
881again:
881 page = find_or_create_page(inode->i_mapping, 882 page = find_or_create_page(inode->i_mapping,
882 start_index + i, mask); 883 start_index + i, mask);
883 if (!page) 884 if (!page)
884 break; 885 break;
885 886
887 page_start = page_offset(page);
888 page_end = page_start + PAGE_CACHE_SIZE - 1;
889 while (1) {
890 lock_extent(tree, page_start, page_end, GFP_NOFS);
891 ordered = btrfs_lookup_ordered_extent(inode,
892 page_start);
893 unlock_extent(tree, page_start, page_end, GFP_NOFS);
894 if (!ordered)
895 break;
896
897 unlock_page(page);
898 btrfs_start_ordered_extent(inode, ordered, 1);
899 btrfs_put_ordered_extent(ordered);
900 lock_page(page);
901 }
902
886 if (!PageUptodate(page)) { 903 if (!PageUptodate(page)) {
887 btrfs_readpage(NULL, page); 904 btrfs_readpage(NULL, page);
888 lock_page(page); 905 lock_page(page);
@@ -893,15 +910,22 @@ again:
893 break; 910 break;
894 } 911 }
895 } 912 }
913
896 isize = i_size_read(inode); 914 isize = i_size_read(inode);
897 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 915 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
898 if (!isize || page->index > file_end || 916 if (!isize || page->index > file_end) {
899 page->mapping != inode->i_mapping) {
900 /* whoops, we blew past eof, skip this page */ 917 /* whoops, we blew past eof, skip this page */
901 unlock_page(page); 918 unlock_page(page);
902 page_cache_release(page); 919 page_cache_release(page);
903 break; 920 break;
904 } 921 }
922
923 if (page->mapping != inode->i_mapping) {
924 unlock_page(page);
925 page_cache_release(page);
926 goto again;
927 }
928
905 pages[i] = page; 929 pages[i] = page;
906 i_done++; 930 i_done++;
907 } 931 }
@@ -924,25 +948,6 @@ again:
924 lock_extent_bits(&BTRFS_I(inode)->io_tree, 948 lock_extent_bits(&BTRFS_I(inode)->io_tree,
925 page_start, page_end - 1, 0, &cached_state, 949 page_start, page_end - 1, 0, &cached_state,
926 GFP_NOFS); 950 GFP_NOFS);
927 ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
928 if (ordered &&
929 ordered->file_offset + ordered->len > page_start &&
930 ordered->file_offset < page_end) {
931 btrfs_put_ordered_extent(ordered);
932 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
933 page_start, page_end - 1,
934 &cached_state, GFP_NOFS);
935 for (i = 0; i < i_done; i++) {
936 unlock_page(pages[i]);
937 page_cache_release(pages[i]);
938 }
939 btrfs_wait_ordered_range(inode, page_start,
940 page_end - page_start);
941 goto again;
942 }
943 if (ordered)
944 btrfs_put_ordered_extent(ordered);
945
946 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 951 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
947 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 952 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
948 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, 953 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
@@ -1327,6 +1332,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1327 goto out; 1332 goto out;
1328 } 1333 }
1329 1334
1335 if (name[0] == '.' &&
1336 (namelen == 1 || (name[1] == '.' && namelen == 2))) {
1337 ret = -EEXIST;
1338 goto out;
1339 }
1340
1330 if (subvol) { 1341 if (subvol) {
1331 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1342 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1332 NULL, transid, readonly); 1343 NULL, transid, readonly);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a178f5ebea78..743b86fa4fcb 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -411,9 +411,9 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
411 411
412 bytes = min_t(unsigned long, destlen, out_len - start_byte); 412 bytes = min_t(unsigned long, destlen, out_len - start_byte);
413 413
414 kaddr = kmap_atomic(dest_page, KM_USER0); 414 kaddr = kmap_atomic(dest_page);
415 memcpy(kaddr, workspace->buf + start_byte, bytes); 415 memcpy(kaddr, workspace->buf + start_byte, bytes);
416 kunmap_atomic(kaddr, KM_USER0); 416 kunmap_atomic(kaddr);
417out: 417out:
418 return ret; 418 return ret;
419} 419}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2373b39a132b..22db04550f6a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -305,7 +305,7 @@ again:
305 305
306 spin_lock(&fs_info->reada_lock); 306 spin_lock(&fs_info->reada_lock);
307 ret = radix_tree_insert(&dev->reada_zones, 307 ret = radix_tree_insert(&dev->reada_zones,
308 (unsigned long)zone->end >> PAGE_CACHE_SHIFT, 308 (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
309 zone); 309 zone);
310 spin_unlock(&fs_info->reada_lock); 310 spin_unlock(&fs_info->reada_lock);
311 311
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9770cc5bfb76..390e7102b0ff 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -591,7 +591,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
591 u64 flags = sbio->spag[ix].flags; 591 u64 flags = sbio->spag[ix].flags;
592 592
593 page = sbio->bio->bi_io_vec[ix].bv_page; 593 page = sbio->bio->bi_io_vec[ix].bv_page;
594 buffer = kmap_atomic(page, KM_USER0); 594 buffer = kmap_atomic(page);
595 if (flags & BTRFS_EXTENT_FLAG_DATA) { 595 if (flags & BTRFS_EXTENT_FLAG_DATA) {
596 ret = scrub_checksum_data(sbio->sdev, 596 ret = scrub_checksum_data(sbio->sdev,
597 sbio->spag + ix, buffer); 597 sbio->spag + ix, buffer);
@@ -603,7 +603,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
603 } else { 603 } else {
604 WARN_ON(1); 604 WARN_ON(1);
605 } 605 }
606 kunmap_atomic(buffer, KM_USER0); 606 kunmap_atomic(buffer);
607 607
608 return ret; 608 return ret;
609} 609}
@@ -792,7 +792,7 @@ static void scrub_checksum(struct btrfs_work *work)
792 } 792 }
793 for (i = 0; i < sbio->count; ++i) { 793 for (i = 0; i < sbio->count; ++i) {
794 page = sbio->bio->bi_io_vec[i].bv_page; 794 page = sbio->bio->bi_io_vec[i].bv_page;
795 buffer = kmap_atomic(page, KM_USER0); 795 buffer = kmap_atomic(page);
796 flags = sbio->spag[i].flags; 796 flags = sbio->spag[i].flags;
797 logical = sbio->logical + i * PAGE_SIZE; 797 logical = sbio->logical + i * PAGE_SIZE;
798 ret = 0; 798 ret = 0;
@@ -807,7 +807,7 @@ static void scrub_checksum(struct btrfs_work *work)
807 } else { 807 } else {
808 WARN_ON(1); 808 WARN_ON(1);
809 } 809 }
810 kunmap_atomic(buffer, KM_USER0); 810 kunmap_atomic(buffer);
811 if (ret) { 811 if (ret) {
812 ret = scrub_recheck_error(sbio, i); 812 ret = scrub_recheck_error(sbio, i);
813 if (!ret) { 813 if (!ret) {
@@ -1367,7 +1367,8 @@ out:
1367} 1367}
1368 1368
1369static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, 1369static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
1370 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length) 1370 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
1371 u64 dev_offset)
1371{ 1372{
1372 struct btrfs_mapping_tree *map_tree = 1373 struct btrfs_mapping_tree *map_tree =
1373 &sdev->dev->dev_root->fs_info->mapping_tree; 1374 &sdev->dev->dev_root->fs_info->mapping_tree;
@@ -1391,7 +1392,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
1391 goto out; 1392 goto out;
1392 1393
1393 for (i = 0; i < map->num_stripes; ++i) { 1394 for (i = 0; i < map->num_stripes; ++i) {
1394 if (map->stripes[i].dev == sdev->dev) { 1395 if (map->stripes[i].dev == sdev->dev &&
1396 map->stripes[i].physical == dev_offset) {
1395 ret = scrub_stripe(sdev, map, i, chunk_offset, length); 1397 ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1396 if (ret) 1398 if (ret)
1397 goto out; 1399 goto out;
@@ -1487,7 +1489,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1487 break; 1489 break;
1488 } 1490 }
1489 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1491 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1490 chunk_offset, length); 1492 chunk_offset, length, found_key.offset);
1491 btrfs_put_block_group(cache); 1493 btrfs_put_block_group(cache);
1492 if (ret) 1494 if (ret)
1493 break; 1495 break;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3ce97b217cbe..81df3fec6a6d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -629,7 +629,6 @@ static int btrfs_fill_super(struct super_block *sb,
629 void *data, int silent) 629 void *data, int silent)
630{ 630{
631 struct inode *inode; 631 struct inode *inode;
632 struct dentry *root_dentry;
633 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 632 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
634 struct btrfs_key key; 633 struct btrfs_key key;
635 int err; 634 int err;
@@ -660,15 +659,12 @@ static int btrfs_fill_super(struct super_block *sb,
660 goto fail_close; 659 goto fail_close;
661 } 660 }
662 661
663 root_dentry = d_alloc_root(inode); 662 sb->s_root = d_make_root(inode);
664 if (!root_dentry) { 663 if (!sb->s_root) {
665 iput(inode);
666 err = -ENOMEM; 664 err = -ENOMEM;
667 goto fail_close; 665 goto fail_close;
668 } 666 }
669 667
670 sb->s_root = root_dentry;
671
672 save_mount_options(sb, data); 668 save_mount_options(sb, data);
673 cleancache_init_fs(sb); 669 cleancache_init_fs(sb);
674 sb->s_flags |= MS_ACTIVE; 670 sb->s_flags |= MS_ACTIVE;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 287a6728b1ad..04b77e3ceb7a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -327,7 +327,8 @@ again:
327 327
328 if (num_bytes) { 328 if (num_bytes) {
329 trace_btrfs_space_reservation(root->fs_info, "transaction", 329 trace_btrfs_space_reservation(root->fs_info, "transaction",
330 (u64)h, num_bytes, 1); 330 (u64)(unsigned long)h,
331 num_bytes, 1);
331 h->block_rsv = &root->fs_info->trans_block_rsv; 332 h->block_rsv = &root->fs_info->trans_block_rsv;
332 h->bytes_reserved = num_bytes; 333 h->bytes_reserved = num_bytes;
333 } 334 }
@@ -915,7 +916,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
915 dentry->d_name.name, dentry->d_name.len, 916 dentry->d_name.name, dentry->d_name.len,
916 parent_inode, &key, 917 parent_inode, &key,
917 BTRFS_FT_DIR, index); 918 BTRFS_FT_DIR, index);
918 BUG_ON(ret); 919 if (ret) {
920 pending->error = -EEXIST;
921 dput(parent);
922 goto fail;
923 }
919 924
920 btrfs_i_size_write(parent_inode, parent_inode->i_size + 925 btrfs_i_size_write(parent_inode, parent_inode->i_size +
921 dentry->d_name.len * 2); 926 dentry->d_name.len * 2);
@@ -993,12 +998,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
993{ 998{
994 struct btrfs_pending_snapshot *pending; 999 struct btrfs_pending_snapshot *pending;
995 struct list_head *head = &trans->transaction->pending_snapshots; 1000 struct list_head *head = &trans->transaction->pending_snapshots;
996 int ret;
997 1001
998 list_for_each_entry(pending, head, list) { 1002 list_for_each_entry(pending, head, list)
999 ret = create_pending_snapshot(trans, fs_info, pending); 1003 create_pending_snapshot(trans, fs_info, pending);
1000 BUG_ON(ret);
1001 }
1002 return 0; 1004 return 0;
1003} 1005}
1004 1006
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0b4e2af7954d..ef41f285a475 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -459,12 +459,23 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
459{ 459{
460 struct btrfs_device *device, *next; 460 struct btrfs_device *device, *next;
461 461
462 struct block_device *latest_bdev = NULL;
463 u64 latest_devid = 0;
464 u64 latest_transid = 0;
465
462 mutex_lock(&uuid_mutex); 466 mutex_lock(&uuid_mutex);
463again: 467again:
464 /* This is the initialized path, it is safe to release the devices. */ 468 /* This is the initialized path, it is safe to release the devices. */
465 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 469 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
466 if (device->in_fs_metadata) 470 if (device->in_fs_metadata) {
471 if (!latest_transid ||
472 device->generation > latest_transid) {
473 latest_devid = device->devid;
474 latest_transid = device->generation;
475 latest_bdev = device->bdev;
476 }
467 continue; 477 continue;
478 }
468 479
469 if (device->bdev) { 480 if (device->bdev) {
470 blkdev_put(device->bdev, device->mode); 481 blkdev_put(device->bdev, device->mode);
@@ -487,6 +498,10 @@ again:
487 goto again; 498 goto again;
488 } 499 }
489 500
501 fs_devices->latest_bdev = latest_bdev;
502 fs_devices->latest_devid = latest_devid;
503 fs_devices->latest_trans = latest_transid;
504
490 mutex_unlock(&uuid_mutex); 505 mutex_unlock(&uuid_mutex);
491 return 0; 506 return 0;
492} 507}
@@ -1953,7 +1968,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1953 em = lookup_extent_mapping(em_tree, chunk_offset, 1); 1968 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1954 read_unlock(&em_tree->lock); 1969 read_unlock(&em_tree->lock);
1955 1970
1956 BUG_ON(em->start > chunk_offset || 1971 BUG_ON(!em || em->start > chunk_offset ||
1957 em->start + em->len < chunk_offset); 1972 em->start + em->len < chunk_offset);
1958 map = (struct map_lookup *)em->bdev; 1973 map = (struct map_lookup *)em->bdev;
1959 1974
@@ -4356,6 +4371,20 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4356 return -ENOMEM; 4371 return -ENOMEM;
4357 btrfs_set_buffer_uptodate(sb); 4372 btrfs_set_buffer_uptodate(sb);
4358 btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); 4373 btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
4374 /*
4375 * The sb extent buffer is artifical and just used to read the system array.
4376 * btrfs_set_buffer_uptodate() call does not properly mark all it's
4377 * pages up-to-date when the page is larger: extent does not cover the
4378 * whole page and consequently check_page_uptodate does not find all
4379 * the page's extents up-to-date (the hole beyond sb),
4380 * write_extent_buffer then triggers a WARN_ON.
4381 *
4382 * Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
4383 * but sb spans only this function. Add an explicit SetPageUptodate call
4384 * to silence the warning eg. on PowerPC 64.
4385 */
4386 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
4387 SetPageUptodate(sb->first_page);
4359 4388
4360 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 4389 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
4361 array_size = btrfs_super_sys_array_size(super_copy); 4390 array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index faccd47c6c46..92c20654cc55 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -370,9 +370,9 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
370 PAGE_CACHE_SIZE - buf_offset); 370 PAGE_CACHE_SIZE - buf_offset);
371 bytes = min(bytes, bytes_left); 371 bytes = min(bytes, bytes_left);
372 372
373 kaddr = kmap_atomic(dest_page, KM_USER0); 373 kaddr = kmap_atomic(dest_page);
374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); 374 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
375 kunmap_atomic(kaddr, KM_USER0); 375 kunmap_atomic(kaddr);
376 376
377 pg_offset += bytes; 377 pg_offset += bytes;
378 bytes_left -= bytes; 378 bytes_left -= bytes;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index a0358c2189cb..7f0771d3894e 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -646,7 +646,8 @@ lookup_again:
646 * (this is used to keep track of culling, and atimes are only 646 * (this is used to keep track of culling, and atimes are only
647 * updated by read, write and readdir but not lookup or 647 * updated by read, write and readdir but not lookup or
648 * open) */ 648 * open) */
649 touch_atime(cache->mnt, next); 649 path.dentry = next;
650 touch_atime(&path);
650 } 651 }
651 652
652 /* open a file interface onto a data file */ 653 /* open a file interface onto a data file */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 00de2c9568cd..256f85221926 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -655,9 +655,8 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
655 dout("open_root_inode success\n"); 655 dout("open_root_inode success\n");
656 if (ceph_ino(inode) == CEPH_INO_ROOT && 656 if (ceph_ino(inode) == CEPH_INO_ROOT &&
657 fsc->sb->s_root == NULL) { 657 fsc->sb->s_root == NULL) {
658 root = d_alloc_root(inode); 658 root = d_make_root(inode);
659 if (!root) { 659 if (!root) {
660 iput(inode);
661 root = ERR_PTR(-ENOMEM); 660 root = ERR_PTR(-ENOMEM);
662 goto out; 661 goto out;
663 } 662 }
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c1b254487388..3cc1b251ca08 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -556,6 +556,7 @@ init_cifs_idmap(void)
556 556
557 /* instruct request_key() to use this special keyring as a cache for 557 /* instruct request_key() to use this special keyring as a cache for
558 * the results it looks up */ 558 * the results it looks up */
559 set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
559 cred->thread_keyring = keyring; 560 cred->thread_keyring = keyring;
560 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; 561 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
561 root_cred = cred; 562 root_cred = cred;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b1fd382d1952..418fc42fb8b2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -119,12 +119,10 @@ cifs_read_super(struct super_block *sb)
119 119
120 if (IS_ERR(inode)) { 120 if (IS_ERR(inode)) {
121 rc = PTR_ERR(inode); 121 rc = PTR_ERR(inode);
122 inode = NULL;
123 goto out_no_root; 122 goto out_no_root;
124 } 123 }
125 124
126 sb->s_root = d_alloc_root(inode); 125 sb->s_root = d_make_root(inode);
127
128 if (!sb->s_root) { 126 if (!sb->s_root) {
129 rc = -ENOMEM; 127 rc = -ENOMEM;
130 goto out_no_root; 128 goto out_no_root;
@@ -147,9 +145,6 @@ cifs_read_super(struct super_block *sb)
147 145
148out_no_root: 146out_no_root:
149 cERROR(1, "cifs_read_super: get root inode failed"); 147 cERROR(1, "cifs_read_super: get root inode failed");
150 if (inode)
151 iput(inode);
152
153 return rc; 148 return rc;
154} 149}
155 150
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 63a196b97d50..bc7e24420ac0 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -584,10 +584,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
584 * If either that or op not supported returned, follow 584 * If either that or op not supported returned, follow
585 * the normal lookup. 585 * the normal lookup.
586 */ 586 */
587 if ((rc == 0) || (rc == -ENOENT)) 587 switch (rc) {
588 case 0:
589 /*
590 * The server may allow us to open things like
591 * FIFOs, but the client isn't set up to deal
592 * with that. If it's not a regular file, just
593 * close it and proceed as if it were a normal
594 * lookup.
595 */
596 if (newInode && !S_ISREG(newInode->i_mode)) {
597 CIFSSMBClose(xid, pTcon, fileHandle);
598 break;
599 }
600 case -ENOENT:
588 posix_open = true; 601 posix_open = true;
589 else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) 602 case -EOPNOTSUPP:
603 break;
604 default:
590 pTcon->broken_posix_open = true; 605 pTcon->broken_posix_open = true;
606 }
591 } 607 }
592 if (!posix_open) 608 if (!posix_open)
593 rc = cifs_get_inode_info_unix(&newInode, full_path, 609 rc = cifs_get_inode_info_unix(&newInode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4dd9283885e7..5e64748a2917 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -920,16 +920,26 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
920 for (lockp = &inode->i_flock; *lockp != NULL; \ 920 for (lockp = &inode->i_flock; *lockp != NULL; \
921 lockp = &(*lockp)->fl_next) 921 lockp = &(*lockp)->fl_next)
922 922
923struct lock_to_push {
924 struct list_head llist;
925 __u64 offset;
926 __u64 length;
927 __u32 pid;
928 __u16 netfid;
929 __u8 type;
930};
931
923static int 932static int
924cifs_push_posix_locks(struct cifsFileInfo *cfile) 933cifs_push_posix_locks(struct cifsFileInfo *cfile)
925{ 934{
926 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 935 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
927 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 936 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
928 struct file_lock *flock, **before; 937 struct file_lock *flock, **before;
929 struct cifsLockInfo *lck, *tmp; 938 unsigned int count = 0, i = 0;
930 int rc = 0, xid, type; 939 int rc = 0, xid, type;
940 struct list_head locks_to_send, *el;
941 struct lock_to_push *lck, *tmp;
931 __u64 length; 942 __u64 length;
932 struct list_head locks_to_send;
933 943
934 xid = GetXid(); 944 xid = GetXid();
935 945
@@ -940,29 +950,55 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
940 return rc; 950 return rc;
941 } 951 }
942 952
953 lock_flocks();
954 cifs_for_each_lock(cfile->dentry->d_inode, before) {
955 if ((*before)->fl_flags & FL_POSIX)
956 count++;
957 }
958 unlock_flocks();
959
943 INIT_LIST_HEAD(&locks_to_send); 960 INIT_LIST_HEAD(&locks_to_send);
944 961
962 /*
963 * Allocating count locks is enough because no locks can be added to
964 * the list while we are holding cinode->lock_mutex that protects
965 * locking operations of this inode.
966 */
967 for (; i < count; i++) {
968 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
969 if (!lck) {
970 rc = -ENOMEM;
971 goto err_out;
972 }
973 list_add_tail(&lck->llist, &locks_to_send);
974 }
975
976 i = 0;
977 el = locks_to_send.next;
945 lock_flocks(); 978 lock_flocks();
946 cifs_for_each_lock(cfile->dentry->d_inode, before) { 979 cifs_for_each_lock(cfile->dentry->d_inode, before) {
980 if (el == &locks_to_send) {
981 /* something is really wrong */
982 cERROR(1, "Can't push all brlocks!");
983 break;
984 }
947 flock = *before; 985 flock = *before;
986 if ((flock->fl_flags & FL_POSIX) == 0)
987 continue;
948 length = 1 + flock->fl_end - flock->fl_start; 988 length = 1 + flock->fl_end - flock->fl_start;
949 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) 989 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
950 type = CIFS_RDLCK; 990 type = CIFS_RDLCK;
951 else 991 else
952 type = CIFS_WRLCK; 992 type = CIFS_WRLCK;
953 993 lck = list_entry(el, struct lock_to_push, llist);
954 lck = cifs_lock_init(flock->fl_start, length, type,
955 cfile->netfid);
956 if (!lck) {
957 rc = -ENOMEM;
958 goto send_locks;
959 }
960 lck->pid = flock->fl_pid; 994 lck->pid = flock->fl_pid;
961 995 lck->netfid = cfile->netfid;
962 list_add_tail(&lck->llist, &locks_to_send); 996 lck->length = length;
997 lck->type = type;
998 lck->offset = flock->fl_start;
999 i++;
1000 el = el->next;
963 } 1001 }
964
965send_locks:
966 unlock_flocks(); 1002 unlock_flocks();
967 1003
968 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1004 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
@@ -979,11 +1015,18 @@ send_locks:
979 kfree(lck); 1015 kfree(lck);
980 } 1016 }
981 1017
1018out:
982 cinode->can_cache_brlcks = false; 1019 cinode->can_cache_brlcks = false;
983 mutex_unlock(&cinode->lock_mutex); 1020 mutex_unlock(&cinode->lock_mutex);
984 1021
985 FreeXid(xid); 1022 FreeXid(xid);
986 return rc; 1023 return rc;
1024err_out:
1025 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1026 list_del(&lck->llist);
1027 kfree(lck);
1028 }
1029 goto out;
987} 1030}
988 1031
989static int 1032static int
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a5f54b7d9822..745da3d0653e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -534,6 +534,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
534 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { 534 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
535 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; 535 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
536 fattr->cf_dtype = DT_DIR; 536 fattr->cf_dtype = DT_DIR;
537 /*
538 * Server can return wrong NumberOfLinks value for directories
539 * when Unix extensions are disabled - fake it.
540 */
541 fattr->cf_nlink = 2;
537 } else { 542 } else {
538 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; 543 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
539 fattr->cf_dtype = DT_REG; 544 fattr->cf_dtype = DT_REG;
@@ -541,9 +546,9 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
541 /* clear write bits if ATTR_READONLY is set */ 546 /* clear write bits if ATTR_READONLY is set */
542 if (fattr->cf_cifsattrs & ATTR_READONLY) 547 if (fattr->cf_cifsattrs & ATTR_READONLY)
543 fattr->cf_mode &= ~(S_IWUGO); 548 fattr->cf_mode &= ~(S_IWUGO);
544 }
545 549
546 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); 550 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
551 }
547 552
548 fattr->cf_uid = cifs_sb->mnt_uid; 553 fattr->cf_uid = cifs_sb->mnt_uid;
549 fattr->cf_gid = cifs_sb->mnt_gid; 554 fattr->cf_gid = cifs_sb->mnt_gid;
@@ -1322,7 +1327,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
1322 } 1327 }
1323/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need 1328/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
1324 to set uid/gid */ 1329 to set uid/gid */
1325 inc_nlink(inode);
1326 1330
1327 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1331 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1328 cifs_fill_uniqueid(inode->i_sb, &fattr); 1332 cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1355,7 +1359,6 @@ mkdir_retry_old:
1355 d_drop(direntry); 1359 d_drop(direntry);
1356 } else { 1360 } else {
1357mkdir_get_info: 1361mkdir_get_info:
1358 inc_nlink(inode);
1359 if (pTcon->unix_ext) 1362 if (pTcon->unix_ext)
1360 rc = cifs_get_inode_info_unix(&newinode, full_path, 1363 rc = cifs_get_inode_info_unix(&newinode, full_path,
1361 inode->i_sb, xid); 1364 inode->i_sb, xid);
@@ -1436,6 +1439,11 @@ mkdir_get_info:
1436 } 1439 }
1437 } 1440 }
1438mkdir_out: 1441mkdir_out:
1442 /*
1443 * Force revalidate to get parent dir info when needed since cached
1444 * attributes are invalid now.
1445 */
1446 CIFS_I(inode)->time = 0;
1439 kfree(full_path); 1447 kfree(full_path);
1440 FreeXid(xid); 1448 FreeXid(xid);
1441 cifs_put_tlink(tlink); 1449 cifs_put_tlink(tlink);
@@ -1475,7 +1483,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1475 cifs_put_tlink(tlink); 1483 cifs_put_tlink(tlink);
1476 1484
1477 if (!rc) { 1485 if (!rc) {
1478 drop_nlink(inode);
1479 spin_lock(&direntry->d_inode->i_lock); 1486 spin_lock(&direntry->d_inode->i_lock);
1480 i_size_write(direntry->d_inode, 0); 1487 i_size_write(direntry->d_inode, 0);
1481 clear_nlink(direntry->d_inode); 1488 clear_nlink(direntry->d_inode);
@@ -1483,12 +1490,15 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1483 } 1490 }
1484 1491
1485 cifsInode = CIFS_I(direntry->d_inode); 1492 cifsInode = CIFS_I(direntry->d_inode);
1486 cifsInode->time = 0; /* force revalidate to go get info when 1493 /* force revalidate to go get info when needed */
1487 needed */ 1494 cifsInode->time = 0;
1488 1495
1489 cifsInode = CIFS_I(inode); 1496 cifsInode = CIFS_I(inode);
1490 cifsInode->time = 0; /* force revalidate to get parent dir info 1497 /*
1491 since cached search results now invalid */ 1498 * Force revalidate to get parent dir info when needed since cached
1499 * attributes are invalid now.
1500 */
1501 cifsInode->time = 0;
1492 1502
1493 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime = 1503 direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
1494 current_fs_time(inode->i_sb); 1504 current_fs_time(inode->i_sb);
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 45f07c46f3ed..10d92cf57ab6 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -105,7 +105,6 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
105 struct cifs_tcon *pTcon; 105 struct cifs_tcon *pTcon;
106 struct super_block *sb; 106 struct super_block *sb;
107 char *full_path; 107 char *full_path;
108 struct cifs_ntsd *pacl;
109 108
110 if (direntry == NULL) 109 if (direntry == NULL)
111 return -EIO; 110 return -EIO;
@@ -164,23 +163,24 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
164 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 163 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
165 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, 164 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
166 strlen(CIFS_XATTR_CIFS_ACL)) == 0) { 165 strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
166#ifdef CONFIG_CIFS_ACL
167 struct cifs_ntsd *pacl;
167 pacl = kmalloc(value_size, GFP_KERNEL); 168 pacl = kmalloc(value_size, GFP_KERNEL);
168 if (!pacl) { 169 if (!pacl) {
169 cFYI(1, "%s: Can't allocate memory for ACL", 170 cFYI(1, "%s: Can't allocate memory for ACL",
170 __func__); 171 __func__);
171 rc = -ENOMEM; 172 rc = -ENOMEM;
172 } else { 173 } else {
173#ifdef CONFIG_CIFS_ACL
174 memcpy(pacl, ea_value, value_size); 174 memcpy(pacl, ea_value, value_size);
175 rc = set_cifs_acl(pacl, value_size, 175 rc = set_cifs_acl(pacl, value_size,
176 direntry->d_inode, full_path, CIFS_ACL_DACL); 176 direntry->d_inode, full_path, CIFS_ACL_DACL);
177 if (rc == 0) /* force revalidate of the inode */ 177 if (rc == 0) /* force revalidate of the inode */
178 CIFS_I(direntry->d_inode)->time = 0; 178 CIFS_I(direntry->d_inode)->time = 0;
179 kfree(pacl); 179 kfree(pacl);
180 }
180#else 181#else
181 cFYI(1, "Set CIFS ACL not supported yet"); 182 cFYI(1, "Set CIFS ACL not supported yet");
182#endif /* CONFIG_CIFS_ACL */ 183#endif /* CONFIG_CIFS_ACL */
183 }
184 } else { 184 } else {
185 int temp; 185 int temp;
186 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, 186 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5e2e1b3f068d..05156c17b551 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -208,13 +208,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
208 if (IS_ERR(root)) { 208 if (IS_ERR(root)) {
209 error = PTR_ERR(root); 209 error = PTR_ERR(root);
210 printk("Failure of coda_cnode_make for root: error %d\n", error); 210 printk("Failure of coda_cnode_make for root: error %d\n", error);
211 root = NULL;
212 goto error; 211 goto error;
213 } 212 }
214 213
215 printk("coda_read_super: rootinode is %ld dev %s\n", 214 printk("coda_read_super: rootinode is %ld dev %s\n",
216 root->i_ino, root->i_sb->s_id); 215 root->i_ino, root->i_sb->s_id);
217 sb->s_root = d_alloc_root(root); 216 sb->s_root = d_make_root(root);
218 if (!sb->s_root) { 217 if (!sb->s_root) {
219 error = -EINVAL; 218 error = -EINVAL;
220 goto error; 219 goto error;
@@ -222,9 +221,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
222 return 0; 221 return 0;
223 222
224error: 223error:
225 if (root)
226 iput(root);
227
228 mutex_lock(&vc->vc_mutex); 224 mutex_lock(&vc->vc_mutex);
229 bdi_destroy(&vc->bdi); 225 bdi_destroy(&vc->bdi);
230 vc->vc_sb = NULL; 226 vc->vc_sb = NULL;
diff --git a/fs/compat.c b/fs/compat.c
index fa9d721ecfee..07880bae28a9 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -131,41 +131,35 @@ asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_tim
131 131
132static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) 132static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
133{ 133{
134 compat_ino_t ino = stat->ino; 134 struct compat_stat tmp;
135 typeof(ubuf->st_uid) uid = 0;
136 typeof(ubuf->st_gid) gid = 0;
137 int err;
138 135
139 SET_UID(uid, stat->uid); 136 if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
140 SET_GID(gid, stat->gid); 137 return -EOVERFLOW;
141 138
142 if ((u64) stat->size > MAX_NON_LFS || 139 memset(&tmp, 0, sizeof(tmp));
143 !old_valid_dev(stat->dev) || 140 tmp.st_dev = old_encode_dev(stat->dev);
144 !old_valid_dev(stat->rdev)) 141 tmp.st_ino = stat->ino;
142 if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
145 return -EOVERFLOW; 143 return -EOVERFLOW;
146 if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) 144 tmp.st_mode = stat->mode;
145 tmp.st_nlink = stat->nlink;
146 if (tmp.st_nlink != stat->nlink)
147 return -EOVERFLOW; 147 return -EOVERFLOW;
148 148 SET_UID(tmp.st_uid, stat->uid);
149 if (clear_user(ubuf, sizeof(*ubuf))) 149 SET_GID(tmp.st_gid, stat->gid);
150 return -EFAULT; 150 tmp.st_rdev = old_encode_dev(stat->rdev);
151 151 if ((u64) stat->size > MAX_NON_LFS)
152 err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); 152 return -EOVERFLOW;
153 err |= __put_user(ino, &ubuf->st_ino); 153 tmp.st_size = stat->size;
154 err |= __put_user(stat->mode, &ubuf->st_mode); 154 tmp.st_atime = stat->atime.tv_sec;
155 err |= __put_user(stat->nlink, &ubuf->st_nlink); 155 tmp.st_atime_nsec = stat->atime.tv_nsec;
156 err |= __put_user(uid, &ubuf->st_uid); 156 tmp.st_mtime = stat->mtime.tv_sec;
157 err |= __put_user(gid, &ubuf->st_gid); 157 tmp.st_mtime_nsec = stat->mtime.tv_nsec;
158 err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); 158 tmp.st_ctime = stat->ctime.tv_sec;
159 err |= __put_user(stat->size, &ubuf->st_size); 159 tmp.st_ctime_nsec = stat->ctime.tv_nsec;
160 err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); 160 tmp.st_blocks = stat->blocks;
161 err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); 161 tmp.st_blksize = stat->blksize;
162 err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); 162 return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
163 err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
164 err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
165 err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
166 err |= __put_user(stat->blksize, &ubuf->st_blksize);
167 err |= __put_user(stat->blocks, &ubuf->st_blocks);
168 return err;
169} 163}
170 164
171asmlinkage long compat_sys_newstat(const char __user * filename, 165asmlinkage long compat_sys_newstat(const char __user * filename,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a26bea10e81b..10d8cd90ca6f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -34,7 +34,7 @@
34#include <linux/fs.h> 34#include <linux/fs.h>
35#include <linux/file.h> 35#include <linux/file.h>
36#include <linux/ppp_defs.h> 36#include <linux/ppp_defs.h>
37#include <linux/if_ppp.h> 37#include <linux/ppp-ioctl.h>
38#include <linux/if_pppox.h> 38#include <linux/if_pppox.h>
39#include <linux/mtio.h> 39#include <linux/mtio.h>
40#include <linux/auto_fs.h> 40#include <linux/auto_fs.h>
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index ede857d20a04..b5f0a3b91f18 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -58,12 +58,11 @@ struct configfs_dirent {
58extern struct mutex configfs_symlink_mutex; 58extern struct mutex configfs_symlink_mutex;
59extern spinlock_t configfs_dirent_lock; 59extern spinlock_t configfs_dirent_lock;
60 60
61extern struct vfsmount * configfs_mount;
62extern struct kmem_cache *configfs_dir_cachep; 61extern struct kmem_cache *configfs_dir_cachep;
63 62
64extern int configfs_is_root(struct config_item *item); 63extern int configfs_is_root(struct config_item *item);
65 64
66extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *); 65extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *);
67extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *)); 66extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
68extern int configfs_inode_init(void); 67extern int configfs_inode_init(void);
69extern void configfs_inode_exit(void); 68extern void configfs_inode_exit(void);
@@ -80,15 +79,15 @@ extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
80extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); 79extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
81extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr); 80extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
82 81
83extern int configfs_pin_fs(void); 82extern struct dentry *configfs_pin_fs(void);
84extern void configfs_release_fs(void); 83extern void configfs_release_fs(void);
85 84
86extern struct rw_semaphore configfs_rename_sem; 85extern struct rw_semaphore configfs_rename_sem;
87extern struct super_block * configfs_sb;
88extern const struct file_operations configfs_dir_operations; 86extern const struct file_operations configfs_dir_operations;
89extern const struct file_operations configfs_file_operations; 87extern const struct file_operations configfs_file_operations;
90extern const struct file_operations bin_fops; 88extern const struct file_operations bin_fops;
91extern const struct inode_operations configfs_dir_inode_operations; 89extern const struct inode_operations configfs_dir_inode_operations;
90extern const struct inode_operations configfs_root_inode_operations;
92extern const struct inode_operations configfs_symlink_inode_operations; 91extern const struct inode_operations configfs_symlink_inode_operations;
93extern const struct dentry_operations configfs_dentry_ops; 92extern const struct dentry_operations configfs_dentry_ops;
94 93
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5ddd7ebd9dcd..7e6c52d8a207 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -264,11 +264,13 @@ static int init_symlink(struct inode * inode)
264 return 0; 264 return 0;
265} 265}
266 266
267static int create_dir(struct config_item * k, struct dentry * p, 267static int create_dir(struct config_item *k, struct dentry *d)
268 struct dentry * d)
269{ 268{
270 int error; 269 int error;
271 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 270 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
271 struct dentry *p = d->d_parent;
272
273 BUG_ON(!k);
272 274
273 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 275 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
274 if (!error) 276 if (!error)
@@ -304,19 +306,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
304 306
305static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 307static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
306{ 308{
307 struct dentry * parent; 309 int error = create_dir(item, dentry);
308 int error = 0;
309
310 BUG_ON(!item);
311
312 if (item->ci_parent)
313 parent = item->ci_parent->ci_dentry;
314 else if (configfs_mount)
315 parent = configfs_mount->mnt_root;
316 else
317 return -EFAULT;
318
319 error = create_dir(item,parent,dentry);
320 if (!error) 310 if (!error)
321 item->ci_dentry = dentry; 311 item->ci_dentry = dentry;
322 return error; 312 return error;
@@ -1079,23 +1069,24 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
1079 int ret; 1069 int ret;
1080 struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; 1070 struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
1081 struct config_item *s_item = &subsys->su_group.cg_item; 1071 struct config_item *s_item = &subsys->su_group.cg_item;
1072 struct dentry *root;
1082 1073
1083 /* 1074 /*
1084 * Pin the configfs filesystem. This means we can safely access 1075 * Pin the configfs filesystem. This means we can safely access
1085 * the root of the configfs filesystem. 1076 * the root of the configfs filesystem.
1086 */ 1077 */
1087 ret = configfs_pin_fs(); 1078 root = configfs_pin_fs();
1088 if (ret) 1079 if (IS_ERR(root))
1089 return ret; 1080 return PTR_ERR(root);
1090 1081
1091 /* 1082 /*
1092 * Next, lock the root directory. We're going to check that the 1083 * Next, lock the root directory. We're going to check that the
1093 * subsystem is really registered, and so we need to lock out 1084 * subsystem is really registered, and so we need to lock out
1094 * configfs_[un]register_subsystem(). 1085 * configfs_[un]register_subsystem().
1095 */ 1086 */
1096 mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); 1087 mutex_lock(&root->d_inode->i_mutex);
1097 1088
1098 root_sd = configfs_sb->s_root->d_fsdata; 1089 root_sd = root->d_fsdata;
1099 1090
1100 list_for_each_entry(p, &root_sd->s_children, s_sibling) { 1091 list_for_each_entry(p, &root_sd->s_children, s_sibling) {
1101 if (p->s_type & CONFIGFS_DIR) { 1092 if (p->s_type & CONFIGFS_DIR) {
@@ -1129,7 +1120,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
1129out_unlock_dirent_lock: 1120out_unlock_dirent_lock:
1130 spin_unlock(&configfs_dirent_lock); 1121 spin_unlock(&configfs_dirent_lock);
1131out_unlock_fs: 1122out_unlock_fs:
1132 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1123 mutex_unlock(&root->d_inode->i_mutex);
1133 1124
1134 /* 1125 /*
1135 * If we succeeded, the fs is pinned via other methods. If not, 1126 * If we succeeded, the fs is pinned via other methods. If not,
@@ -1183,11 +1174,6 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
1183 struct module *subsys_owner = NULL, *new_item_owner = NULL; 1174 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1184 char *name; 1175 char *name;
1185 1176
1186 if (dentry->d_parent == configfs_sb->s_root) {
1187 ret = -EPERM;
1188 goto out;
1189 }
1190
1191 sd = dentry->d_parent->d_fsdata; 1177 sd = dentry->d_parent->d_fsdata;
1192 1178
1193 /* 1179 /*
@@ -1359,9 +1345,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1359 struct module *subsys_owner = NULL, *dead_item_owner = NULL; 1345 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1360 int ret; 1346 int ret;
1361 1347
1362 if (dentry->d_parent == configfs_sb->s_root)
1363 return -EPERM;
1364
1365 sd = dentry->d_fsdata; 1348 sd = dentry->d_fsdata;
1366 if (sd->s_type & CONFIGFS_USET_DEFAULT) 1349 if (sd->s_type & CONFIGFS_USET_DEFAULT)
1367 return -EPERM; 1350 return -EPERM;
@@ -1459,6 +1442,11 @@ const struct inode_operations configfs_dir_inode_operations = {
1459 .setattr = configfs_setattr, 1442 .setattr = configfs_setattr,
1460}; 1443};
1461 1444
1445const struct inode_operations configfs_root_inode_operations = {
1446 .lookup = configfs_lookup,
1447 .setattr = configfs_setattr,
1448};
1449
1462#if 0 1450#if 0
1463int configfs_rename_dir(struct config_item * item, const char *new_name) 1451int configfs_rename_dir(struct config_item * item, const char *new_name)
1464{ 1452{
@@ -1546,6 +1534,7 @@ static inline unsigned char dt_type(struct configfs_dirent *sd)
1546static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 1534static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1547{ 1535{
1548 struct dentry *dentry = filp->f_path.dentry; 1536 struct dentry *dentry = filp->f_path.dentry;
1537 struct super_block *sb = dentry->d_sb;
1549 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1538 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1550 struct configfs_dirent *cursor = filp->private_data; 1539 struct configfs_dirent *cursor = filp->private_data;
1551 struct list_head *p, *q = &cursor->s_sibling; 1540 struct list_head *p, *q = &cursor->s_sibling;
@@ -1608,7 +1597,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1608 ino = inode->i_ino; 1597 ino = inode->i_ino;
1609 spin_unlock(&configfs_dirent_lock); 1598 spin_unlock(&configfs_dirent_lock);
1610 if (!inode) 1599 if (!inode)
1611 ino = iunique(configfs_sb, 2); 1600 ino = iunique(sb, 2);
1612 1601
1613 if (filldir(dirent, name, len, filp->f_pos, ino, 1602 if (filldir(dirent, name, len, filp->f_pos, ino,
1614 dt_type(next)) < 0) 1603 dt_type(next)) < 0)
@@ -1680,27 +1669,27 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1680 struct config_group *group = &subsys->su_group; 1669 struct config_group *group = &subsys->su_group;
1681 struct qstr name; 1670 struct qstr name;
1682 struct dentry *dentry; 1671 struct dentry *dentry;
1672 struct dentry *root;
1683 struct configfs_dirent *sd; 1673 struct configfs_dirent *sd;
1684 1674
1685 err = configfs_pin_fs(); 1675 root = configfs_pin_fs();
1686 if (err) 1676 if (IS_ERR(root))
1687 return err; 1677 return PTR_ERR(root);
1688 1678
1689 if (!group->cg_item.ci_name) 1679 if (!group->cg_item.ci_name)
1690 group->cg_item.ci_name = group->cg_item.ci_namebuf; 1680 group->cg_item.ci_name = group->cg_item.ci_namebuf;
1691 1681
1692 sd = configfs_sb->s_root->d_fsdata; 1682 sd = root->d_fsdata;
1693 link_group(to_config_group(sd->s_element), group); 1683 link_group(to_config_group(sd->s_element), group);
1694 1684
1695 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1685 mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
1696 I_MUTEX_PARENT);
1697 1686
1698 name.name = group->cg_item.ci_name; 1687 name.name = group->cg_item.ci_name;
1699 name.len = strlen(name.name); 1688 name.len = strlen(name.name);
1700 name.hash = full_name_hash(name.name, name.len); 1689 name.hash = full_name_hash(name.name, name.len);
1701 1690
1702 err = -ENOMEM; 1691 err = -ENOMEM;
1703 dentry = d_alloc(configfs_sb->s_root, &name); 1692 dentry = d_alloc(root, &name);
1704 if (dentry) { 1693 if (dentry) {
1705 d_add(dentry, NULL); 1694 d_add(dentry, NULL);
1706 1695
@@ -1717,7 +1706,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1717 } 1706 }
1718 } 1707 }
1719 1708
1720 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1709 mutex_unlock(&root->d_inode->i_mutex);
1721 1710
1722 if (err) { 1711 if (err) {
1723 unlink_group(group); 1712 unlink_group(group);
@@ -1731,13 +1720,14 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1731{ 1720{
1732 struct config_group *group = &subsys->su_group; 1721 struct config_group *group = &subsys->su_group;
1733 struct dentry *dentry = group->cg_item.ci_dentry; 1722 struct dentry *dentry = group->cg_item.ci_dentry;
1723 struct dentry *root = dentry->d_sb->s_root;
1734 1724
1735 if (dentry->d_parent != configfs_sb->s_root) { 1725 if (dentry->d_parent != root) {
1736 printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n"); 1726 printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n");
1737 return; 1727 return;
1738 } 1728 }
1739 1729
1740 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1730 mutex_lock_nested(&root->d_inode->i_mutex,
1741 I_MUTEX_PARENT); 1731 I_MUTEX_PARENT);
1742 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1732 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1743 mutex_lock(&configfs_symlink_mutex); 1733 mutex_lock(&configfs_symlink_mutex);
@@ -1754,7 +1744,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1754 1744
1755 d_delete(dentry); 1745 d_delete(dentry);
1756 1746
1757 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); 1747 mutex_unlock(&root->d_inode->i_mutex);
1758 1748
1759 dput(dentry); 1749 dput(dentry);
1760 1750
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 3ee36d418863..0074362d9f7f 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -44,8 +44,6 @@
44static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; 44static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
45#endif 45#endif
46 46
47extern struct super_block * configfs_sb;
48
49static const struct address_space_operations configfs_aops = { 47static const struct address_space_operations configfs_aops = {
50 .readpage = simple_readpage, 48 .readpage = simple_readpage,
51 .write_begin = simple_write_begin, 49 .write_begin = simple_write_begin,
@@ -132,9 +130,10 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
132 inode->i_ctime = iattr->ia_ctime; 130 inode->i_ctime = iattr->ia_ctime;
133} 131}
134 132
135struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd) 133struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
134 struct super_block *s)
136{ 135{
137 struct inode * inode = new_inode(configfs_sb); 136 struct inode * inode = new_inode(s);
138 if (inode) { 137 if (inode) {
139 inode->i_ino = get_next_ino(); 138 inode->i_ino = get_next_ino();
140 inode->i_mapping->a_ops = &configfs_aops; 139 inode->i_mapping->a_ops = &configfs_aops;
@@ -188,36 +187,35 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
188int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *)) 187int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
189{ 188{
190 int error = 0; 189 int error = 0;
191 struct inode * inode = NULL; 190 struct inode *inode = NULL;
192 if (dentry) { 191 struct configfs_dirent *sd;
193 if (!dentry->d_inode) { 192 struct inode *p_inode;
194 struct configfs_dirent *sd = dentry->d_fsdata; 193
195 if ((inode = configfs_new_inode(mode, sd))) { 194 if (!dentry)
196 if (dentry->d_parent && dentry->d_parent->d_inode) { 195 return -ENOENT;
197 struct inode *p_inode = dentry->d_parent->d_inode; 196
198 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; 197 if (dentry->d_inode)
199 } 198 return -EEXIST;
200 configfs_set_inode_lock_class(sd, inode);
201 goto Proceed;
202 }
203 else
204 error = -ENOMEM;
205 } else
206 error = -EEXIST;
207 } else
208 error = -ENOENT;
209 goto Done;
210 199
211 Proceed: 200 sd = dentry->d_fsdata;
212 if (init) 201 inode = configfs_new_inode(mode, sd, dentry->d_sb);
202 if (!inode)
203 return -ENOMEM;
204
205 p_inode = dentry->d_parent->d_inode;
206 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
207 configfs_set_inode_lock_class(sd, inode);
208
209 if (init) {
213 error = init(inode); 210 error = init(inode);
214 if (!error) { 211 if (error) {
215 d_instantiate(dentry, inode); 212 iput(inode);
216 if (S_ISDIR(mode) || S_ISLNK(mode)) 213 return error;
217 dget(dentry); /* pin link and directory dentries in core */ 214 }
218 } else 215 }
219 iput(inode); 216 d_instantiate(dentry, inode);
220 Done: 217 if (S_ISDIR(mode) || S_ISLNK(mode))
218 dget(dentry); /* pin link and directory dentries in core */
221 return error; 219 return error;
222} 220}
223 221
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 276e15cafd58..aee0a7ebbd8e 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -37,8 +37,7 @@
37/* Random magic number */ 37/* Random magic number */
38#define CONFIGFS_MAGIC 0x62656570 38#define CONFIGFS_MAGIC 0x62656570
39 39
40struct vfsmount * configfs_mount = NULL; 40static struct vfsmount *configfs_mount = NULL;
41struct super_block * configfs_sb = NULL;
42struct kmem_cache *configfs_dir_cachep; 41struct kmem_cache *configfs_dir_cachep;
43static int configfs_mnt_count = 0; 42static int configfs_mnt_count = 0;
44 43
@@ -77,12 +76,11 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
77 sb->s_magic = CONFIGFS_MAGIC; 76 sb->s_magic = CONFIGFS_MAGIC;
78 sb->s_op = &configfs_ops; 77 sb->s_op = &configfs_ops;
79 sb->s_time_gran = 1; 78 sb->s_time_gran = 1;
80 configfs_sb = sb;
81 79
82 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 80 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
83 &configfs_root); 81 &configfs_root, sb);
84 if (inode) { 82 if (inode) {
85 inode->i_op = &configfs_dir_inode_operations; 83 inode->i_op = &configfs_root_inode_operations;
86 inode->i_fop = &configfs_dir_operations; 84 inode->i_fop = &configfs_dir_operations;
87 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 85 /* directory inodes start off with i_nlink == 2 (for "." entry) */
88 inc_nlink(inode); 86 inc_nlink(inode);
@@ -91,10 +89,9 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
91 return -ENOMEM; 89 return -ENOMEM;
92 } 90 }
93 91
94 root = d_alloc_root(inode); 92 root = d_make_root(inode);
95 if (!root) { 93 if (!root) {
96 pr_debug("%s: could not get root dentry!\n",__func__); 94 pr_debug("%s: could not get root dentry!\n",__func__);
97 iput(inode);
98 return -ENOMEM; 95 return -ENOMEM;
99 } 96 }
100 config_group_init(&configfs_root_group); 97 config_group_init(&configfs_root_group);
@@ -118,10 +115,11 @@ static struct file_system_type configfs_fs_type = {
118 .kill_sb = kill_litter_super, 115 .kill_sb = kill_litter_super,
119}; 116};
120 117
121int configfs_pin_fs(void) 118struct dentry *configfs_pin_fs(void)
122{ 119{
123 return simple_pin_fs(&configfs_fs_type, &configfs_mount, 120 int err = simple_pin_fs(&configfs_fs_type, &configfs_mount,
124 &configfs_mnt_count); 121 &configfs_mnt_count);
122 return err ? ERR_PTR(err) : configfs_mount->mnt_root;
125} 123}
126 124
127void configfs_release_fs(void) 125void configfs_release_fs(void)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0f3eb41d9201..cc9f2546ea4a 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -110,13 +110,13 @@ out:
110 110
111 111
112static int get_target(const char *symname, struct path *path, 112static int get_target(const char *symname, struct path *path,
113 struct config_item **target) 113 struct config_item **target, struct super_block *sb)
114{ 114{
115 int ret; 115 int ret;
116 116
117 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path); 117 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path);
118 if (!ret) { 118 if (!ret) {
119 if (path->dentry->d_sb == configfs_sb) { 119 if (path->dentry->d_sb == sb) {
120 *target = configfs_get_config_item(path->dentry); 120 *target = configfs_get_config_item(path->dentry);
121 if (!*target) { 121 if (!*target) {
122 ret = -ENOENT; 122 ret = -ENOENT;
@@ -141,10 +141,6 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 struct config_item *target_item = NULL; 141 struct config_item *target_item = NULL;
142 struct config_item_type *type; 142 struct config_item_type *type;
143 143
144 ret = -EPERM; /* What lack-of-symlink returns */
145 if (dentry->d_parent == configfs_sb->s_root)
146 goto out;
147
148 sd = dentry->d_parent->d_fsdata; 144 sd = dentry->d_parent->d_fsdata;
149 /* 145 /*
150 * Fake invisibility if dir belongs to a group/default groups hierarchy 146 * Fake invisibility if dir belongs to a group/default groups hierarchy
@@ -162,7 +158,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
162 !type->ct_item_ops->allow_link) 158 !type->ct_item_ops->allow_link)
163 goto out_put; 159 goto out_put;
164 160
165 ret = get_target(symname, &path, &target_item); 161 ret = get_target(symname, &path, &target_item, dentry->d_sb);
166 if (ret) 162 if (ret)
167 goto out_put; 163 goto out_put;
168 164
@@ -198,8 +194,6 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry)
198 if (!(sd->s_type & CONFIGFS_ITEM_LINK)) 194 if (!(sd->s_type & CONFIGFS_ITEM_LINK))
199 goto out; 195 goto out;
200 196
201 BUG_ON(dentry->d_parent == configfs_sb->s_root);
202
203 sl = sd->s_element; 197 sl = sd->s_element;
204 198
205 parent_item = configfs_get_config_item(dentry->d_parent); 199 parent_item = configfs_get_config_item(dentry->d_parent);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a2ee8f9f5a38..d013c46402ed 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -257,10 +257,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
257 257
258 /* Do sanity checks on the superblock */ 258 /* Do sanity checks on the superblock */
259 if (super.magic != CRAMFS_MAGIC) { 259 if (super.magic != CRAMFS_MAGIC) {
260 /* check for wrong endianess */ 260 /* check for wrong endianness */
261 if (super.magic == CRAMFS_MAGIC_WEND) { 261 if (super.magic == CRAMFS_MAGIC_WEND) {
262 if (!silent) 262 if (!silent)
263 printk(KERN_ERR "cramfs: wrong endianess\n"); 263 printk(KERN_ERR "cramfs: wrong endianness\n");
264 goto out; 264 goto out;
265 } 265 }
266 266
@@ -270,7 +270,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
270 mutex_unlock(&read_mutex); 270 mutex_unlock(&read_mutex);
271 if (super.magic != CRAMFS_MAGIC) { 271 if (super.magic != CRAMFS_MAGIC) {
272 if (super.magic == CRAMFS_MAGIC_WEND && !silent) 272 if (super.magic == CRAMFS_MAGIC_WEND && !silent)
273 printk(KERN_ERR "cramfs: wrong endianess\n"); 273 printk(KERN_ERR "cramfs: wrong endianness\n");
274 else if (!silent) 274 else if (!silent)
275 printk(KERN_ERR "cramfs: wrong magic\n"); 275 printk(KERN_ERR "cramfs: wrong magic\n");
276 goto out; 276 goto out;
@@ -318,11 +318,9 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
318 root = get_cramfs_inode(sb, &super.root, 0); 318 root = get_cramfs_inode(sb, &super.root, 0);
319 if (IS_ERR(root)) 319 if (IS_ERR(root))
320 goto out; 320 goto out;
321 sb->s_root = d_alloc_root(root); 321 sb->s_root = d_make_root(root);
322 if (!sb->s_root) { 322 if (!sb->s_root)
323 iput(root);
324 goto out; 323 goto out;
325 }
326 return 0; 324 return 0;
327out: 325out:
328 kfree(sbi); 326 kfree(sbi);
diff --git a/fs/dcache.c b/fs/dcache.c
index 16a53cc2cc02..e441941c834d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -104,11 +104,11 @@ static unsigned int d_hash_shift __read_mostly;
104 104
105static struct hlist_bl_head *dentry_hashtable __read_mostly; 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
106 106
107static inline struct hlist_bl_head *d_hash(struct dentry *parent, 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108 unsigned long hash) 108 unsigned int hash)
109{ 109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 110 hash += (unsigned long) parent / L1_CACHE_BYTES;
111 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); 111 hash = hash + (hash >> D_HASHBITS);
112 return dentry_hashtable + (hash & D_HASHMASK); 112 return dentry_hashtable + (hash & D_HASHMASK);
113} 113}
114 114
@@ -137,6 +137,49 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
137} 137}
138#endif 138#endif
139 139
140/*
141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
142 * The strings are both count bytes long, and count is non-zero.
143 */
144static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 const unsigned char *ct, size_t tcount)
146{
147#ifdef CONFIG_DCACHE_WORD_ACCESS
148 unsigned long a,b,mask;
149
150 if (unlikely(scount != tcount))
151 return 1;
152
153 for (;;) {
154 a = *(unsigned long *)cs;
155 b = *(unsigned long *)ct;
156 if (tcount < sizeof(unsigned long))
157 break;
158 if (unlikely(a != b))
159 return 1;
160 cs += sizeof(unsigned long);
161 ct += sizeof(unsigned long);
162 tcount -= sizeof(unsigned long);
163 if (!tcount)
164 return 0;
165 }
166 mask = ~(~0ul << tcount*8);
167 return unlikely(!!((a ^ b) & mask));
168#else
169 if (scount != tcount)
170 return 1;
171
172 do {
173 if (*cs != *ct)
174 return 1;
175 cs++;
176 ct++;
177 tcount--;
178 } while (tcount);
179 return 0;
180#endif
181}
182
140static void __d_free(struct rcu_head *head) 183static void __d_free(struct rcu_head *head)
141{ 184{
142 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 185 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
@@ -1423,30 +1466,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1423 1466
1424EXPORT_SYMBOL(d_instantiate_unique); 1467EXPORT_SYMBOL(d_instantiate_unique);
1425 1468
1426/**
1427 * d_alloc_root - allocate root dentry
1428 * @root_inode: inode to allocate the root for
1429 *
1430 * Allocate a root ("/") dentry for the inode given. The inode is
1431 * instantiated and returned. %NULL is returned if there is insufficient
1432 * memory or the inode passed is %NULL.
1433 */
1434
1435struct dentry * d_alloc_root(struct inode * root_inode)
1436{
1437 struct dentry *res = NULL;
1438
1439 if (root_inode) {
1440 static const struct qstr name = { .name = "/", .len = 1 };
1441
1442 res = __d_alloc(root_inode->i_sb, &name);
1443 if (res)
1444 d_instantiate(res, root_inode);
1445 }
1446 return res;
1447}
1448EXPORT_SYMBOL(d_alloc_root);
1449
1450struct dentry *d_make_root(struct inode *root_inode) 1469struct dentry *d_make_root(struct inode *root_inode)
1451{ 1470{
1452 struct dentry *res = NULL; 1471 struct dentry *res = NULL;
@@ -1717,8 +1736,9 @@ EXPORT_SYMBOL(d_add_ci);
1717 * child is looked up. Thus, an interlocking stepping of sequence lock checks 1736 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1718 * is formed, giving integrity down the path walk. 1737 * is formed, giving integrity down the path walk.
1719 */ 1738 */
1720struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, 1739struct dentry *__d_lookup_rcu(const struct dentry *parent,
1721 unsigned *seq, struct inode **inode) 1740 const struct qstr *name,
1741 unsigned *seqp, struct inode **inode)
1722{ 1742{
1723 unsigned int len = name->len; 1743 unsigned int len = name->len;
1724 unsigned int hash = name->hash; 1744 unsigned int hash = name->hash;
@@ -1748,6 +1768,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1748 * See Documentation/filesystems/path-lookup.txt for more details. 1768 * See Documentation/filesystems/path-lookup.txt for more details.
1749 */ 1769 */
1750 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1770 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1771 unsigned seq;
1751 struct inode *i; 1772 struct inode *i;
1752 const char *tname; 1773 const char *tname;
1753 int tlen; 1774 int tlen;
@@ -1756,7 +1777,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1756 continue; 1777 continue;
1757 1778
1758seqretry: 1779seqretry:
1759 *seq = read_seqcount_begin(&dentry->d_seq); 1780 seq = read_seqcount_begin(&dentry->d_seq);
1760 if (dentry->d_parent != parent) 1781 if (dentry->d_parent != parent)
1761 continue; 1782 continue;
1762 if (d_unhashed(dentry)) 1783 if (d_unhashed(dentry))
@@ -1771,7 +1792,7 @@ seqretry:
1771 * edge of memory when walking. If we could load this 1792 * edge of memory when walking. If we could load this
1772 * atomically some other way, we could drop this check. 1793 * atomically some other way, we could drop this check.
1773 */ 1794 */
1774 if (read_seqcount_retry(&dentry->d_seq, *seq)) 1795 if (read_seqcount_retry(&dentry->d_seq, seq))
1775 goto seqretry; 1796 goto seqretry;
1776 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1797 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1777 if (parent->d_op->d_compare(parent, *inode, 1798 if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1809,7 @@ seqretry:
1788 * order to do anything useful with the returned dentry 1809 * order to do anything useful with the returned dentry
1789 * anyway. 1810 * anyway.
1790 */ 1811 */
1812 *seqp = seq;
1791 *inode = i; 1813 *inode = i;
1792 return dentry; 1814 return dentry;
1793 } 1815 }
@@ -2968,7 +2990,7 @@ __setup("dhash_entries=", set_dhash_entries);
2968 2990
2969static void __init dcache_init_early(void) 2991static void __init dcache_init_early(void)
2970{ 2992{
2971 int loop; 2993 unsigned int loop;
2972 2994
2973 /* If hashes are distributed across NUMA nodes, defer 2995 /* If hashes are distributed across NUMA nodes, defer
2974 * hash allocation until vmalloc space is available. 2996 * hash allocation until vmalloc space is available.
@@ -2986,13 +3008,13 @@ static void __init dcache_init_early(void)
2986 &d_hash_mask, 3008 &d_hash_mask,
2987 0); 3009 0);
2988 3010
2989 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3011 for (loop = 0; loop < (1U << d_hash_shift); loop++)
2990 INIT_HLIST_BL_HEAD(dentry_hashtable + loop); 3012 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
2991} 3013}
2992 3014
2993static void __init dcache_init(void) 3015static void __init dcache_init(void)
2994{ 3016{
2995 int loop; 3017 unsigned int loop;
2996 3018
2997 /* 3019 /*
2998 * A constructor could be added for stable state like the lists, 3020 * A constructor could be added for stable state like the lists,
@@ -3016,7 +3038,7 @@ static void __init dcache_init(void)
3016 &d_hash_mask, 3038 &d_hash_mask,
3017 0); 3039 0);
3018 3040
3019 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3041 for (loop = 0; loop < (1U << d_hash_shift); loop++)
3020 INIT_HLIST_BL_HEAD(dentry_hashtable + loop); 3042 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3021} 3043}
3022 3044
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ef023eef0464..21e93605161c 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -611,7 +611,7 @@ static const struct file_operations fops_regset32 = {
611 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 611 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
612 * code. 612 * code.
613 */ 613 */
614struct dentry *debugfs_create_regset32(const char *name, mode_t mode, 614struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
615 struct dentry *parent, 615 struct dentry *parent,
616 struct debugfs_regset32 *regset) 616 struct debugfs_regset32 *regset)
617{ 617{
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 956d5ddddf6e..b80bc846a15a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -23,9 +23,13 @@
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/seq_file.h>
27#include <linux/parser.h>
26#include <linux/magic.h> 28#include <linux/magic.h>
27#include <linux/slab.h> 29#include <linux/slab.h>
28 30
31#define DEBUGFS_DEFAULT_MODE 0755
32
29static struct vfsmount *debugfs_mount; 33static struct vfsmount *debugfs_mount;
30static int debugfs_mount_count; 34static int debugfs_mount_count;
31static bool debugfs_registered; 35static bool debugfs_registered;
@@ -125,11 +129,154 @@ static inline int debugfs_positive(struct dentry *dentry)
125 return dentry->d_inode && !d_unhashed(dentry); 129 return dentry->d_inode && !d_unhashed(dentry);
126} 130}
127 131
132struct debugfs_mount_opts {
133 uid_t uid;
134 gid_t gid;
135 umode_t mode;
136};
137
138enum {
139 Opt_uid,
140 Opt_gid,
141 Opt_mode,
142 Opt_err
143};
144
145static const match_table_t tokens = {
146 {Opt_uid, "uid=%u"},
147 {Opt_gid, "gid=%u"},
148 {Opt_mode, "mode=%o"},
149 {Opt_err, NULL}
150};
151
152struct debugfs_fs_info {
153 struct debugfs_mount_opts mount_opts;
154};
155
156static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
157{
158 substring_t args[MAX_OPT_ARGS];
159 int option;
160 int token;
161 char *p;
162
163 opts->mode = DEBUGFS_DEFAULT_MODE;
164
165 while ((p = strsep(&data, ",")) != NULL) {
166 if (!*p)
167 continue;
168
169 token = match_token(p, tokens, args);
170 switch (token) {
171 case Opt_uid:
172 if (match_int(&args[0], &option))
173 return -EINVAL;
174 opts->uid = option;
175 break;
176 case Opt_gid:
177 if (match_octal(&args[0], &option))
178 return -EINVAL;
179 opts->gid = option;
180 break;
181 case Opt_mode:
182 if (match_octal(&args[0], &option))
183 return -EINVAL;
184 opts->mode = option & S_IALLUGO;
185 break;
186 /*
187 * We might like to report bad mount options here;
188 * but traditionally debugfs has ignored all mount options
189 */
190 }
191 }
192
193 return 0;
194}
195
196static int debugfs_apply_options(struct super_block *sb)
197{
198 struct debugfs_fs_info *fsi = sb->s_fs_info;
199 struct inode *inode = sb->s_root->d_inode;
200 struct debugfs_mount_opts *opts = &fsi->mount_opts;
201
202 inode->i_mode &= ~S_IALLUGO;
203 inode->i_mode |= opts->mode;
204
205 inode->i_uid = opts->uid;
206 inode->i_gid = opts->gid;
207
208 return 0;
209}
210
211static int debugfs_remount(struct super_block *sb, int *flags, char *data)
212{
213 int err;
214 struct debugfs_fs_info *fsi = sb->s_fs_info;
215
216 err = debugfs_parse_options(data, &fsi->mount_opts);
217 if (err)
218 goto fail;
219
220 debugfs_apply_options(sb);
221
222fail:
223 return err;
224}
225
226static int debugfs_show_options(struct seq_file *m, struct dentry *root)
227{
228 struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
229 struct debugfs_mount_opts *opts = &fsi->mount_opts;
230
231 if (opts->uid != 0)
232 seq_printf(m, ",uid=%u", opts->uid);
233 if (opts->gid != 0)
234 seq_printf(m, ",gid=%u", opts->gid);
235 if (opts->mode != DEBUGFS_DEFAULT_MODE)
236 seq_printf(m, ",mode=%o", opts->mode);
237
238 return 0;
239}
240
241static const struct super_operations debugfs_super_operations = {
242 .statfs = simple_statfs,
243 .remount_fs = debugfs_remount,
244 .show_options = debugfs_show_options,
245};
246
128static int debug_fill_super(struct super_block *sb, void *data, int silent) 247static int debug_fill_super(struct super_block *sb, void *data, int silent)
129{ 248{
130 static struct tree_descr debug_files[] = {{""}}; 249 static struct tree_descr debug_files[] = {{""}};
250 struct debugfs_fs_info *fsi;
251 int err;
252
253 save_mount_options(sb, data);
254
255 fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
256 sb->s_fs_info = fsi;
257 if (!fsi) {
258 err = -ENOMEM;
259 goto fail;
260 }
261
262 err = debugfs_parse_options(data, &fsi->mount_opts);
263 if (err)
264 goto fail;
265
266 err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
267 if (err)
268 goto fail;
269
270 sb->s_op = &debugfs_super_operations;
271
272 debugfs_apply_options(sb);
273
274 return 0;
131 275
132 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 276fail:
277 kfree(fsi);
278 sb->s_fs_info = NULL;
279 return err;
133} 280}
134 281
135static struct dentry *debug_mount(struct file_system_type *fs_type, 282static struct dentry *debug_mount(struct file_system_type *fs_type,
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c4e2a58a2e82..10f5e0b484db 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -36,7 +36,61 @@
36#define DEVPTS_DEFAULT_PTMX_MODE 0000 36#define DEVPTS_DEFAULT_PTMX_MODE 0000
37#define PTMX_MINOR 2 37#define PTMX_MINOR 2
38 38
39extern int pty_limit; /* Config limit on Unix98 ptys */ 39/*
40 * sysctl support for setting limits on the number of Unix98 ptys allocated.
41 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
42 */
43static int pty_limit = NR_UNIX98_PTY_DEFAULT;
44static int pty_reserve = NR_UNIX98_PTY_RESERVE;
45static int pty_limit_min;
46static int pty_limit_max = INT_MAX;
47static int pty_count;
48
49static struct ctl_table pty_table[] = {
50 {
51 .procname = "max",
52 .maxlen = sizeof(int),
53 .mode = 0644,
54 .data = &pty_limit,
55 .proc_handler = proc_dointvec_minmax,
56 .extra1 = &pty_limit_min,
57 .extra2 = &pty_limit_max,
58 }, {
59 .procname = "reserve",
60 .maxlen = sizeof(int),
61 .mode = 0644,
62 .data = &pty_reserve,
63 .proc_handler = proc_dointvec_minmax,
64 .extra1 = &pty_limit_min,
65 .extra2 = &pty_limit_max,
66 }, {
67 .procname = "nr",
68 .maxlen = sizeof(int),
69 .mode = 0444,
70 .data = &pty_count,
71 .proc_handler = proc_dointvec,
72 },
73 {}
74};
75
76static struct ctl_table pty_kern_table[] = {
77 {
78 .procname = "pty",
79 .mode = 0555,
80 .child = pty_table,
81 },
82 {}
83};
84
85static struct ctl_table pty_root_table[] = {
86 {
87 .procname = "kernel",
88 .mode = 0555,
89 .child = pty_kern_table,
90 },
91 {}
92};
93
40static DEFINE_MUTEX(allocated_ptys_lock); 94static DEFINE_MUTEX(allocated_ptys_lock);
41 95
42static struct vfsmount *devpts_mnt; 96static struct vfsmount *devpts_mnt;
@@ -49,10 +103,11 @@ struct pts_mount_opts {
49 umode_t mode; 103 umode_t mode;
50 umode_t ptmxmode; 104 umode_t ptmxmode;
51 int newinstance; 105 int newinstance;
106 int max;
52}; 107};
53 108
54enum { 109enum {
55 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, 110 Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, Opt_max,
56 Opt_err 111 Opt_err
57}; 112};
58 113
@@ -63,6 +118,7 @@ static const match_table_t tokens = {
63#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES 118#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
64 {Opt_ptmxmode, "ptmxmode=%o"}, 119 {Opt_ptmxmode, "ptmxmode=%o"},
65 {Opt_newinstance, "newinstance"}, 120 {Opt_newinstance, "newinstance"},
121 {Opt_max, "max=%d"},
66#endif 122#endif
67 {Opt_err, NULL} 123 {Opt_err, NULL}
68}; 124};
@@ -109,6 +165,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
109 opts->gid = 0; 165 opts->gid = 0;
110 opts->mode = DEVPTS_DEFAULT_MODE; 166 opts->mode = DEVPTS_DEFAULT_MODE;
111 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; 167 opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
168 opts->max = NR_UNIX98_PTY_MAX;
112 169
113 /* newinstance makes sense only on initial mount */ 170 /* newinstance makes sense only on initial mount */
114 if (op == PARSE_MOUNT) 171 if (op == PARSE_MOUNT)
@@ -152,6 +209,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
152 if (op == PARSE_MOUNT) 209 if (op == PARSE_MOUNT)
153 opts->newinstance = 1; 210 opts->newinstance = 1;
154 break; 211 break;
212 case Opt_max:
213 if (match_int(&args[0], &option) ||
214 option < 0 || option > NR_UNIX98_PTY_MAX)
215 return -EINVAL;
216 opts->max = option;
217 break;
155#endif 218#endif
156 default: 219 default:
157 printk(KERN_ERR "devpts: called with bogus options\n"); 220 printk(KERN_ERR "devpts: called with bogus options\n");
@@ -258,6 +321,8 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
258 seq_printf(seq, ",mode=%03o", opts->mode); 321 seq_printf(seq, ",mode=%03o", opts->mode);
259#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES 322#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
260 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); 323 seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
324 if (opts->max < NR_UNIX98_PTY_MAX)
325 seq_printf(seq, ",max=%d", opts->max);
261#endif 326#endif
262 327
263 return 0; 328 return 0;
@@ -309,12 +374,11 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
309 inode->i_fop = &simple_dir_operations; 374 inode->i_fop = &simple_dir_operations;
310 set_nlink(inode, 2); 375 set_nlink(inode, 2);
311 376
312 s->s_root = d_alloc_root(inode); 377 s->s_root = d_make_root(inode);
313 if (s->s_root) 378 if (s->s_root)
314 return 0; 379 return 0;
315 380
316 printk(KERN_ERR "devpts: get root dentry failed\n"); 381 printk(KERN_ERR "devpts: get root dentry failed\n");
317 iput(inode);
318 382
319fail: 383fail:
320 return -ENOMEM; 384 return -ENOMEM;
@@ -438,6 +502,12 @@ retry:
438 return -ENOMEM; 502 return -ENOMEM;
439 503
440 mutex_lock(&allocated_ptys_lock); 504 mutex_lock(&allocated_ptys_lock);
505 if (pty_count >= pty_limit -
506 (fsi->mount_opts.newinstance ? pty_reserve : 0)) {
507 mutex_unlock(&allocated_ptys_lock);
508 return -ENOSPC;
509 }
510
441 ida_ret = ida_get_new(&fsi->allocated_ptys, &index); 511 ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
442 if (ida_ret < 0) { 512 if (ida_ret < 0) {
443 mutex_unlock(&allocated_ptys_lock); 513 mutex_unlock(&allocated_ptys_lock);
@@ -446,11 +516,12 @@ retry:
446 return -EIO; 516 return -EIO;
447 } 517 }
448 518
449 if (index >= pty_limit) { 519 if (index >= fsi->mount_opts.max) {
450 ida_remove(&fsi->allocated_ptys, index); 520 ida_remove(&fsi->allocated_ptys, index);
451 mutex_unlock(&allocated_ptys_lock); 521 mutex_unlock(&allocated_ptys_lock);
452 return -EIO; 522 return -ENOSPC;
453 } 523 }
524 pty_count++;
454 mutex_unlock(&allocated_ptys_lock); 525 mutex_unlock(&allocated_ptys_lock);
455 return index; 526 return index;
456} 527}
@@ -462,6 +533,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
462 533
463 mutex_lock(&allocated_ptys_lock); 534 mutex_lock(&allocated_ptys_lock);
464 ida_remove(&fsi->allocated_ptys, idx); 535 ida_remove(&fsi->allocated_ptys, idx);
536 pty_count--;
465 mutex_unlock(&allocated_ptys_lock); 537 mutex_unlock(&allocated_ptys_lock);
466} 538}
467 539
@@ -558,11 +630,15 @@ void devpts_pty_kill(struct tty_struct *tty)
558static int __init init_devpts_fs(void) 630static int __init init_devpts_fs(void)
559{ 631{
560 int err = register_filesystem(&devpts_fs_type); 632 int err = register_filesystem(&devpts_fs_type);
633 struct ctl_table_header *table;
634
561 if (!err) { 635 if (!err) {
636 table = register_sysctl_table(pty_root_table);
562 devpts_mnt = kern_mount(&devpts_fs_type); 637 devpts_mnt = kern_mount(&devpts_fs_type);
563 if (IS_ERR(devpts_mnt)) { 638 if (IS_ERR(devpts_mnt)) {
564 err = PTR_ERR(devpts_mnt); 639 err = PTR_ERR(devpts_mnt);
565 unregister_filesystem(&devpts_fs_type); 640 unregister_filesystem(&devpts_fs_type);
641 unregister_sysctl_table(table);
566 } 642 }
567 } 643 }
568 return err; 644 return err;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4a588dbd11bf..f4aadd15b613 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -173,7 +173,7 @@ void inode_dio_wait(struct inode *inode)
173 if (atomic_read(&inode->i_dio_count)) 173 if (atomic_read(&inode->i_dio_count))
174 __inode_dio_wait(inode); 174 __inode_dio_wait(inode);
175} 175}
176EXPORT_SYMBOL_GPL(inode_dio_wait); 176EXPORT_SYMBOL(inode_dio_wait);
177 177
178/* 178/*
179 * inode_dio_done - signal finish of a direct I/O requests 179 * inode_dio_done - signal finish of a direct I/O requests
@@ -187,7 +187,7 @@ void inode_dio_done(struct inode *inode)
187 if (atomic_dec_and_test(&inode->i_dio_count)) 187 if (atomic_dec_and_test(&inode->i_dio_count))
188 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); 188 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
189} 189}
190EXPORT_SYMBOL_GPL(inode_dio_done); 190EXPORT_SYMBOL(inode_dio_done);
191 191
192/* 192/*
193 * How many pages are in the queue? 193 * How many pages are in the queue?
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 83641574b016..dc5eb598b81f 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -351,11 +351,28 @@ int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
351static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) 351static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
352{ 352{
353 struct dlm_rsb *r; 353 struct dlm_rsb *r;
354 uint32_t hash, bucket;
355 int rv;
356
357 hash = jhash(name, len, 0);
358 bucket = hash & (ls->ls_rsbtbl_size - 1);
359
360 spin_lock(&ls->ls_rsbtbl[bucket].lock);
361 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r);
362 if (rv)
363 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
364 name, len, 0, &r);
365 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
366
367 if (!rv)
368 return r;
354 369
355 down_read(&ls->ls_root_sem); 370 down_read(&ls->ls_root_sem);
356 list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 371 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
357 if (len == r->res_length && !memcmp(name, r->res_name, len)) { 372 if (len == r->res_length && !memcmp(name, r->res_name, len)) {
358 up_read(&ls->ls_root_sem); 373 up_read(&ls->ls_root_sem);
374 log_error(ls, "find_rsb_root revert to root_list %s",
375 r->res_name);
359 return r; 376 return r;
360 } 377 }
361 } 378 }
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d47183043c59..fa5c07d51dcc 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -411,8 +411,8 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
411 return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); 411 return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
412} 412}
413 413
414static int search_rsb_tree(struct rb_root *tree, char *name, int len, 414int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
415 unsigned int flags, struct dlm_rsb **r_ret) 415 unsigned int flags, struct dlm_rsb **r_ret)
416{ 416{
417 struct rb_node *node = tree->rb_node; 417 struct rb_node *node = tree->rb_node;
418 struct dlm_rsb *r; 418 struct dlm_rsb *r;
@@ -474,12 +474,12 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
474 struct dlm_rsb *r; 474 struct dlm_rsb *r;
475 int error; 475 int error;
476 476
477 error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); 477 error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r);
478 if (!error) { 478 if (!error) {
479 kref_get(&r->res_ref); 479 kref_get(&r->res_ref);
480 goto out; 480 goto out;
481 } 481 }
482 error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); 482 error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
483 if (error) 483 if (error)
484 goto out; 484 goto out;
485 485
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 265017a7c3e7..1a255307f6ff 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -28,6 +28,9 @@ void dlm_scan_waiters(struct dlm_ls *ls);
28void dlm_scan_timeout(struct dlm_ls *ls); 28void dlm_scan_timeout(struct dlm_ls *ls);
29void dlm_adjust_timeouts(struct dlm_ls *ls); 29void dlm_adjust_timeouts(struct dlm_ls *ls);
30 30
31int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
32 unsigned int flags, struct dlm_rsb **r_ret);
33
31int dlm_purge_locks(struct dlm_ls *ls); 34int dlm_purge_locks(struct dlm_ls *ls);
32void dlm_purge_mstcpy_locks(struct dlm_rsb *r); 35void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
33void dlm_grant_after_purge(struct dlm_ls *ls); 36void dlm_grant_after_purge(struct dlm_ls *ls);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0b3109ee4257..133ef6dc7cb7 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -52,6 +52,7 @@
52#include <linux/mutex.h> 52#include <linux/mutex.h>
53#include <linux/sctp.h> 53#include <linux/sctp.h>
54#include <linux/slab.h> 54#include <linux/slab.h>
55#include <net/sctp/sctp.h>
55#include <net/sctp/user.h> 56#include <net/sctp/user.h>
56#include <net/ipv6.h> 57#include <net/ipv6.h>
57 58
@@ -474,9 +475,6 @@ static void process_sctp_notification(struct connection *con,
474 int prim_len, ret; 475 int prim_len, ret;
475 int addr_len; 476 int addr_len;
476 struct connection *new_con; 477 struct connection *new_con;
477 sctp_peeloff_arg_t parg;
478 int parglen = sizeof(parg);
479 int err;
480 478
481 /* 479 /*
482 * We get this before any data for an association. 480 * We get this before any data for an association.
@@ -525,23 +523,19 @@ static void process_sctp_notification(struct connection *con,
525 return; 523 return;
526 524
527 /* Peel off a new sock */ 525 /* Peel off a new sock */
528 parg.associd = sn->sn_assoc_change.sac_assoc_id; 526 sctp_lock_sock(con->sock->sk);
529 ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, 527 ret = sctp_do_peeloff(con->sock->sk,
530 SCTP_SOCKOPT_PEELOFF, 528 sn->sn_assoc_change.sac_assoc_id,
531 (void *)&parg, &parglen); 529 &new_con->sock);
530 sctp_release_sock(con->sock->sk);
532 if (ret < 0) { 531 if (ret < 0) {
533 log_print("Can't peel off a socket for " 532 log_print("Can't peel off a socket for "
534 "connection %d to node %d: err=%d", 533 "connection %d to node %d: err=%d",
535 parg.associd, nodeid, ret); 534 (int)sn->sn_assoc_change.sac_assoc_id,
536 return; 535 nodeid, ret);
537 }
538 new_con->sock = sockfd_lookup(parg.sd, &err);
539 if (!new_con->sock) {
540 log_print("sockfd_lookup error %d", err);
541 return; 536 return;
542 } 537 }
543 add_sock(new_con->sock, new_con); 538 add_sock(new_con->sock, new_con);
544 sockfd_put(new_con->sock);
545 539
546 log_print("connecting to %d sctp association %d", 540 log_print("connecting to %d sctp association %d",
547 nodeid, (int)sn->sn_assoc_change.sac_assoc_id); 541 nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
@@ -1082,7 +1076,7 @@ static void init_local(void)
1082 int i; 1076 int i;
1083 1077
1084 dlm_local_count = 0; 1078 dlm_local_count = 0;
1085 for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { 1079 for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
1086 if (dlm_our_addr(&sas, i)) 1080 if (dlm_our_addr(&sas, i))
1087 break; 1081 break;
1088 1082
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index d3f95f941c47..2b17f2f9b121 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -48,8 +48,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
48 unsigned long nr_segs, loff_t pos) 48 unsigned long nr_segs, loff_t pos)
49{ 49{
50 ssize_t rc; 50 ssize_t rc;
51 struct dentry *lower_dentry; 51 struct path lower;
52 struct vfsmount *lower_vfsmount;
53 struct file *file = iocb->ki_filp; 52 struct file *file = iocb->ki_filp;
54 53
55 rc = generic_file_aio_read(iocb, iov, nr_segs, pos); 54 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
@@ -60,9 +59,9 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
60 if (-EIOCBQUEUED == rc) 59 if (-EIOCBQUEUED == rc)
61 rc = wait_on_sync_kiocb(iocb); 60 rc = wait_on_sync_kiocb(iocb);
62 if (rc >= 0) { 61 if (rc >= 0) {
63 lower_dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); 62 lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry);
64 lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); 63 lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry);
65 touch_atime(lower_vfsmount, lower_dentry); 64 touch_atime(&lower);
66 } 65 }
67 return rc; 66 return rc;
68} 67}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index b4a6befb1216..68954937a071 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -550,9 +550,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
550 if (IS_ERR(inode)) 550 if (IS_ERR(inode))
551 goto out_free; 551 goto out_free;
552 552
553 s->s_root = d_alloc_root(inode); 553 s->s_root = d_make_root(inode);
554 if (!s->s_root) { 554 if (!s->s_root) {
555 iput(inode);
556 rc = -ENOMEM; 555 rc = -ENOMEM;
557 goto out_free; 556 goto out_free;
558 } 557 }
@@ -795,15 +794,10 @@ static int __init ecryptfs_init(void)
795 "Failed to allocate one or more kmem_cache objects\n"); 794 "Failed to allocate one or more kmem_cache objects\n");
796 goto out; 795 goto out;
797 } 796 }
798 rc = register_filesystem(&ecryptfs_fs_type);
799 if (rc) {
800 printk(KERN_ERR "Failed to register filesystem\n");
801 goto out_free_kmem_caches;
802 }
803 rc = do_sysfs_registration(); 797 rc = do_sysfs_registration();
804 if (rc) { 798 if (rc) {
805 printk(KERN_ERR "sysfs registration failed\n"); 799 printk(KERN_ERR "sysfs registration failed\n");
806 goto out_unregister_filesystem; 800 goto out_free_kmem_caches;
807 } 801 }
808 rc = ecryptfs_init_kthread(); 802 rc = ecryptfs_init_kthread();
809 if (rc) { 803 if (rc) {
@@ -824,19 +818,24 @@ static int __init ecryptfs_init(void)
824 "rc = [%d]\n", rc); 818 "rc = [%d]\n", rc);
825 goto out_release_messaging; 819 goto out_release_messaging;
826 } 820 }
821 rc = register_filesystem(&ecryptfs_fs_type);
822 if (rc) {
823 printk(KERN_ERR "Failed to register filesystem\n");
824 goto out_destroy_crypto;
825 }
827 if (ecryptfs_verbosity > 0) 826 if (ecryptfs_verbosity > 0)
828 printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values " 827 printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values "
829 "will be written to the syslog!\n", ecryptfs_verbosity); 828 "will be written to the syslog!\n", ecryptfs_verbosity);
830 829
831 goto out; 830 goto out;
831out_destroy_crypto:
832 ecryptfs_destroy_crypto();
832out_release_messaging: 833out_release_messaging:
833 ecryptfs_release_messaging(); 834 ecryptfs_release_messaging();
834out_destroy_kthread: 835out_destroy_kthread:
835 ecryptfs_destroy_kthread(); 836 ecryptfs_destroy_kthread();
836out_do_sysfs_unregistration: 837out_do_sysfs_unregistration:
837 do_sysfs_unregistration(); 838 do_sysfs_unregistration();
838out_unregister_filesystem:
839 unregister_filesystem(&ecryptfs_fs_type);
840out_free_kmem_caches: 839out_free_kmem_caches:
841 ecryptfs_free_kmem_caches(); 840 ecryptfs_free_kmem_caches();
842out: 841out:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 349209dc6a91..3a06f4043df4 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -429,7 +429,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
429 goto memdup; 429 goto memdup;
430 } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) { 430 } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) {
431 printk(KERN_WARNING "%s: Acceptable packet size range is " 431 printk(KERN_WARNING "%s: Acceptable packet size range is "
432 "[%d-%lu], but amount of data written is [%zu].", 432 "[%d-%zu], but amount of data written is [%zu].",
433 __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count); 433 __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count);
434 return -EINVAL; 434 return -EINVAL;
435 } 435 }
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index cf152823bbf4..2dd946b636d2 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -184,7 +184,6 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
184const struct super_operations ecryptfs_sops = { 184const struct super_operations ecryptfs_sops = {
185 .alloc_inode = ecryptfs_alloc_inode, 185 .alloc_inode = ecryptfs_alloc_inode,
186 .destroy_inode = ecryptfs_destroy_inode, 186 .destroy_inode = ecryptfs_destroy_inode,
187 .drop_inode = generic_drop_inode,
188 .statfs = ecryptfs_statfs, 187 .statfs = ecryptfs_statfs,
189 .remount_fs = NULL, 188 .remount_fs = NULL,
190 .evict_inode = ecryptfs_evict_inode, 189 .evict_inode = ecryptfs_evict_inode,
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 981106429a9f..e755ec746c69 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -317,10 +317,9 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
317 goto out_no_fs; 317 goto out_no_fs;
318 } 318 }
319 319
320 s->s_root = d_alloc_root(root); 320 s->s_root = d_make_root(root);
321 if (!(s->s_root)) { 321 if (!(s->s_root)) {
322 printk(KERN_ERR "EFS: get root dentry failed\n"); 322 printk(KERN_ERR "EFS: get root dentry failed\n");
323 iput(root);
324 ret = -ENOMEM; 323 ret = -ENOMEM;
325 goto out_no_fs; 324 goto out_no_fs;
326 } 325 }
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf24..4d9d3a45e356 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p)
320 return !list_empty(p); 320 return !list_empty(p);
321} 321}
322 322
323static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
324{
325 return container_of(p, struct eppoll_entry, wait);
326}
327
323/* Get the "struct epitem" from a wait queue pointer */ 328/* Get the "struct epitem" from a wait queue pointer */
324static inline struct epitem *ep_item_from_wait(wait_queue_t *p) 329static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
325{ 330{
@@ -467,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
467 put_cpu(); 472 put_cpu();
468} 473}
469 474
475static void ep_remove_wait_queue(struct eppoll_entry *pwq)
476{
477 wait_queue_head_t *whead;
478
479 rcu_read_lock();
480 /* If it is cleared by POLLFREE, it should be rcu-safe */
481 whead = rcu_dereference(pwq->whead);
482 if (whead)
483 remove_wait_queue(whead, &pwq->wait);
484 rcu_read_unlock();
485}
486
470/* 487/*
471 * This function unregisters poll callbacks from the associated file 488 * This function unregisters poll callbacks from the associated file
472 * descriptor. Must be called with "mtx" held (or "epmutex" if called from 489 * descriptor. Must be called with "mtx" held (or "epmutex" if called from
@@ -481,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
481 pwq = list_first_entry(lsthead, struct eppoll_entry, llink); 498 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
482 499
483 list_del(&pwq->llink); 500 list_del(&pwq->llink);
484 remove_wait_queue(pwq->whead, &pwq->wait); 501 ep_remove_wait_queue(pwq);
485 kmem_cache_free(pwq_cache, pwq); 502 kmem_cache_free(pwq_cache, pwq);
486 } 503 }
487} 504}
@@ -842,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
842 struct epitem *epi = ep_item_from_wait(wait); 859 struct epitem *epi = ep_item_from_wait(wait);
843 struct eventpoll *ep = epi->ep; 860 struct eventpoll *ep = epi->ep;
844 861
862 if ((unsigned long)key & POLLFREE) {
863 ep_pwq_from_wait(wait)->whead = NULL;
864 /*
865 * whead = NULL above can race with ep_remove_wait_queue()
866 * which can do another remove_wait_queue() after us, so we
867 * can't use __remove_wait_queue(). whead->lock is held by
868 * the caller.
869 */
870 list_del_init(&wait->task_list);
871 }
872
845 spin_lock_irqsave(&ep->lock, flags); 873 spin_lock_irqsave(&ep->lock, flags);
846 874
847 /* 875 /*
@@ -960,6 +988,10 @@ static int path_count[PATH_ARR_SIZE];
960 988
961static int path_count_inc(int nests) 989static int path_count_inc(int nests)
962{ 990{
991 /* Allow an arbitrary number of depth 1 paths */
992 if (nests == 0)
993 return 0;
994
963 if (++path_count[nests] > path_limits[nests]) 995 if (++path_count[nests] > path_limits[nests])
964 return -1; 996 return -1;
965 return 0; 997 return 0;
diff --git a/fs/exec.c b/fs/exec.c
index 92ce83a11e90..23559c227d9c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,8 @@
63#include <trace/events/task.h> 63#include <trace/events/task.h>
64#include "internal.h" 64#include "internal.h"
65 65
66#include <trace/events/sched.h>
67
66int core_uses_pid; 68int core_uses_pid;
67char core_pattern[CORENAME_MAX_SIZE] = "core"; 69char core_pattern[CORENAME_MAX_SIZE] = "core";
68unsigned int core_pipe_limit; 70unsigned int core_pipe_limit;
@@ -79,15 +81,13 @@ static atomic_t call_count = ATOMIC_INIT(1);
79static LIST_HEAD(formats); 81static LIST_HEAD(formats);
80static DEFINE_RWLOCK(binfmt_lock); 82static DEFINE_RWLOCK(binfmt_lock);
81 83
82int __register_binfmt(struct linux_binfmt * fmt, int insert) 84void __register_binfmt(struct linux_binfmt * fmt, int insert)
83{ 85{
84 if (!fmt) 86 BUG_ON(!fmt);
85 return -EINVAL;
86 write_lock(&binfmt_lock); 87 write_lock(&binfmt_lock);
87 insert ? list_add(&fmt->lh, &formats) : 88 insert ? list_add(&fmt->lh, &formats) :
88 list_add_tail(&fmt->lh, &formats); 89 list_add_tail(&fmt->lh, &formats);
89 write_unlock(&binfmt_lock); 90 write_unlock(&binfmt_lock);
90 return 0;
91} 91}
92 92
93EXPORT_SYMBOL(__register_binfmt); 93EXPORT_SYMBOL(__register_binfmt);
@@ -822,7 +822,7 @@ static int exec_mmap(struct mm_struct *mm)
822 /* Notify parent that we're no longer interested in the old VM */ 822 /* Notify parent that we're no longer interested in the old VM */
823 tsk = current; 823 tsk = current;
824 old_mm = current->mm; 824 old_mm = current->mm;
825 sync_mm_rss(tsk, old_mm); 825 sync_mm_rss(old_mm);
826 mm_release(tsk, old_mm); 826 mm_release(tsk, old_mm);
827 827
828 if (old_mm) { 828 if (old_mm) {
@@ -848,6 +848,7 @@ static int exec_mmap(struct mm_struct *mm)
848 if (old_mm) { 848 if (old_mm) {
849 up_read(&old_mm->mmap_sem); 849 up_read(&old_mm->mmap_sem);
850 BUG_ON(active_mm != old_mm); 850 BUG_ON(active_mm != old_mm);
851 setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
851 mm_update_next_owner(old_mm); 852 mm_update_next_owner(old_mm);
852 mmput(old_mm); 853 mmput(old_mm);
853 return 0; 854 return 0;
@@ -975,8 +976,8 @@ static int de_thread(struct task_struct *tsk)
975 sig->notify_count = 0; 976 sig->notify_count = 0;
976 977
977no_thread_group: 978no_thread_group:
978 if (current->mm) 979 /* we have changed execution domain */
979 setmax_mm_hiwater_rss(&sig->maxrss, current->mm); 980 tsk->exit_signal = SIGCHLD;
980 981
981 exit_itimers(sig); 982 exit_itimers(sig);
982 flush_itimer_signals(); 983 flush_itimer_signals();
@@ -1112,7 +1113,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1112 bprm->mm = NULL; /* We're using it now */ 1113 bprm->mm = NULL; /* We're using it now */
1113 1114
1114 set_fs(USER_DS); 1115 set_fs(USER_DS);
1115 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); 1116 current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
1116 flush_thread(); 1117 flush_thread();
1117 current->personality &= ~bprm->per_clear; 1118 current->personality &= ~bprm->per_clear;
1118 1119
@@ -1339,13 +1340,13 @@ int remove_arg_zero(struct linux_binprm *bprm)
1339 ret = -EFAULT; 1340 ret = -EFAULT;
1340 goto out; 1341 goto out;
1341 } 1342 }
1342 kaddr = kmap_atomic(page, KM_USER0); 1343 kaddr = kmap_atomic(page);
1343 1344
1344 for (; offset < PAGE_SIZE && kaddr[offset]; 1345 for (; offset < PAGE_SIZE && kaddr[offset];
1345 offset++, bprm->p++) 1346 offset++, bprm->p++)
1346 ; 1347 ;
1347 1348
1348 kunmap_atomic(kaddr, KM_USER0); 1349 kunmap_atomic(kaddr);
1349 put_arg_page(page); 1350 put_arg_page(page);
1350 1351
1351 if (offset == PAGE_SIZE) 1352 if (offset == PAGE_SIZE)
@@ -1402,9 +1403,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1402 */ 1403 */
1403 bprm->recursion_depth = depth; 1404 bprm->recursion_depth = depth;
1404 if (retval >= 0) { 1405 if (retval >= 0) {
1405 if (depth == 0) 1406 if (depth == 0) {
1406 ptrace_event(PTRACE_EVENT_EXEC, 1407 trace_sched_process_exec(current, old_pid, bprm);
1407 old_pid); 1408 ptrace_event(PTRACE_EVENT_EXEC, old_pid);
1409 }
1408 put_binfmt(fmt); 1410 put_binfmt(fmt);
1409 allow_write_access(bprm->file); 1411 allow_write_access(bprm->file);
1410 if (bprm->file) 1412 if (bprm->file)
@@ -1915,7 +1917,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
1915{ 1917{
1916 struct task_struct *tsk = current; 1918 struct task_struct *tsk = current;
1917 struct mm_struct *mm = tsk->mm; 1919 struct mm_struct *mm = tsk->mm;
1918 struct completion *vfork_done;
1919 int core_waiters = -EBUSY; 1920 int core_waiters = -EBUSY;
1920 1921
1921 init_completion(&core_state->startup); 1922 init_completion(&core_state->startup);
@@ -1927,22 +1928,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
1927 core_waiters = zap_threads(tsk, mm, core_state, exit_code); 1928 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1928 up_write(&mm->mmap_sem); 1929 up_write(&mm->mmap_sem);
1929 1930
1930 if (unlikely(core_waiters < 0)) 1931 if (core_waiters > 0)
1931 goto fail;
1932
1933 /*
1934 * Make sure nobody is waiting for us to release the VM,
1935 * otherwise we can deadlock when we wait on each other
1936 */
1937 vfork_done = tsk->vfork_done;
1938 if (vfork_done) {
1939 tsk->vfork_done = NULL;
1940 complete(vfork_done);
1941 }
1942
1943 if (core_waiters)
1944 wait_for_completion(&core_state->startup); 1932 wait_for_completion(&core_state->startup);
1945fail: 1933
1946 return core_waiters; 1934 return core_waiters;
1947} 1935}
1948 1936
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 80405836ba6e..c61e62ac231c 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -597,7 +597,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
597 goto fail; 597 goto fail;
598 } 598 }
599 599
600 kaddr = kmap_atomic(page, KM_USER0); 600 kaddr = kmap_atomic(page);
601 de = (struct exofs_dir_entry *)kaddr; 601 de = (struct exofs_dir_entry *)kaddr;
602 de->name_len = 1; 602 de->name_len = 1;
603 de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1)); 603 de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
@@ -611,7 +611,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
611 de->inode_no = cpu_to_le64(parent->i_ino); 611 de->inode_no = cpu_to_le64(parent->i_ino);
612 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); 612 memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
613 exofs_set_de_type(de, inode); 613 exofs_set_de_type(de, inode);
614 kunmap_atomic(kaddr, KM_USER0); 614 kunmap_atomic(kaddr);
615 err = exofs_commit_chunk(page, 0, chunk_size); 615 err = exofs_commit_chunk(page, 0, chunk_size);
616fail: 616fail:
617 page_cache_release(page); 617 page_cache_release(page);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 9dbf0c301030..fc7161d6bf6b 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -143,9 +143,6 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
143{ 143{
144 struct inode *inode = old_dentry->d_inode; 144 struct inode *inode = old_dentry->d_inode;
145 145
146 if (inode->i_nlink >= EXOFS_LINK_MAX)
147 return -EMLINK;
148
149 inode->i_ctime = CURRENT_TIME; 146 inode->i_ctime = CURRENT_TIME;
150 inode_inc_link_count(inode); 147 inode_inc_link_count(inode);
151 ihold(inode); 148 ihold(inode);
@@ -156,10 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
156static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 153static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
157{ 154{
158 struct inode *inode; 155 struct inode *inode;
159 int err = -EMLINK; 156 int err;
160
161 if (dir->i_nlink >= EXOFS_LINK_MAX)
162 goto out;
163 157
164 inode_inc_link_count(dir); 158 inode_inc_link_count(dir);
165 159
@@ -275,11 +269,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
275 if (err) 269 if (err)
276 goto out_dir; 270 goto out_dir;
277 } else { 271 } else {
278 if (dir_de) {
279 err = -EMLINK;
280 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
281 goto out_dir;
282 }
283 err = exofs_add_link(new_dentry, old_inode); 272 err = exofs_add_link(new_dentry, old_inode);
284 if (err) 273 if (err)
285 goto out_dir; 274 goto out_dir;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index d22cd168c6ee..7f2b590a36b7 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -754,6 +754,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
754 sb->s_blocksize = EXOFS_BLKSIZE; 754 sb->s_blocksize = EXOFS_BLKSIZE;
755 sb->s_blocksize_bits = EXOFS_BLKSHIFT; 755 sb->s_blocksize_bits = EXOFS_BLKSHIFT;
756 sb->s_maxbytes = MAX_LFS_FILESIZE; 756 sb->s_maxbytes = MAX_LFS_FILESIZE;
757 sb->s_max_links = EXOFS_LINK_MAX;
757 atomic_set(&sbi->s_curr_pending, 0); 758 atomic_set(&sbi->s_curr_pending, 0);
758 sb->s_bdev = NULL; 759 sb->s_bdev = NULL;
759 sb->s_dev = 0; 760 sb->s_dev = 0;
@@ -818,9 +819,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
818 ret = PTR_ERR(root); 819 ret = PTR_ERR(root);
819 goto free_sbi; 820 goto free_sbi;
820 } 821 }
821 sb->s_root = d_alloc_root(root); 822 sb->s_root = d_make_root(root);
822 if (!sb->s_root) { 823 if (!sb->s_root) {
823 iput(root);
824 EXOFS_ERR("ERROR: get root inode failed\n"); 824 EXOFS_ERR("ERROR: get root inode failed\n");
825 ret = -ENOMEM; 825 ret = -ENOMEM;
826 goto free_sbi; 826 goto free_sbi;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d37df352d324..0f4f5c929257 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -645,7 +645,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
645 unlock_page(page); 645 unlock_page(page);
646 goto fail; 646 goto fail;
647 } 647 }
648 kaddr = kmap_atomic(page, KM_USER0); 648 kaddr = kmap_atomic(page);
649 memset(kaddr, 0, chunk_size); 649 memset(kaddr, 0, chunk_size);
650 de = (struct ext2_dir_entry_2 *)kaddr; 650 de = (struct ext2_dir_entry_2 *)kaddr;
651 de->name_len = 1; 651 de->name_len = 1;
@@ -660,7 +660,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
660 de->inode = cpu_to_le32(parent->i_ino); 660 de->inode = cpu_to_le32(parent->i_ino);
661 memcpy (de->name, "..\0", 4); 661 memcpy (de->name, "..\0", 4);
662 ext2_set_de_type (de, inode); 662 ext2_set_de_type (de, inode);
663 kunmap_atomic(kaddr, KM_USER0); 663 kunmap_atomic(kaddr);
664 err = ext2_commit_chunk(page, 0, chunk_size); 664 err = ext2_commit_chunk(page, 0, chunk_size);
665fail: 665fail:
666 page_cache_release(page); 666 page_cache_release(page);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 080419814bae..dffb86536285 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -195,9 +195,6 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
195 struct inode *inode = old_dentry->d_inode; 195 struct inode *inode = old_dentry->d_inode;
196 int err; 196 int err;
197 197
198 if (inode->i_nlink >= EXT2_LINK_MAX)
199 return -EMLINK;
200
201 dquot_initialize(dir); 198 dquot_initialize(dir);
202 199
203 inode->i_ctime = CURRENT_TIME_SEC; 200 inode->i_ctime = CURRENT_TIME_SEC;
@@ -217,10 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
217static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) 214static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
218{ 215{
219 struct inode * inode; 216 struct inode * inode;
220 int err = -EMLINK; 217 int err;
221
222 if (dir->i_nlink >= EXT2_LINK_MAX)
223 goto out;
224 218
225 dquot_initialize(dir); 219 dquot_initialize(dir);
226 220
@@ -346,11 +340,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
346 drop_nlink(new_inode); 340 drop_nlink(new_inode);
347 inode_dec_link_count(new_inode); 341 inode_dec_link_count(new_inode);
348 } else { 342 } else {
349 if (dir_de) {
350 err = -EMLINK;
351 if (new_dir->i_nlink >= EXT2_LINK_MAX)
352 goto out_dir;
353 }
354 err = ext2_add_link(new_dentry, old_inode); 343 err = ext2_add_link(new_dentry, old_inode);
355 if (err) 344 if (err)
356 goto out_dir; 345 goto out_dir;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0090595beb28..e1025c7a437a 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -919,6 +919,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
919 } 919 }
920 920
921 sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); 921 sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
922 sb->s_max_links = EXT2_LINK_MAX;
922 923
923 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { 924 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
924 sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; 925 sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
@@ -1087,9 +1088,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1087 goto failed_mount3; 1088 goto failed_mount3;
1088 } 1089 }
1089 1090
1090 sb->s_root = d_alloc_root(root); 1091 sb->s_root = d_make_root(root);
1091 if (!sb->s_root) { 1092 if (!sb->s_root) {
1092 iput(root);
1093 ext2_msg(sb, KERN_ERR, "error: get root inode failed"); 1093 ext2_msg(sb, KERN_ERR, "error: get root inode failed");
1094 ret = -ENOMEM; 1094 ret = -ENOMEM;
1095 goto failed_mount3; 1095 goto failed_mount3;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 726c7ef6cdf1..e0b45b93327b 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2046,10 +2046,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2046 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 2046 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2047 goto failed_mount3; 2047 goto failed_mount3;
2048 } 2048 }
2049 sb->s_root = d_alloc_root(root); 2049 sb->s_root = d_make_root(root);
2050 if (!sb->s_root) { 2050 if (!sb->s_root) {
2051 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2051 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2052 iput(root);
2053 ret = -ENOMEM; 2052 ret = -ENOMEM;
2054 goto failed_mount3; 2053 goto failed_mount3;
2055 } 2054 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 502c61fd7392..933900909ed0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3735,9 +3735,8 @@ no_journal:
3735 iput(root); 3735 iput(root);
3736 goto failed_mount4; 3736 goto failed_mount4;
3737 } 3737 }
3738 sb->s_root = d_alloc_root(root); 3738 sb->s_root = d_make_root(root);
3739 if (!sb->s_root) { 3739 if (!sb->s_root) {
3740 iput(root);
3741 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 3740 ext4_msg(sb, KERN_ERR, "get root dentry failed");
3742 ret = -ENOMEM; 3741 ret = -ENOMEM;
3743 goto failed_mount4; 3742 goto failed_mount4;
@@ -5056,6 +5055,9 @@ static int __init ext4_init_fs(void)
5056{ 5055{
5057 int i, err; 5056 int i, err;
5058 5057
5058 ext4_li_info = NULL;
5059 mutex_init(&ext4_li_mtx);
5060
5059 ext4_check_flag_values(); 5061 ext4_check_flag_values();
5060 5062
5061 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { 5063 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
@@ -5094,8 +5096,6 @@ static int __init ext4_init_fs(void)
5094 if (err) 5096 if (err)
5095 goto out; 5097 goto out;
5096 5098
5097 ext4_li_info = NULL;
5098 mutex_init(&ext4_li_mtx);
5099 return 0; 5099 return 0;
5100out: 5100out:
5101 unregister_as_ext2(); 5101 unregister_as_ext2();
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 3ab841054d53..21687e31acc0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1496,11 +1496,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1496 root_inode->i_ino = MSDOS_ROOT_INO; 1496 root_inode->i_ino = MSDOS_ROOT_INO;
1497 root_inode->i_version = 1; 1497 root_inode->i_version = 1;
1498 error = fat_read_root(root_inode); 1498 error = fat_read_root(root_inode);
1499 if (error < 0) 1499 if (error < 0) {
1500 iput(root_inode);
1500 goto out_fail; 1501 goto out_fail;
1502 }
1501 error = -ENOMEM; 1503 error = -ENOMEM;
1502 insert_inode_hash(root_inode); 1504 insert_inode_hash(root_inode);
1503 sb->s_root = d_alloc_root(root_inode); 1505 sb->s_root = d_make_root(root_inode);
1504 if (!sb->s_root) { 1506 if (!sb->s_root) {
1505 fat_msg(sb, KERN_ERR, "get root inode failed"); 1507 fat_msg(sb, KERN_ERR, "get root inode failed");
1506 goto out_fail; 1508 goto out_fail;
@@ -1516,8 +1518,6 @@ out_invalid:
1516out_fail: 1518out_fail:
1517 if (fat_inode) 1519 if (fat_inode)
1518 iput(fat_inode); 1520 iput(fat_inode);
1519 if (root_inode)
1520 iput(root_inode);
1521 unload_nls(sbi->nls_io); 1521 unload_nls(sbi->nls_io);
1522 unload_nls(sbi->nls_disk); 1522 unload_nls(sbi->nls_disk);
1523 if (sbi->options.iocharset != fat_default_iocharset) 1523 if (sbi->options.iocharset != fat_default_iocharset)
diff --git a/fs/file_table.c b/fs/file_table.c
index 20002e39754d..70f2a0fd6aec 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -204,7 +204,7 @@ EXPORT_SYMBOL(alloc_file);
204 * to write to @file, along with access to write through 204 * to write to @file, along with access to write through
205 * its vfsmount. 205 * its vfsmount.
206 */ 206 */
207void drop_file_write_access(struct file *file) 207static void drop_file_write_access(struct file *file)
208{ 208{
209 struct vfsmount *mnt = file->f_path.mnt; 209 struct vfsmount *mnt = file->f_path.mnt;
210 struct dentry *dentry = file->f_path.dentry; 210 struct dentry *dentry = file->f_path.dentry;
@@ -219,7 +219,6 @@ void drop_file_write_access(struct file *file)
219 mnt_drop_write(mnt); 219 mnt_drop_write(mnt);
220 file_release_write(file); 220 file_release_write(file);
221} 221}
222EXPORT_SYMBOL_GPL(drop_file_write_access);
223 222
224/* the real guts of fput() - releasing the last reference to file 223/* the real guts of fput() - releasing the last reference to file
225 */ 224 */
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 9d1c99558389..d4fabd26084e 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -224,9 +224,8 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
224 ret = PTR_ERR(root); 224 ret = PTR_ERR(root);
225 goto out; 225 goto out;
226 } 226 }
227 sbp->s_root = d_alloc_root(root); 227 sbp->s_root = d_make_root(root);
228 if (!sbp->s_root) { 228 if (!sbp->s_root) {
229 iput(root);
230 printk(KERN_WARNING "vxfs: unable to get root dentry.\n"); 229 printk(KERN_WARNING "vxfs: unable to get root dentry.\n");
231 goto out_free_ilist; 230 goto out_free_ilist;
232 } 231 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5b4a9362d5aa..77b535ac7136 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1284,7 +1284,7 @@ int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
1284EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1284EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1285 1285
1286/** 1286/**
1287 * writeback_inodes_sb_if_idle - start writeback if none underway 1287 * writeback_inodes_sb_nr_if_idle - start writeback if none underway
1288 * @sb: the superblock 1288 * @sb: the superblock
1289 * @nr: the number of pages to write 1289 * @nr: the number of pages to write
1290 * @reason: reason why some writeback work was initiated 1290 * @reason: reason why some writeback work was initiated
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 78b519c13536..6324c4274959 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -26,11 +26,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
26{ 26{
27 struct path old_root; 27 struct path old_root;
28 28
29 path_get_longterm(path);
29 spin_lock(&fs->lock); 30 spin_lock(&fs->lock);
30 write_seqcount_begin(&fs->seq); 31 write_seqcount_begin(&fs->seq);
31 old_root = fs->root; 32 old_root = fs->root;
32 fs->root = *path; 33 fs->root = *path;
33 path_get_longterm(path);
34 write_seqcount_end(&fs->seq); 34 write_seqcount_end(&fs->seq);
35 spin_unlock(&fs->lock); 35 spin_unlock(&fs->lock);
36 if (old_root.dentry) 36 if (old_root.dentry)
@@ -45,11 +45,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
45{ 45{
46 struct path old_pwd; 46 struct path old_pwd;
47 47
48 path_get_longterm(path);
48 spin_lock(&fs->lock); 49 spin_lock(&fs->lock);
49 write_seqcount_begin(&fs->seq); 50 write_seqcount_begin(&fs->seq);
50 old_pwd = fs->pwd; 51 old_pwd = fs->pwd;
51 fs->pwd = *path; 52 fs->pwd = *path;
52 path_get_longterm(path);
53 write_seqcount_end(&fs->seq); 53 write_seqcount_end(&fs->seq);
54 spin_unlock(&fs->lock); 54 spin_unlock(&fs->lock);
55 55
@@ -57,6 +57,14 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
57 path_put_longterm(&old_pwd); 57 path_put_longterm(&old_pwd);
58} 58}
59 59
60static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
61{
62 if (likely(p->dentry != old->dentry || p->mnt != old->mnt))
63 return 0;
64 *p = *new;
65 return 1;
66}
67
60void chroot_fs_refs(struct path *old_root, struct path *new_root) 68void chroot_fs_refs(struct path *old_root, struct path *new_root)
61{ 69{
62 struct task_struct *g, *p; 70 struct task_struct *g, *p;
@@ -68,21 +76,16 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
68 task_lock(p); 76 task_lock(p);
69 fs = p->fs; 77 fs = p->fs;
70 if (fs) { 78 if (fs) {
79 int hits = 0;
71 spin_lock(&fs->lock); 80 spin_lock(&fs->lock);
72 write_seqcount_begin(&fs->seq); 81 write_seqcount_begin(&fs->seq);
73 if (fs->root.dentry == old_root->dentry 82 hits += replace_path(&fs->root, old_root, new_root);
74 && fs->root.mnt == old_root->mnt) { 83 hits += replace_path(&fs->pwd, old_root, new_root);
75 path_get_longterm(new_root); 84 write_seqcount_end(&fs->seq);
76 fs->root = *new_root; 85 while (hits--) {
77 count++; 86 count++;
78 }
79 if (fs->pwd.dentry == old_root->dentry
80 && fs->pwd.mnt == old_root->mnt) {
81 path_get_longterm(new_root); 87 path_get_longterm(new_root);
82 fs->pwd = *new_root;
83 count++;
84 } 88 }
85 write_seqcount_end(&fs->seq);
86 spin_unlock(&fs->lock); 89 spin_unlock(&fs->lock);
87 } 90 }
88 task_unlock(p); 91 task_unlock(p);
@@ -107,10 +110,8 @@ void exit_fs(struct task_struct *tsk)
107 int kill; 110 int kill;
108 task_lock(tsk); 111 task_lock(tsk);
109 spin_lock(&fs->lock); 112 spin_lock(&fs->lock);
110 write_seqcount_begin(&fs->seq);
111 tsk->fs = NULL; 113 tsk->fs = NULL;
112 kill = !--fs->users; 114 kill = !--fs->users;
113 write_seqcount_end(&fs->seq);
114 spin_unlock(&fs->lock); 115 spin_unlock(&fs->lock);
115 task_unlock(tsk); 116 task_unlock(tsk);
116 if (kill) 117 if (kill)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5f3368ab0fa9..7df2b5e8fbe1 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -838,10 +838,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
838 } 838 }
839 } 839 }
840 if (page) { 840 if (page) {
841 void *mapaddr = kmap_atomic(page, KM_USER0); 841 void *mapaddr = kmap_atomic(page);
842 void *buf = mapaddr + offset; 842 void *buf = mapaddr + offset;
843 offset += fuse_copy_do(cs, &buf, &count); 843 offset += fuse_copy_do(cs, &buf, &count);
844 kunmap_atomic(mapaddr, KM_USER0); 844 kunmap_atomic(mapaddr);
845 } else 845 } else
846 offset += fuse_copy_do(cs, NULL, &count); 846 offset += fuse_copy_do(cs, NULL, &count);
847 } 847 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4a199fd93fbd..a841868bf9ce 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1887,11 +1887,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1887 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 1887 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1888 goto out; 1888 goto out;
1889 1889
1890 vaddr = kmap_atomic(pages[0], KM_USER0); 1890 vaddr = kmap_atomic(pages[0]);
1891 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, 1891 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
1892 transferred, in_iovs + out_iovs, 1892 transferred, in_iovs + out_iovs,
1893 (flags & FUSE_IOCTL_COMPAT) != 0); 1893 (flags & FUSE_IOCTL_COMPAT) != 0);
1894 kunmap_atomic(vaddr, KM_USER0); 1894 kunmap_atomic(vaddr);
1895 if (err) 1895 if (err)
1896 goto out; 1896 goto out;
1897 1897
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 64cf8d07393e..4aec5995867e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -988,14 +988,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
988 988
989 err = -ENOMEM; 989 err = -ENOMEM;
990 root = fuse_get_root_inode(sb, d.rootmode); 990 root = fuse_get_root_inode(sb, d.rootmode);
991 if (!root) 991 root_dentry = d_make_root(root);
992 if (!root_dentry)
992 goto err_put_conn; 993 goto err_put_conn;
993
994 root_dentry = d_alloc_root(root);
995 if (!root_dentry) {
996 iput(root);
997 goto err_put_conn;
998 }
999 /* only now - we want root dentry with NULL ->d_op */ 994 /* only now - we want root dentry with NULL ->d_op */
1000 sb->s_d_op = &fuse_dentry_operations; 995 sb->s_d_op = &fuse_dentry_operations;
1001 996
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 501e5cba09b3..38b7a74a0f91 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -434,12 +434,12 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
434 if (error) 434 if (error)
435 return error; 435 return error;
436 436
437 kaddr = kmap_atomic(page, KM_USER0); 437 kaddr = kmap_atomic(page);
438 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode))) 438 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
439 dsize = (dibh->b_size - sizeof(struct gfs2_dinode)); 439 dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); 440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
441 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize); 441 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
442 kunmap_atomic(kaddr, KM_USER0); 442 kunmap_atomic(kaddr);
443 flush_dcache_page(page); 443 flush_dcache_page(page);
444 brelse(dibh); 444 brelse(dibh);
445 SetPageUptodate(page); 445 SetPageUptodate(page);
@@ -542,9 +542,9 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
542 page = read_cache_page(mapping, index, __gfs2_readpage, NULL); 542 page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
543 if (IS_ERR(page)) 543 if (IS_ERR(page))
544 return PTR_ERR(page); 544 return PTR_ERR(page);
545 p = kmap_atomic(page, KM_USER0); 545 p = kmap_atomic(page);
546 memcpy(buf + copied, p + offset, amt); 546 memcpy(buf + copied, p + offset, amt);
547 kunmap_atomic(p, KM_USER0); 547 kunmap_atomic(p);
548 mark_page_accessed(page); 548 mark_page_accessed(page);
549 page_cache_release(page); 549 page_cache_release(page);
550 copied += amt; 550 copied += amt;
@@ -788,11 +788,11 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
788 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); 788 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
789 789
790 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); 790 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
791 kaddr = kmap_atomic(page, KM_USER0); 791 kaddr = kmap_atomic(page);
792 memcpy(buf + pos, kaddr + pos, copied); 792 memcpy(buf + pos, kaddr + pos, copied);
793 memset(kaddr + pos + copied, 0, len - copied); 793 memset(kaddr + pos + copied, 0, len - copied);
794 flush_dcache_page(page); 794 flush_dcache_page(page);
795 kunmap_atomic(kaddr, KM_USER0); 795 kunmap_atomic(kaddr);
796 796
797 if (!PageUptodate(page)) 797 if (!PageUptodate(page))
798 SetPageUptodate(page); 798 SetPageUptodate(page);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 14a704015970..197c5c47e577 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -60,7 +60,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
60 int release = 0; 60 int release = 0;
61 61
62 if (!page || page->index) { 62 if (!page || page->index) {
63 page = grab_cache_page(inode->i_mapping, 0); 63 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
64 if (!page) 64 if (!page)
65 return -ENOMEM; 65 return -ENOMEM;
66 release = 1; 66 release = 1;
@@ -930,7 +930,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
930 struct page *page; 930 struct page *page;
931 int err; 931 int err;
932 932
933 page = grab_cache_page(mapping, index); 933 page = find_or_create_page(mapping, index, GFP_NOFS);
934 if (!page) 934 if (!page)
935 return 0; 935 return 0;
936 936
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c5fb3597f696..76834587a8a4 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -313,6 +313,8 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
313 return gfs2_get_flags(filp, (u32 __user *)arg); 313 return gfs2_get_flags(filp, (u32 __user *)arg);
314 case FS_IOC_SETFLAGS: 314 case FS_IOC_SETFLAGS:
315 return gfs2_set_flags(filp, (u32 __user *)arg); 315 return gfs2_set_flags(filp, (u32 __user *)arg);
316 case FITRIM:
317 return gfs2_fitrim(filp, (void __user *)arg);
316 } 318 }
317 return -ENOTTY; 319 return -ENOTTY;
318} 320}
@@ -674,6 +676,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
674 struct gfs2_inode *ip = GFS2_I(inode); 676 struct gfs2_inode *ip = GFS2_I(inode);
675 struct buffer_head *dibh; 677 struct buffer_head *dibh;
676 int error; 678 int error;
679 loff_t size = len;
677 unsigned int nr_blks; 680 unsigned int nr_blks;
678 sector_t lblock = offset >> inode->i_blkbits; 681 sector_t lblock = offset >> inode->i_blkbits;
679 682
@@ -707,8 +710,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
707 goto out; 710 goto out;
708 } 711 }
709 } 712 }
710 if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) 713 if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
711 i_size_write(inode, offset + len); 714 i_size_write(inode, offset + size);
712 715
713 mark_inode_dirty(inode); 716 mark_inode_dirty(inode);
714 717
@@ -777,12 +780,14 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
777 if (unlikely(error)) 780 if (unlikely(error))
778 goto out_uninit; 781 goto out_uninit;
779 782
780 if (!gfs2_write_alloc_required(ip, offset, len))
781 goto out_unlock;
782
783 while (len > 0) { 783 while (len > 0) {
784 if (len < bytes) 784 if (len < bytes)
785 bytes = len; 785 bytes = len;
786 if (!gfs2_write_alloc_required(ip, offset, bytes)) {
787 len -= bytes;
788 offset += bytes;
789 continue;
790 }
786 qa = gfs2_qadata_get(ip); 791 qa = gfs2_qadata_get(ip);
787 if (!qa) { 792 if (!qa) {
788 error = -ENOMEM; 793 error = -ENOMEM;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 376816fcd040..dab2526071cc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -29,6 +29,7 @@
29#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h>
32 33
33#include "gfs2.h" 34#include "gfs2.h"
34#include "incore.h" 35#include "incore.h"
@@ -167,14 +168,19 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
167 spin_unlock(&lru_lock); 168 spin_unlock(&lru_lock);
168} 169}
169 170
170static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) 171static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
171{ 172{
172 spin_lock(&lru_lock);
173 if (!list_empty(&gl->gl_lru)) { 173 if (!list_empty(&gl->gl_lru)) {
174 list_del_init(&gl->gl_lru); 174 list_del_init(&gl->gl_lru);
175 atomic_dec(&lru_count); 175 atomic_dec(&lru_count);
176 clear_bit(GLF_LRU, &gl->gl_flags); 176 clear_bit(GLF_LRU, &gl->gl_flags);
177 } 177 }
178}
179
180static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
181{
182 spin_lock(&lru_lock);
183 __gfs2_glock_remove_from_lru(gl);
178 spin_unlock(&lru_lock); 184 spin_unlock(&lru_lock);
179} 185}
180 186
@@ -217,11 +223,12 @@ void gfs2_glock_put(struct gfs2_glock *gl)
217 struct gfs2_sbd *sdp = gl->gl_sbd; 223 struct gfs2_sbd *sdp = gl->gl_sbd;
218 struct address_space *mapping = gfs2_glock2aspace(gl); 224 struct address_space *mapping = gfs2_glock2aspace(gl);
219 225
220 if (atomic_dec_and_test(&gl->gl_ref)) { 226 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
227 __gfs2_glock_remove_from_lru(gl);
228 spin_unlock(&lru_lock);
221 spin_lock_bucket(gl->gl_hash); 229 spin_lock_bucket(gl->gl_hash);
222 hlist_bl_del_rcu(&gl->gl_list); 230 hlist_bl_del_rcu(&gl->gl_list);
223 spin_unlock_bucket(gl->gl_hash); 231 spin_unlock_bucket(gl->gl_hash);
224 gfs2_glock_remove_from_lru(gl);
225 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 232 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
226 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 233 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
227 trace_gfs2_glock_put(gl); 234 trace_gfs2_glock_put(gl);
@@ -537,6 +544,11 @@ __acquires(&gl->gl_spin)
537 do_error(gl, 0); /* Fail queued try locks */ 544 do_error(gl, 0); /* Fail queued try locks */
538 } 545 }
539 gl->gl_req = target; 546 gl->gl_req = target;
547 set_bit(GLF_BLOCKING, &gl->gl_flags);
548 if ((gl->gl_req == LM_ST_UNLOCKED) ||
549 (gl->gl_state == LM_ST_EXCLUSIVE) ||
550 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
551 clear_bit(GLF_BLOCKING, &gl->gl_flags);
540 spin_unlock(&gl->gl_spin); 552 spin_unlock(&gl->gl_spin);
541 if (glops->go_xmote_th) 553 if (glops->go_xmote_th)
542 glops->go_xmote_th(gl); 554 glops->go_xmote_th(gl);
@@ -738,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
738 return -ENOMEM; 750 return -ENOMEM;
739 751
740 atomic_inc(&sdp->sd_glock_disposal); 752 atomic_inc(&sdp->sd_glock_disposal);
753 gl->gl_sbd = sdp;
741 gl->gl_flags = 0; 754 gl->gl_flags = 0;
742 gl->gl_name = name; 755 gl->gl_name = name;
743 atomic_set(&gl->gl_ref, 1); 756 atomic_set(&gl->gl_ref, 1);
@@ -746,12 +759,17 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
746 gl->gl_demote_state = LM_ST_EXCLUSIVE; 759 gl->gl_demote_state = LM_ST_EXCLUSIVE;
747 gl->gl_hash = hash; 760 gl->gl_hash = hash;
748 gl->gl_ops = glops; 761 gl->gl_ops = glops;
749 snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number); 762 gl->gl_dstamp = ktime_set(0, 0);
763 preempt_disable();
764 /* We use the global stats to estimate the initial per-glock stats */
765 gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
766 preempt_enable();
767 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
768 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
750 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 769 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
751 gl->gl_lksb.sb_lvbptr = gl->gl_lvb; 770 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
752 gl->gl_tchange = jiffies; 771 gl->gl_tchange = jiffies;
753 gl->gl_object = NULL; 772 gl->gl_object = NULL;
754 gl->gl_sbd = sdp;
755 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; 773 gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
756 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 774 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
757 INIT_WORK(&gl->gl_delete, delete_work_func); 775 INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -993,6 +1011,8 @@ fail:
993 } 1011 }
994 set_bit(GLF_QUEUED, &gl->gl_flags); 1012 set_bit(GLF_QUEUED, &gl->gl_flags);
995 trace_gfs2_glock_queue(gh, 1); 1013 trace_gfs2_glock_queue(gh, 1);
1014 gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
1015 gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
996 if (likely(insert_pt == NULL)) { 1016 if (likely(insert_pt == NULL)) {
997 list_add_tail(&gh->gh_list, &gl->gl_holders); 1017 list_add_tail(&gh->gh_list, &gl->gl_holders);
998 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 1018 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
@@ -1652,6 +1672,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1652 *p++ = 'L'; 1672 *p++ = 'L';
1653 if (gl->gl_object) 1673 if (gl->gl_object)
1654 *p++ = 'o'; 1674 *p++ = 'o';
1675 if (test_bit(GLF_BLOCKING, gflags))
1676 *p++ = 'b';
1655 *p = 0; 1677 *p = 0;
1656 return buf; 1678 return buf;
1657} 1679}
@@ -1708,8 +1730,78 @@ out:
1708 return error; 1730 return error;
1709} 1731}
1710 1732
1733static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
1734{
1735 struct gfs2_glock *gl = iter_ptr;
1736
1737 seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n",
1738 gl->gl_name.ln_type,
1739 (unsigned long long)gl->gl_name.ln_number,
1740 (long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
1741 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
1742 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
1743 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
1744 (long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
1745 (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
1746 (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
1747 (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
1748 return 0;
1749}
1750
1751static const char *gfs2_gltype[] = {
1752 "type",
1753 "reserved",
1754 "nondisk",
1755 "inode",
1756 "rgrp",
1757 "meta",
1758 "iopen",
1759 "flock",
1760 "plock",
1761 "quota",
1762 "journal",
1763};
1764
1765static const char *gfs2_stype[] = {
1766 [GFS2_LKS_SRTT] = "srtt",
1767 [GFS2_LKS_SRTTVAR] = "srttvar",
1768 [GFS2_LKS_SRTTB] = "srttb",
1769 [GFS2_LKS_SRTTVARB] = "srttvarb",
1770 [GFS2_LKS_SIRT] = "sirt",
1771 [GFS2_LKS_SIRTVAR] = "sirtvar",
1772 [GFS2_LKS_DCOUNT] = "dlm",
1773 [GFS2_LKS_QCOUNT] = "queue",
1774};
1775
1776#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
1777
1778static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
1779{
1780 struct gfs2_glock_iter *gi = seq->private;
1781 struct gfs2_sbd *sdp = gi->sdp;
1782 unsigned index = gi->hash >> 3;
1783 unsigned subindex = gi->hash & 0x07;
1784 s64 value;
1785 int i;
1786
1787 if (index == 0 && subindex != 0)
1788 return 0;
1711 1789
1790 seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
1791 (index == 0) ? "cpu": gfs2_stype[subindex]);
1712 1792
1793 for_each_possible_cpu(i) {
1794 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
1795 if (index == 0) {
1796 value = i;
1797 } else {
1798 value = lkstats->lkstats[index - 1].stats[subindex];
1799 }
1800 seq_printf(seq, " %15lld", (long long)value);
1801 }
1802 seq_putc(seq, '\n');
1803 return 0;
1804}
1713 1805
1714int __init gfs2_glock_init(void) 1806int __init gfs2_glock_init(void)
1715{ 1807{
@@ -1822,6 +1914,35 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
1822 return dump_glock(seq, iter_ptr); 1914 return dump_glock(seq, iter_ptr);
1823} 1915}
1824 1916
1917static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
1918{
1919 struct gfs2_glock_iter *gi = seq->private;
1920
1921 gi->hash = *pos;
1922 if (*pos >= GFS2_NR_SBSTATS)
1923 return NULL;
1924 preempt_disable();
1925 return SEQ_START_TOKEN;
1926}
1927
1928static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
1929 loff_t *pos)
1930{
1931 struct gfs2_glock_iter *gi = seq->private;
1932 (*pos)++;
1933 gi->hash++;
1934 if (gi->hash >= GFS2_NR_SBSTATS) {
1935 preempt_enable();
1936 return NULL;
1937 }
1938 return SEQ_START_TOKEN;
1939}
1940
1941static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
1942{
1943 preempt_enable();
1944}
1945
1825static const struct seq_operations gfs2_glock_seq_ops = { 1946static const struct seq_operations gfs2_glock_seq_ops = {
1826 .start = gfs2_glock_seq_start, 1947 .start = gfs2_glock_seq_start,
1827 .next = gfs2_glock_seq_next, 1948 .next = gfs2_glock_seq_next,
@@ -1829,7 +1950,21 @@ static const struct seq_operations gfs2_glock_seq_ops = {
1829 .show = gfs2_glock_seq_show, 1950 .show = gfs2_glock_seq_show,
1830}; 1951};
1831 1952
1832static int gfs2_debugfs_open(struct inode *inode, struct file *file) 1953static const struct seq_operations gfs2_glstats_seq_ops = {
1954 .start = gfs2_glock_seq_start,
1955 .next = gfs2_glock_seq_next,
1956 .stop = gfs2_glock_seq_stop,
1957 .show = gfs2_glstats_seq_show,
1958};
1959
1960static const struct seq_operations gfs2_sbstats_seq_ops = {
1961 .start = gfs2_sbstats_seq_start,
1962 .next = gfs2_sbstats_seq_next,
1963 .stop = gfs2_sbstats_seq_stop,
1964 .show = gfs2_sbstats_seq_show,
1965};
1966
1967static int gfs2_glocks_open(struct inode *inode, struct file *file)
1833{ 1968{
1834 int ret = seq_open_private(file, &gfs2_glock_seq_ops, 1969 int ret = seq_open_private(file, &gfs2_glock_seq_ops,
1835 sizeof(struct gfs2_glock_iter)); 1970 sizeof(struct gfs2_glock_iter));
@@ -1841,9 +1976,49 @@ static int gfs2_debugfs_open(struct inode *inode, struct file *file)
1841 return ret; 1976 return ret;
1842} 1977}
1843 1978
1844static const struct file_operations gfs2_debug_fops = { 1979static int gfs2_glstats_open(struct inode *inode, struct file *file)
1980{
1981 int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
1982 sizeof(struct gfs2_glock_iter));
1983 if (ret == 0) {
1984 struct seq_file *seq = file->private_data;
1985 struct gfs2_glock_iter *gi = seq->private;
1986 gi->sdp = inode->i_private;
1987 }
1988 return ret;
1989}
1990
1991static int gfs2_sbstats_open(struct inode *inode, struct file *file)
1992{
1993 int ret = seq_open_private(file, &gfs2_sbstats_seq_ops,
1994 sizeof(struct gfs2_glock_iter));
1995 if (ret == 0) {
1996 struct seq_file *seq = file->private_data;
1997 struct gfs2_glock_iter *gi = seq->private;
1998 gi->sdp = inode->i_private;
1999 }
2000 return ret;
2001}
2002
2003static const struct file_operations gfs2_glocks_fops = {
2004 .owner = THIS_MODULE,
2005 .open = gfs2_glocks_open,
2006 .read = seq_read,
2007 .llseek = seq_lseek,
2008 .release = seq_release_private,
2009};
2010
2011static const struct file_operations gfs2_glstats_fops = {
1845 .owner = THIS_MODULE, 2012 .owner = THIS_MODULE,
1846 .open = gfs2_debugfs_open, 2013 .open = gfs2_glstats_open,
2014 .read = seq_read,
2015 .llseek = seq_lseek,
2016 .release = seq_release_private,
2017};
2018
2019static const struct file_operations gfs2_sbstats_fops = {
2020 .owner = THIS_MODULE,
2021 .open = gfs2_sbstats_open,
1847 .read = seq_read, 2022 .read = seq_read,
1848 .llseek = seq_lseek, 2023 .llseek = seq_lseek,
1849 .release = seq_release_private, 2024 .release = seq_release_private,
@@ -1857,20 +2032,45 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
1857 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2032 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
1858 S_IFREG | S_IRUGO, 2033 S_IFREG | S_IRUGO,
1859 sdp->debugfs_dir, sdp, 2034 sdp->debugfs_dir, sdp,
1860 &gfs2_debug_fops); 2035 &gfs2_glocks_fops);
1861 if (!sdp->debugfs_dentry_glocks) 2036 if (!sdp->debugfs_dentry_glocks)
1862 return -ENOMEM; 2037 goto fail;
2038
2039 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
2040 S_IFREG | S_IRUGO,
2041 sdp->debugfs_dir, sdp,
2042 &gfs2_glstats_fops);
2043 if (!sdp->debugfs_dentry_glstats)
2044 goto fail;
2045
2046 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
2047 S_IFREG | S_IRUGO,
2048 sdp->debugfs_dir, sdp,
2049 &gfs2_sbstats_fops);
2050 if (!sdp->debugfs_dentry_sbstats)
2051 goto fail;
1863 2052
1864 return 0; 2053 return 0;
2054fail:
2055 gfs2_delete_debugfs_file(sdp);
2056 return -ENOMEM;
1865} 2057}
1866 2058
1867void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2059void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
1868{ 2060{
1869 if (sdp && sdp->debugfs_dir) { 2061 if (sdp->debugfs_dir) {
1870 if (sdp->debugfs_dentry_glocks) { 2062 if (sdp->debugfs_dentry_glocks) {
1871 debugfs_remove(sdp->debugfs_dentry_glocks); 2063 debugfs_remove(sdp->debugfs_dentry_glocks);
1872 sdp->debugfs_dentry_glocks = NULL; 2064 sdp->debugfs_dentry_glocks = NULL;
1873 } 2065 }
2066 if (sdp->debugfs_dentry_glstats) {
2067 debugfs_remove(sdp->debugfs_dentry_glstats);
2068 sdp->debugfs_dentry_glstats = NULL;
2069 }
2070 if (sdp->debugfs_dentry_sbstats) {
2071 debugfs_remove(sdp->debugfs_dentry_sbstats);
2072 sdp->debugfs_dentry_sbstats = NULL;
2073 }
1874 debugfs_remove(sdp->debugfs_dir); 2074 debugfs_remove(sdp->debugfs_dir);
1875 sdp->debugfs_dir = NULL; 2075 sdp->debugfs_dir = NULL;
1876 } 2076 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 97742a7ea9cc..47d0bda5ac2b 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -19,6 +19,8 @@
19#include <linux/rculist_bl.h> 19#include <linux/rculist_bl.h>
20#include <linux/completion.h> 20#include <linux/completion.h>
21#include <linux/rbtree.h> 21#include <linux/rbtree.h>
22#include <linux/ktime.h>
23#include <linux/percpu.h>
22 24
23#define DIO_WAIT 0x00000010 25#define DIO_WAIT 0x00000010
24#define DIO_METADATA 0x00000020 26#define DIO_METADATA 0x00000020
@@ -205,6 +207,22 @@ struct gfs2_glock_operations {
205}; 207};
206 208
207enum { 209enum {
210 GFS2_LKS_SRTT = 0, /* Non blocking smoothed round trip time */
211 GFS2_LKS_SRTTVAR = 1, /* Non blocking smoothed variance */
212 GFS2_LKS_SRTTB = 2, /* Blocking smoothed round trip time */
213 GFS2_LKS_SRTTVARB = 3, /* Blocking smoothed variance */
214 GFS2_LKS_SIRT = 4, /* Smoothed Inter-request time */
215 GFS2_LKS_SIRTVAR = 5, /* Smoothed Inter-request variance */
216 GFS2_LKS_DCOUNT = 6, /* Count of dlm requests */
217 GFS2_LKS_QCOUNT = 7, /* Count of gfs2_holder queues */
218 GFS2_NR_LKSTATS
219};
220
221struct gfs2_lkstats {
222 s64 stats[GFS2_NR_LKSTATS];
223};
224
225enum {
208 /* States */ 226 /* States */
209 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ 227 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
210 HIF_FIRST = 7, 228 HIF_FIRST = 7,
@@ -238,10 +256,12 @@ enum {
238 GLF_QUEUED = 12, 256 GLF_QUEUED = 12,
239 GLF_LRU = 13, 257 GLF_LRU = 13,
240 GLF_OBJECT = 14, /* Used only for tracing */ 258 GLF_OBJECT = 14, /* Used only for tracing */
259 GLF_BLOCKING = 15,
241}; 260};
242 261
243struct gfs2_glock { 262struct gfs2_glock {
244 struct hlist_bl_node gl_list; 263 struct hlist_bl_node gl_list;
264 struct gfs2_sbd *gl_sbd;
245 unsigned long gl_flags; /* GLF_... */ 265 unsigned long gl_flags; /* GLF_... */
246 struct lm_lockname gl_name; 266 struct lm_lockname gl_name;
247 atomic_t gl_ref; 267 atomic_t gl_ref;
@@ -261,16 +281,14 @@ struct gfs2_glock {
261 struct list_head gl_holders; 281 struct list_head gl_holders;
262 282
263 const struct gfs2_glock_operations *gl_ops; 283 const struct gfs2_glock_operations *gl_ops;
264 char gl_strname[GDLM_STRNAME_BYTES]; 284 ktime_t gl_dstamp;
285 struct gfs2_lkstats gl_stats;
265 struct dlm_lksb gl_lksb; 286 struct dlm_lksb gl_lksb;
266 char gl_lvb[32]; 287 char gl_lvb[32];
267 unsigned long gl_tchange; 288 unsigned long gl_tchange;
268 void *gl_object; 289 void *gl_object;
269 290
270 struct list_head gl_lru; 291 struct list_head gl_lru;
271
272 struct gfs2_sbd *gl_sbd;
273
274 struct list_head gl_ail_list; 292 struct list_head gl_ail_list;
275 atomic_t gl_ail_count; 293 atomic_t gl_ail_count;
276 atomic_t gl_revokes; 294 atomic_t gl_revokes;
@@ -560,8 +578,14 @@ struct lm_lockstruct {
560 uint32_t *ls_recover_result; /* result of last jid recovery */ 578 uint32_t *ls_recover_result; /* result of last jid recovery */
561}; 579};
562 580
581struct gfs2_pcpu_lkstats {
582 /* One struct for each glock type */
583 struct gfs2_lkstats lkstats[10];
584};
585
563struct gfs2_sbd { 586struct gfs2_sbd {
564 struct super_block *sd_vfs; 587 struct super_block *sd_vfs;
588 struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
565 struct kobject sd_kobj; 589 struct kobject sd_kobj;
566 unsigned long sd_flags; /* SDF_... */ 590 unsigned long sd_flags; /* SDF_... */
567 struct gfs2_sb_host sd_sb; 591 struct gfs2_sb_host sd_sb;
@@ -620,7 +644,6 @@ struct gfs2_sbd {
620 644
621 int sd_rindex_uptodate; 645 int sd_rindex_uptodate;
622 spinlock_t sd_rindex_spin; 646 spinlock_t sd_rindex_spin;
623 struct mutex sd_rindex_mutex;
624 struct rb_root sd_rindex_tree; 647 struct rb_root sd_rindex_tree;
625 unsigned int sd_rgrps; 648 unsigned int sd_rgrps;
626 unsigned int sd_max_rg_data; 649 unsigned int sd_max_rg_data;
@@ -725,8 +748,23 @@ struct gfs2_sbd {
725 748
726 unsigned long sd_last_warning; 749 unsigned long sd_last_warning;
727 struct dentry *debugfs_dir; /* debugfs directory */ 750 struct dentry *debugfs_dir; /* debugfs directory */
728 struct dentry *debugfs_dentry_glocks; /* for debugfs */ 751 struct dentry *debugfs_dentry_glocks;
752 struct dentry *debugfs_dentry_glstats;
753 struct dentry *debugfs_dentry_sbstats;
729}; 754};
730 755
756static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
757{
758 gl->gl_stats.stats[which]++;
759}
760
761static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
762{
763 const struct gfs2_sbd *sdp = gl->gl_sbd;
764 preempt_disable();
765 this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++;
766 preempt_enable();
767}
768
731#endif /* __INCORE_DOT_H__ */ 769#endif /* __INCORE_DOT_H__ */
732 770
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a7d611b93f0f..c98a60ee6dfd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -391,10 +391,6 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
391 int error; 391 int error;
392 int dblocks = 1; 392 int dblocks = 1;
393 393
394 error = gfs2_rindex_update(sdp);
395 if (error)
396 fs_warn(sdp, "rindex update returns %d\n", error);
397
398 error = gfs2_inplace_reserve(dip, RES_DINODE); 394 error = gfs2_inplace_reserve(dip, RES_DINODE);
399 if (error) 395 if (error)
400 goto out; 396 goto out;
@@ -1040,9 +1036,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1040 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1036 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1041 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 1037 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1042 1038
1043 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1039 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1044 if (!rgd) 1040 if (!rgd)
1045 goto out_inodes; 1041 goto out_inodes;
1042
1046 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 1043 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1047 1044
1048 1045
@@ -1258,7 +1255,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1258 * this is the case of the target file already existing 1255 * this is the case of the target file already existing
1259 * so we unlink before doing the rename 1256 * so we unlink before doing the rename
1260 */ 1257 */
1261 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); 1258 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
1262 if (nrgd) 1259 if (nrgd)
1263 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 1260 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1264 } 1261 }
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8944d1e32ab5..f8411bd1b805 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -18,14 +18,106 @@
18#include "glock.h" 18#include "glock.h"
19#include "util.h" 19#include "util.h"
20#include "sys.h" 20#include "sys.h"
21#include "trace_gfs2.h"
21 22
22extern struct workqueue_struct *gfs2_control_wq; 23extern struct workqueue_struct *gfs2_control_wq;
23 24
25/**
26 * gfs2_update_stats - Update time based stats
27 * @s: Pointer to mean/variance structure to update
28 * @sample: New data to include
29 *
30 * @delta is the difference between the current rtt sample and the
31 * running average srtt. We add 1/8 of that to the srtt in order to
32 * update the current srtt estimate. The variance estimate is a bit
33 * more complicated. We subtract the current variance estimate from
34 * the abs value of the @delta and add 1/4 of that to the running
35 * total.
36 *
37 * Note that the index points at the array entry containing the smoothed
38 * mean value, and the variance is always in the following entry
39 *
40 * Reference: TCP/IP Illustrated, vol 2, p. 831,832
41 * All times are in units of integer nanoseconds. Unlike the TCP/IP case,
42 * they are not scaled fixed point.
43 */
44
45static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
46 s64 sample)
47{
48 s64 delta = sample - s->stats[index];
49 s->stats[index] += (delta >> 3);
50 index++;
51 s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
52}
53
54/**
55 * gfs2_update_reply_times - Update locking statistics
56 * @gl: The glock to update
57 *
58 * This assumes that gl->gl_dstamp has been set earlier.
59 *
60 * The rtt (lock round trip time) is an estimate of the time
61 * taken to perform a dlm lock request. We update it on each
62 * reply from the dlm.
63 *
64 * The blocking flag is set on the glock for all dlm requests
65 * which may potentially block due to lock requests from other nodes.
66 * DLM requests where the current lock state is exclusive, the
67 * requested state is null (or unlocked) or where the TRY or
68 * TRY_1CB flags are set are classified as non-blocking. All
69 * other DLM requests are counted as (potentially) blocking.
70 */
71static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
72{
73 struct gfs2_pcpu_lkstats *lks;
74 const unsigned gltype = gl->gl_name.ln_type;
75 unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
76 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
77 s64 rtt;
78
79 preempt_disable();
80 rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
81 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
82 gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */
83 gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */
84 preempt_enable();
85
86 trace_gfs2_glock_lock_time(gl, rtt);
87}
88
89/**
90 * gfs2_update_request_times - Update locking statistics
91 * @gl: The glock to update
92 *
93 * The irt (lock inter-request times) measures the average time
94 * between requests to the dlm. It is updated immediately before
95 * each dlm call.
96 */
97
98static inline void gfs2_update_request_times(struct gfs2_glock *gl)
99{
100 struct gfs2_pcpu_lkstats *lks;
101 const unsigned gltype = gl->gl_name.ln_type;
102 ktime_t dstamp;
103 s64 irt;
104
105 preempt_disable();
106 dstamp = gl->gl_dstamp;
107 gl->gl_dstamp = ktime_get_real();
108 irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
109 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
110 gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */
111 gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */
112 preempt_enable();
113}
114
24static void gdlm_ast(void *arg) 115static void gdlm_ast(void *arg)
25{ 116{
26 struct gfs2_glock *gl = arg; 117 struct gfs2_glock *gl = arg;
27 unsigned ret = gl->gl_state; 118 unsigned ret = gl->gl_state;
28 119
120 gfs2_update_reply_times(gl);
29 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
30 122
31 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) 123 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
@@ -111,7 +203,7 @@ static int make_mode(const unsigned int lmstate)
111static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, 203static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
112 const int req) 204 const int req)
113{ 205{
114 u32 lkf = 0; 206 u32 lkf = DLM_LKF_VALBLK;
115 207
116 if (gfs_flags & LM_FLAG_TRY) 208 if (gfs_flags & LM_FLAG_TRY)
117 lkf |= DLM_LKF_NOQUEUE; 209 lkf |= DLM_LKF_NOQUEUE;
@@ -138,26 +230,43 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
138 if (lkid != 0) 230 if (lkid != 0)
139 lkf |= DLM_LKF_CONVERT; 231 lkf |= DLM_LKF_CONVERT;
140 232
141 lkf |= DLM_LKF_VALBLK;
142
143 return lkf; 233 return lkf;
144} 234}
145 235
236static void gfs2_reverse_hex(char *c, u64 value)
237{
238 while (value) {
239 *c-- = hex_asc[value & 0x0f];
240 value >>= 4;
241 }
242}
243
146static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, 244static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
147 unsigned int flags) 245 unsigned int flags)
148{ 246{
149 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 247 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
150 int req; 248 int req;
151 u32 lkf; 249 u32 lkf;
250 char strname[GDLM_STRNAME_BYTES] = "";
152 251
153 req = make_mode(req_state); 252 req = make_mode(req_state);
154 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); 253 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
155 254 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
255 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
256 if (gl->gl_lksb.sb_lkid) {
257 gfs2_update_request_times(gl);
258 } else {
259 memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
260 strname[GDLM_STRNAME_BYTES - 1] = '\0';
261 gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
262 gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
263 gl->gl_dstamp = ktime_get_real();
264 }
156 /* 265 /*
157 * Submit the actual lock request. 266 * Submit the actual lock request.
158 */ 267 */
159 268
160 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, 269 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
161 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 270 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
162} 271}
163 272
@@ -172,6 +281,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
172 return; 281 return;
173 } 282 }
174 283
284 clear_bit(GLF_BLOCKING, &gl->gl_flags);
285 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
286 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
287 gfs2_update_request_times(gl);
175 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 288 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
176 NULL, gl); 289 NULL, gl);
177 if (error) { 290 if (error) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 756fae9eaf8f..4752eadc7f6e 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -19,6 +19,7 @@
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h> 21#include <linux/writeback.h>
22#include <linux/list_sort.h>
22 23
23#include "gfs2.h" 24#include "gfs2.h"
24#include "incore.h" 25#include "incore.h"
@@ -358,7 +359,7 @@ retry:
358 return 0; 359 return 0;
359} 360}
360 361
361static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) 362u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
362{ 363{
363 struct gfs2_journal_extent *je; 364 struct gfs2_journal_extent *je;
364 365
@@ -467,8 +468,8 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
467 468
468void gfs2_log_incr_head(struct gfs2_sbd *sdp) 469void gfs2_log_incr_head(struct gfs2_sbd *sdp)
469{ 470{
470 if (sdp->sd_log_flush_head == sdp->sd_log_tail) 471 BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
471 BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head); 472 (sdp->sd_log_flush_head != sdp->sd_log_head));
472 473
473 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { 474 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
474 sdp->sd_log_flush_head = 0; 475 sdp->sd_log_flush_head = 0;
@@ -476,99 +477,6 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp)
476 } 477 }
477} 478}
478 479
479/**
480 * gfs2_log_write_endio - End of I/O for a log buffer
481 * @bh: The buffer head
482 * @uptodate: I/O Status
483 *
484 */
485
486static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
487{
488 struct gfs2_sbd *sdp = bh->b_private;
489 bh->b_private = NULL;
490
491 end_buffer_write_sync(bh, uptodate);
492 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
493 wake_up(&sdp->sd_log_flush_wait);
494}
495
496/**
497 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
498 * @sdp: The GFS2 superblock
499 *
500 * Returns: the buffer_head
501 */
502
503struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
504{
505 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
506 struct buffer_head *bh;
507
508 bh = sb_getblk(sdp->sd_vfs, blkno);
509 lock_buffer(bh);
510 memset(bh->b_data, 0, bh->b_size);
511 set_buffer_uptodate(bh);
512 clear_buffer_dirty(bh);
513 gfs2_log_incr_head(sdp);
514 atomic_inc(&sdp->sd_log_in_flight);
515 bh->b_private = sdp;
516 bh->b_end_io = gfs2_log_write_endio;
517
518 return bh;
519}
520
521/**
522 * gfs2_fake_write_endio -
523 * @bh: The buffer head
524 * @uptodate: The I/O Status
525 *
526 */
527
528static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
529{
530 struct buffer_head *real_bh = bh->b_private;
531 struct gfs2_bufdata *bd = real_bh->b_private;
532 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
533
534 end_buffer_write_sync(bh, uptodate);
535 free_buffer_head(bh);
536 unlock_buffer(real_bh);
537 brelse(real_bh);
538 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
539 wake_up(&sdp->sd_log_flush_wait);
540}
541
542/**
543 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
544 * @sdp: the filesystem
545 * @data: the data the buffer_head should point to
546 *
547 * Returns: the log buffer descriptor
548 */
549
550struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
551 struct buffer_head *real)
552{
553 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
554 struct buffer_head *bh;
555
556 bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
557 atomic_set(&bh->b_count, 1);
558 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
559 set_bh_page(bh, real->b_page, bh_offset(real));
560 bh->b_blocknr = blkno;
561 bh->b_size = sdp->sd_sb.sb_bsize;
562 bh->b_bdev = sdp->sd_vfs->s_bdev;
563 bh->b_private = real;
564 bh->b_end_io = gfs2_fake_write_endio;
565
566 gfs2_log_incr_head(sdp);
567 atomic_inc(&sdp->sd_log_in_flight);
568
569 return bh;
570}
571
572static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) 480static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
573{ 481{
574 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); 482 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
@@ -583,66 +491,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
583 sdp->sd_log_tail = new_tail; 491 sdp->sd_log_tail = new_tail;
584} 492}
585 493
586/**
587 * log_write_header - Get and initialize a journal header buffer
588 * @sdp: The GFS2 superblock
589 *
590 * Returns: the initialized log buffer descriptor
591 */
592 494
593static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) 495static void log_flush_wait(struct gfs2_sbd *sdp)
594{
595 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
596 struct buffer_head *bh;
597 struct gfs2_log_header *lh;
598 unsigned int tail;
599 u32 hash;
600
601 bh = sb_getblk(sdp->sd_vfs, blkno);
602 lock_buffer(bh);
603 memset(bh->b_data, 0, bh->b_size);
604 set_buffer_uptodate(bh);
605 clear_buffer_dirty(bh);
606
607 gfs2_ail1_empty(sdp);
608 tail = current_tail(sdp);
609
610 lh = (struct gfs2_log_header *)bh->b_data;
611 memset(lh, 0, sizeof(struct gfs2_log_header));
612 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
613 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
614 lh->lh_header.__pad0 = cpu_to_be64(0);
615 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
616 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
617 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
618 lh->lh_flags = cpu_to_be32(flags);
619 lh->lh_tail = cpu_to_be32(tail);
620 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
621 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
622 lh->lh_hash = cpu_to_be32(hash);
623
624 bh->b_end_io = end_buffer_write_sync;
625 get_bh(bh);
626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
628 else
629 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
630 wait_on_buffer(bh);
631
632 if (!buffer_uptodate(bh))
633 gfs2_io_error_bh(sdp, bh);
634 brelse(bh);
635
636 if (sdp->sd_log_tail != tail)
637 log_pull_tail(sdp, tail);
638 else
639 gfs2_assert_withdraw(sdp, !pull);
640
641 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
642 gfs2_log_incr_head(sdp);
643}
644
645static void log_flush_commit(struct gfs2_sbd *sdp)
646{ 496{
647 DEFINE_WAIT(wait); 497 DEFINE_WAIT(wait);
648 498
@@ -655,8 +505,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
655 } while(atomic_read(&sdp->sd_log_in_flight)); 505 } while(atomic_read(&sdp->sd_log_in_flight));
656 finish_wait(&sdp->sd_log_flush_wait, &wait); 506 finish_wait(&sdp->sd_log_flush_wait, &wait);
657 } 507 }
508}
509
510static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
511{
512 struct gfs2_bufdata *bda, *bdb;
658 513
659 log_write_header(sdp, 0, 0); 514 bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list);
515 bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list);
516
517 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
518 return -1;
519 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
520 return 1;
521 return 0;
660} 522}
661 523
662static void gfs2_ordered_write(struct gfs2_sbd *sdp) 524static void gfs2_ordered_write(struct gfs2_sbd *sdp)
@@ -666,6 +528,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
666 LIST_HEAD(written); 528 LIST_HEAD(written);
667 529
668 gfs2_log_lock(sdp); 530 gfs2_log_lock(sdp);
531 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
669 while (!list_empty(&sdp->sd_log_le_ordered)) { 532 while (!list_empty(&sdp->sd_log_le_ordered)) {
670 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); 533 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
671 list_move(&bd->bd_le.le_list, &written); 534 list_move(&bd->bd_le.le_list, &written);
@@ -711,6 +574,68 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
711} 574}
712 575
713/** 576/**
577 * log_write_header - Get and initialize a journal header buffer
578 * @sdp: The GFS2 superblock
579 *
580 * Returns: the initialized log buffer descriptor
581 */
582
583static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
584{
585 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
586 struct buffer_head *bh;
587 struct gfs2_log_header *lh;
588 unsigned int tail;
589 u32 hash;
590
591 bh = sb_getblk(sdp->sd_vfs, blkno);
592 lock_buffer(bh);
593 memset(bh->b_data, 0, bh->b_size);
594 set_buffer_uptodate(bh);
595 clear_buffer_dirty(bh);
596
597 gfs2_ail1_empty(sdp);
598 tail = current_tail(sdp);
599
600 lh = (struct gfs2_log_header *)bh->b_data;
601 memset(lh, 0, sizeof(struct gfs2_log_header));
602 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
603 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
604 lh->lh_header.__pad0 = cpu_to_be64(0);
605 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
606 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
607 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
608 lh->lh_flags = cpu_to_be32(flags);
609 lh->lh_tail = cpu_to_be32(tail);
610 lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
611 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
612 lh->lh_hash = cpu_to_be32(hash);
613
614 bh->b_end_io = end_buffer_write_sync;
615 get_bh(bh);
616 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
617 gfs2_ordered_wait(sdp);
618 log_flush_wait(sdp);
619 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
620 } else {
621 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
622 }
623 wait_on_buffer(bh);
624
625 if (!buffer_uptodate(bh))
626 gfs2_io_error_bh(sdp, bh);
627 brelse(bh);
628
629 if (sdp->sd_log_tail != tail)
630 log_pull_tail(sdp, tail);
631 else
632 gfs2_assert_withdraw(sdp, !pull);
633
634 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
635 gfs2_log_incr_head(sdp);
636}
637
638/**
714 * gfs2_log_flush - flush incore transaction(s) 639 * gfs2_log_flush - flush incore transaction(s)
715 * @sdp: the filesystem 640 * @sdp: the filesystem
716 * @gl: The glock structure to flush. If NULL, flush the whole incore log 641 * @gl: The glock structure to flush. If NULL, flush the whole incore log
@@ -753,11 +678,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
753 678
754 gfs2_ordered_write(sdp); 679 gfs2_ordered_write(sdp);
755 lops_before_commit(sdp); 680 lops_before_commit(sdp);
756 gfs2_ordered_wait(sdp);
757 681
758 if (sdp->sd_log_head != sdp->sd_log_flush_head) 682 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
759 log_flush_commit(sdp); 683 log_write_header(sdp, 0, 0);
760 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 684 } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
761 gfs2_log_lock(sdp); 685 gfs2_log_lock(sdp);
762 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ 686 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
763 trace_gfs2_log_blocks(sdp, -1); 687 trace_gfs2_log_blocks(sdp, -1);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index ab0621698b73..ff07454b582c 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -53,10 +53,7 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
53 53
54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 54extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
55extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); 55extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
56 56extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn);
57extern struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
58extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59 struct buffer_head *real);
60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 57extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 58extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 59extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 0301be655b12..6b1efb594d90 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -12,6 +12,7 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/mempool.h>
15#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
16#include <linux/bio.h> 17#include <linux/bio.h>
17#include <linux/fs.h> 18#include <linux/fs.h>
@@ -76,7 +77,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
76 if (bi->bi_clone == 0) 77 if (bi->bi_clone == 0)
77 return; 78 return;
78 if (sdp->sd_args.ar_discard) 79 if (sdp->sd_args.ar_discard)
79 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi); 80 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
80 memcpy(bi->bi_clone + bi->bi_offset, 81 memcpy(bi->bi_clone + bi->bi_offset,
81 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); 82 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
82 clear_bit(GBF_FULL, &bi->bi_flags); 83 clear_bit(GBF_FULL, &bi->bi_flags);
@@ -143,6 +144,98 @@ static inline __be64 *bh_ptr_end(struct buffer_head *bh)
143 return (__force __be64 *)(bh->b_data + bh->b_size); 144 return (__force __be64 *)(bh->b_data + bh->b_size);
144} 145}
145 146
147/**
148 * gfs2_log_write_endio - End of I/O for a log buffer
149 * @bh: The buffer head
150 * @uptodate: I/O Status
151 *
152 */
153
154static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
155{
156 struct gfs2_sbd *sdp = bh->b_private;
157 bh->b_private = NULL;
158
159 end_buffer_write_sync(bh, uptodate);
160 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
161 wake_up(&sdp->sd_log_flush_wait);
162}
163
164/**
165 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
166 * @sdp: The GFS2 superblock
167 *
168 * tReturns: the buffer_head
169 */
170
171static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
172{
173 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
174 struct buffer_head *bh;
175
176 bh = sb_getblk(sdp->sd_vfs, blkno);
177 lock_buffer(bh);
178 memset(bh->b_data, 0, bh->b_size);
179 set_buffer_uptodate(bh);
180 clear_buffer_dirty(bh);
181 gfs2_log_incr_head(sdp);
182 atomic_inc(&sdp->sd_log_in_flight);
183 bh->b_private = sdp;
184 bh->b_end_io = gfs2_log_write_endio;
185
186 return bh;
187}
188
189/**
190 * gfs2_fake_write_endio -
191 * @bh: The buffer head
192 * @uptodate: The I/O Status
193 *
194 */
195
196static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
197{
198 struct buffer_head *real_bh = bh->b_private;
199 struct gfs2_bufdata *bd = real_bh->b_private;
200 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
201
202 end_buffer_write_sync(bh, uptodate);
203 mempool_free(bh, gfs2_bh_pool);
204 unlock_buffer(real_bh);
205 brelse(real_bh);
206 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
207 wake_up(&sdp->sd_log_flush_wait);
208}
209
210/**
211 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
212 * @sdp: the filesystem
213 * @data: the data the buffer_head should point to
214 *
215 * Returns: the log buffer descriptor
216 */
217
218static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
219 struct buffer_head *real)
220{
221 u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
222 struct buffer_head *bh;
223
224 bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS);
225 atomic_set(&bh->b_count, 1);
226 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
227 set_bh_page(bh, real->b_page, bh_offset(real));
228 bh->b_blocknr = blkno;
229 bh->b_size = sdp->sd_sb.sb_bsize;
230 bh->b_bdev = sdp->sd_vfs->s_bdev;
231 bh->b_private = real;
232 bh->b_end_io = gfs2_fake_write_endio;
233
234 gfs2_log_incr_head(sdp);
235 atomic_inc(&sdp->sd_log_in_flight);
236
237 return bh;
238}
146 239
147static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) 240static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
148{ 241{
@@ -553,11 +646,11 @@ static void gfs2_check_magic(struct buffer_head *bh)
553 __be32 *ptr; 646 __be32 *ptr;
554 647
555 clear_buffer_escaped(bh); 648 clear_buffer_escaped(bh);
556 kaddr = kmap_atomic(bh->b_page, KM_USER0); 649 kaddr = kmap_atomic(bh->b_page);
557 ptr = kaddr + bh_offset(bh); 650 ptr = kaddr + bh_offset(bh);
558 if (*ptr == cpu_to_be32(GFS2_MAGIC)) 651 if (*ptr == cpu_to_be32(GFS2_MAGIC))
559 set_buffer_escaped(bh); 652 set_buffer_escaped(bh);
560 kunmap_atomic(kaddr, KM_USER0); 653 kunmap_atomic(kaddr);
561} 654}
562 655
563static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, 656static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -594,10 +687,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
594 if (buffer_escaped(bd->bd_bh)) { 687 if (buffer_escaped(bd->bd_bh)) {
595 void *kaddr; 688 void *kaddr;
596 bh1 = gfs2_log_get_buf(sdp); 689 bh1 = gfs2_log_get_buf(sdp);
597 kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0); 690 kaddr = kmap_atomic(bd->bd_bh->b_page);
598 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), 691 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
599 bh1->b_size); 692 bh1->b_size);
600 kunmap_atomic(kaddr, KM_USER0); 693 kunmap_atomic(kaddr);
601 *(__be32 *)bh1->b_data = 0; 694 *(__be32 *)bh1->b_data = 0;
602 clear_buffer_escaped(bd->bd_bh); 695 clear_buffer_escaped(bd->bd_bh);
603 unlock_buffer(bd->bd_bh); 696 unlock_buffer(bd->bd_bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a8d9bcd0e19c..754426b1e52c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -17,6 +17,7 @@
17#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
18#include <linux/rculist_bl.h> 18#include <linux/rculist_bl.h>
19#include <linux/atomic.h> 19#include <linux/atomic.h>
20#include <linux/mempool.h>
20 21
21#include "gfs2.h" 22#include "gfs2.h"
22#include "incore.h" 23#include "incore.h"
@@ -69,6 +70,16 @@ static void gfs2_init_gl_aspace_once(void *foo)
69 address_space_init_once(mapping); 70 address_space_init_once(mapping);
70} 71}
71 72
73static void *gfs2_bh_alloc(gfp_t mask, void *data)
74{
75 return alloc_buffer_head(mask);
76}
77
78static void gfs2_bh_free(void *ptr, void *data)
79{
80 return free_buffer_head(ptr);
81}
82
72/** 83/**
73 * init_gfs2_fs - Register GFS2 as a filesystem 84 * init_gfs2_fs - Register GFS2 as a filesystem
74 * 85 *
@@ -151,6 +162,10 @@ static int __init init_gfs2_fs(void)
151 gfs2_control_wq = alloc_workqueue("gfs2_control", 162 gfs2_control_wq = alloc_workqueue("gfs2_control",
152 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); 163 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
153 if (!gfs2_control_wq) 164 if (!gfs2_control_wq)
165 goto fail_recovery;
166
167 gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL);
168 if (!gfs2_bh_pool)
154 goto fail_control; 169 goto fail_control;
155 170
156 gfs2_register_debugfs(); 171 gfs2_register_debugfs();
@@ -160,6 +175,8 @@ static int __init init_gfs2_fs(void)
160 return 0; 175 return 0;
161 176
162fail_control: 177fail_control:
178 destroy_workqueue(gfs2_control_wq);
179fail_recovery:
163 destroy_workqueue(gfs_recovery_wq); 180 destroy_workqueue(gfs_recovery_wq);
164fail_wq: 181fail_wq:
165 unregister_filesystem(&gfs2meta_fs_type); 182 unregister_filesystem(&gfs2meta_fs_type);
@@ -208,6 +225,7 @@ static void __exit exit_gfs2_fs(void)
208 225
209 rcu_barrier(); 226 rcu_barrier();
210 227
228 mempool_destroy(gfs2_bh_pool);
211 kmem_cache_destroy(gfs2_quotad_cachep); 229 kmem_cache_destroy(gfs2_quotad_cachep);
212 kmem_cache_destroy(gfs2_rgrpd_cachep); 230 kmem_cache_destroy(gfs2_rgrpd_cachep);
213 kmem_cache_destroy(gfs2_bufdata_cachep); 231 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6aacf3f230a2..6f3a18f9e176 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -68,6 +68,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
68 68
69 sb->s_fs_info = sdp; 69 sb->s_fs_info = sdp;
70 sdp->sd_vfs = sb; 70 sdp->sd_vfs = sb;
71 sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
72 if (!sdp->sd_lkstats) {
73 kfree(sdp);
74 return NULL;
75 }
76
71 set_bit(SDF_NOJOURNALID, &sdp->sd_flags); 77 set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
72 gfs2_tune_init(&sdp->sd_tune); 78 gfs2_tune_init(&sdp->sd_tune);
73 79
@@ -77,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
77 spin_lock_init(&sdp->sd_statfs_spin); 83 spin_lock_init(&sdp->sd_statfs_spin);
78 84
79 spin_lock_init(&sdp->sd_rindex_spin); 85 spin_lock_init(&sdp->sd_rindex_spin);
80 mutex_init(&sdp->sd_rindex_mutex);
81 sdp->sd_rindex_tree.rb_node = NULL; 86 sdp->sd_rindex_tree.rb_node = NULL;
82 87
83 INIT_LIST_HEAD(&sdp->sd_jindex_list); 88 INIT_LIST_HEAD(&sdp->sd_jindex_list);
@@ -431,10 +436,9 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
431 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 436 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
432 return PTR_ERR(inode); 437 return PTR_ERR(inode);
433 } 438 }
434 dentry = d_alloc_root(inode); 439 dentry = d_make_root(inode);
435 if (!dentry) { 440 if (!dentry) {
436 fs_err(sdp, "can't alloc %s dentry\n", name); 441 fs_err(sdp, "can't alloc %s dentry\n", name);
437 iput(inode);
438 return -ENOMEM; 442 return -ENOMEM;
439 } 443 }
440 *dptr = dentry; 444 *dptr = dentry;
@@ -800,6 +804,11 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
800 fs_err(sdp, "can't get quota file inode: %d\n", error); 804 fs_err(sdp, "can't get quota file inode: %d\n", error);
801 goto fail_rindex; 805 goto fail_rindex;
802 } 806 }
807
808 error = gfs2_rindex_update(sdp);
809 if (error)
810 goto fail_qinode;
811
803 return 0; 812 return 0;
804 813
805fail_qinode: 814fail_qinode:
@@ -1216,6 +1225,7 @@ fail_sys:
1216 gfs2_sys_fs_del(sdp); 1225 gfs2_sys_fs_del(sdp);
1217fail: 1226fail:
1218 gfs2_delete_debugfs_file(sdp); 1227 gfs2_delete_debugfs_file(sdp);
1228 free_percpu(sdp->sd_lkstats);
1219 kfree(sdp); 1229 kfree(sdp);
1220 sb->s_fs_info = NULL; 1230 sb->s_fs_info = NULL;
1221 return error; 1231 return error;
@@ -1388,6 +1398,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1388 shrink_dcache_sb(sb); 1398 shrink_dcache_sb(sb);
1389 kill_block_super(sb); 1399 kill_block_super(sb);
1390 gfs2_delete_debugfs_file(sdp); 1400 gfs2_delete_debugfs_file(sdp);
1401 free_percpu(sdp->sd_lkstats);
1391 kfree(sdp); 1402 kfree(sdp);
1392} 1403}
1393 1404
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a45b21b03915..6019da3dcaed 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -681,7 +681,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
681 ptr = qp; 681 ptr = qp;
682 nbytes = sizeof(struct gfs2_quota); 682 nbytes = sizeof(struct gfs2_quota);
683get_a_page: 683get_a_page:
684 page = grab_cache_page(mapping, index); 684 page = find_or_create_page(mapping, index, GFP_NOFS);
685 if (!page) 685 if (!page)
686 return -ENOMEM; 686 return -ENOMEM;
687 687
@@ -720,12 +720,12 @@ get_a_page:
720 720
721 gfs2_trans_add_bh(ip->i_gl, bh, 0); 721 gfs2_trans_add_bh(ip->i_gl, bh, 0);
722 722
723 kaddr = kmap_atomic(page, KM_USER0); 723 kaddr = kmap_atomic(page);
724 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 724 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
725 nbytes = PAGE_CACHE_SIZE - offset; 725 nbytes = PAGE_CACHE_SIZE - offset;
726 memcpy(kaddr + offset, ptr, nbytes); 726 memcpy(kaddr + offset, ptr, nbytes);
727 flush_dcache_page(page); 727 flush_dcache_page(page);
728 kunmap_atomic(kaddr, KM_USER0); 728 kunmap_atomic(kaddr);
729 unlock_page(page); 729 unlock_page(page);
730 page_cache_release(page); 730 page_cache_release(page);
731 731
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 981bfa32121a..19bde40b4864 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -327,23 +327,34 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
327 * Returns: The resource group, or NULL if not found 327 * Returns: The resource group, or NULL if not found
328 */ 328 */
329 329
330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) 330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
331{ 331{
332 struct rb_node **newn; 332 struct rb_node *n, *next;
333 struct gfs2_rgrpd *cur; 333 struct gfs2_rgrpd *cur;
334 334
335 if (gfs2_rindex_update(sdp))
336 return NULL;
337
335 spin_lock(&sdp->sd_rindex_spin); 338 spin_lock(&sdp->sd_rindex_spin);
336 newn = &sdp->sd_rindex_tree.rb_node; 339 n = sdp->sd_rindex_tree.rb_node;
337 while (*newn) { 340 while (n) {
338 cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node); 341 cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
342 next = NULL;
339 if (blk < cur->rd_addr) 343 if (blk < cur->rd_addr)
340 newn = &((*newn)->rb_left); 344 next = n->rb_left;
341 else if (blk >= cur->rd_data0 + cur->rd_data) 345 else if (blk >= cur->rd_data0 + cur->rd_data)
342 newn = &((*newn)->rb_right); 346 next = n->rb_right;
343 else { 347 if (next == NULL) {
344 spin_unlock(&sdp->sd_rindex_spin); 348 spin_unlock(&sdp->sd_rindex_spin);
349 if (exact) {
350 if (blk < cur->rd_addr)
351 return NULL;
352 if (blk >= cur->rd_data0 + cur->rd_data)
353 return NULL;
354 }
345 return cur; 355 return cur;
346 } 356 }
357 n = next;
347 } 358 }
348 spin_unlock(&sdp->sd_rindex_spin); 359 spin_unlock(&sdp->sd_rindex_spin);
349 360
@@ -532,7 +543,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
532 struct file_ra_state ra_state; 543 struct file_ra_state ra_state;
533 int error, rgrps; 544 int error, rgrps;
534 545
535 mutex_lock(&sdp->sd_rindex_mutex);
536 file_ra_state_init(&ra_state, inode->i_mapping); 546 file_ra_state_init(&ra_state, inode->i_mapping);
537 for (rgrps = 0;; rgrps++) { 547 for (rgrps = 0;; rgrps++) {
538 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 548 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -545,11 +555,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
545 break; 555 break;
546 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 556 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
547 } 557 }
548 mutex_unlock(&sdp->sd_rindex_mutex);
549 return total_data; 558 return total_data;
550} 559}
551 560
552static void rgd_insert(struct gfs2_rgrpd *rgd) 561static int rgd_insert(struct gfs2_rgrpd *rgd)
553{ 562{
554 struct gfs2_sbd *sdp = rgd->rd_sbd; 563 struct gfs2_sbd *sdp = rgd->rd_sbd;
555 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 564 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
@@ -565,11 +574,13 @@ static void rgd_insert(struct gfs2_rgrpd *rgd)
565 else if (rgd->rd_addr > cur->rd_addr) 574 else if (rgd->rd_addr > cur->rd_addr)
566 newn = &((*newn)->rb_right); 575 newn = &((*newn)->rb_right);
567 else 576 else
568 return; 577 return -EEXIST;
569 } 578 }
570 579
571 rb_link_node(&rgd->rd_node, parent, newn); 580 rb_link_node(&rgd->rd_node, parent, newn);
572 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 581 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
582 sdp->sd_rgrps++;
583 return 0;
573} 584}
574 585
575/** 586/**
@@ -623,10 +634,12 @@ static int read_rindex_entry(struct gfs2_inode *ip,
623 if (rgd->rd_data > sdp->sd_max_rg_data) 634 if (rgd->rd_data > sdp->sd_max_rg_data)
624 sdp->sd_max_rg_data = rgd->rd_data; 635 sdp->sd_max_rg_data = rgd->rd_data;
625 spin_lock(&sdp->sd_rindex_spin); 636 spin_lock(&sdp->sd_rindex_spin);
626 rgd_insert(rgd); 637 error = rgd_insert(rgd);
627 sdp->sd_rgrps++;
628 spin_unlock(&sdp->sd_rindex_spin); 638 spin_unlock(&sdp->sd_rindex_spin);
629 return error; 639 if (!error)
640 return 0;
641
642 error = 0; /* someone else read in the rgrp; free it and ignore it */
630 643
631fail: 644fail:
632 kfree(rgd->rd_bits); 645 kfree(rgd->rd_bits);
@@ -683,20 +696,22 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
683 struct gfs2_glock *gl = ip->i_gl; 696 struct gfs2_glock *gl = ip->i_gl;
684 struct gfs2_holder ri_gh; 697 struct gfs2_holder ri_gh;
685 int error = 0; 698 int error = 0;
699 int unlock_required = 0;
686 700
687 /* Read new copy from disk if we don't have the latest */ 701 /* Read new copy from disk if we don't have the latest */
688 if (!sdp->sd_rindex_uptodate) { 702 if (!sdp->sd_rindex_uptodate) {
689 mutex_lock(&sdp->sd_rindex_mutex); 703 if (!gfs2_glock_is_locked_by_me(gl)) {
690 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 704 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
691 if (error) 705 if (error)
692 return error; 706 return error;
707 unlock_required = 1;
708 }
693 if (!sdp->sd_rindex_uptodate) 709 if (!sdp->sd_rindex_uptodate)
694 error = gfs2_ri_update(ip); 710 error = gfs2_ri_update(ip);
695 gfs2_glock_dq_uninit(&ri_gh); 711 if (unlock_required)
696 mutex_unlock(&sdp->sd_rindex_mutex); 712 gfs2_glock_dq_uninit(&ri_gh);
697 } 713 }
698 714
699
700 return error; 715 return error;
701} 716}
702 717
@@ -805,9 +820,9 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
805 820
806} 821}
807 822
808void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 823int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
809 struct buffer_head *bh, 824 struct buffer_head *bh,
810 const struct gfs2_bitmap *bi) 825 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
811{ 826{
812 struct super_block *sb = sdp->sd_vfs; 827 struct super_block *sb = sdp->sd_vfs;
813 struct block_device *bdev = sb->s_bdev; 828 struct block_device *bdev = sb->s_bdev;
@@ -818,11 +833,19 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
818 sector_t nr_sects = 0; 833 sector_t nr_sects = 0;
819 int rv; 834 int rv;
820 unsigned int x; 835 unsigned int x;
836 u32 trimmed = 0;
837 u8 diff;
821 838
822 for (x = 0; x < bi->bi_len; x++) { 839 for (x = 0; x < bi->bi_len; x++) {
823 const u8 *orig = bh->b_data + bi->bi_offset + x; 840 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
824 const u8 *clone = bi->bi_clone + bi->bi_offset + x; 841 clone += bi->bi_offset;
825 u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 842 clone += x;
843 if (bh) {
844 const u8 *orig = bh->b_data + bi->bi_offset + x;
845 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
846 } else {
847 diff = ~(*clone | (*clone >> 1));
848 }
826 diff &= 0x55; 849 diff &= 0x55;
827 if (diff == 0) 850 if (diff == 0)
828 continue; 851 continue;
@@ -833,11 +856,14 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
833 if (nr_sects == 0) 856 if (nr_sects == 0)
834 goto start_new_extent; 857 goto start_new_extent;
835 if ((start + nr_sects) != blk) { 858 if ((start + nr_sects) != blk) {
836 rv = blkdev_issue_discard(bdev, start, 859 if (nr_sects >= minlen) {
837 nr_sects, GFP_NOFS, 860 rv = blkdev_issue_discard(bdev,
838 0); 861 start, nr_sects,
839 if (rv) 862 GFP_NOFS, 0);
840 goto fail; 863 if (rv)
864 goto fail;
865 trimmed += nr_sects;
866 }
841 nr_sects = 0; 867 nr_sects = 0;
842start_new_extent: 868start_new_extent:
843 start = blk; 869 start = blk;
@@ -848,15 +874,104 @@ start_new_extent:
848 blk += sects_per_blk; 874 blk += sects_per_blk;
849 } 875 }
850 } 876 }
851 if (nr_sects) { 877 if (nr_sects >= minlen) {
852 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 878 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
853 if (rv) 879 if (rv)
854 goto fail; 880 goto fail;
881 trimmed += nr_sects;
855 } 882 }
856 return; 883 if (ptrimmed)
884 *ptrimmed = trimmed;
885 return 0;
886
857fail: 887fail:
858 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 888 if (sdp->sd_args.ar_discard)
889 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
859 sdp->sd_args.ar_discard = 0; 890 sdp->sd_args.ar_discard = 0;
891 return -EIO;
892}
893
894/**
895 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
896 * @filp: Any file on the filesystem
897 * @argp: Pointer to the arguments (also used to pass result)
898 *
899 * Returns: 0 on success, otherwise error code
900 */
901
902int gfs2_fitrim(struct file *filp, void __user *argp)
903{
904 struct inode *inode = filp->f_dentry->d_inode;
905 struct gfs2_sbd *sdp = GFS2_SB(inode);
906 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
907 struct buffer_head *bh;
908 struct gfs2_rgrpd *rgd;
909 struct gfs2_rgrpd *rgd_end;
910 struct gfs2_holder gh;
911 struct fstrim_range r;
912 int ret = 0;
913 u64 amt;
914 u64 trimmed = 0;
915 unsigned int x;
916
917 if (!capable(CAP_SYS_ADMIN))
918 return -EPERM;
919
920 if (!blk_queue_discard(q))
921 return -EOPNOTSUPP;
922
923 if (argp == NULL) {
924 r.start = 0;
925 r.len = ULLONG_MAX;
926 r.minlen = 0;
927 } else if (copy_from_user(&r, argp, sizeof(r)))
928 return -EFAULT;
929
930 rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
931 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
932
933 while (1) {
934
935 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
936 if (ret)
937 goto out;
938
939 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
940 /* Trim each bitmap in the rgrp */
941 for (x = 0; x < rgd->rd_length; x++) {
942 struct gfs2_bitmap *bi = rgd->rd_bits + x;
943 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
944 if (ret) {
945 gfs2_glock_dq_uninit(&gh);
946 goto out;
947 }
948 trimmed += amt;
949 }
950
951 /* Mark rgrp as having been trimmed */
952 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
953 if (ret == 0) {
954 bh = rgd->rd_bits[0].bi_bh;
955 rgd->rd_flags |= GFS2_RGF_TRIMMED;
956 gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
957 gfs2_rgrp_out(rgd, bh->b_data);
958 gfs2_trans_end(sdp);
959 }
960 }
961 gfs2_glock_dq_uninit(&gh);
962
963 if (rgd == rgd_end)
964 break;
965
966 rgd = gfs2_rgrpd_get_next(rgd);
967 }
968
969out:
970 r.len = trimmed << 9;
971 if (argp && copy_to_user(argp, &r, sizeof(r)))
972 return -EFAULT;
973
974 return ret;
860} 975}
861 976
862/** 977/**
@@ -1003,7 +1118,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1003 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1118 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
1004 rgd = begin = ip->i_rgd; 1119 rgd = begin = ip->i_rgd;
1005 else 1120 else
1006 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal); 1121 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1007 1122
1008 if (rgd == NULL) 1123 if (rgd == NULL)
1009 return -EBADSLT; 1124 return -EBADSLT;
@@ -1288,7 +1403,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1288 u32 length, rgrp_blk, buf_blk; 1403 u32 length, rgrp_blk, buf_blk;
1289 unsigned int buf; 1404 unsigned int buf;
1290 1405
1291 rgd = gfs2_blk2rgrpd(sdp, bstart); 1406 rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1292 if (!rgd) { 1407 if (!rgd) {
1293 if (gfs2_consist(sdp)) 1408 if (gfs2_consist(sdp))
1294 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1409 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
@@ -1469,7 +1584,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1469 return; 1584 return;
1470 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); 1585 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1471 rgd->rd_free += blen; 1586 rgd->rd_free += blen;
1472 1587 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
1473 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1588 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1474 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1589 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1475 1590
@@ -1555,14 +1670,9 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1555{ 1670{
1556 struct gfs2_rgrpd *rgd; 1671 struct gfs2_rgrpd *rgd;
1557 struct gfs2_holder rgd_gh; 1672 struct gfs2_holder rgd_gh;
1558 int error; 1673 int error = -EINVAL;
1559
1560 error = gfs2_rindex_update(sdp);
1561 if (error)
1562 return error;
1563 1674
1564 error = -EINVAL; 1675 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
1565 rgd = gfs2_blk2rgrpd(sdp, no_addr);
1566 if (!rgd) 1676 if (!rgd)
1567 goto fail; 1677 goto fail;
1568 1678
@@ -1605,7 +1715,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
1605 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 1715 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
1606 rgd = ip->i_rgd; 1716 rgd = ip->i_rgd;
1607 else 1717 else
1608 rgd = gfs2_blk2rgrpd(sdp, block); 1718 rgd = gfs2_blk2rgrpd(sdp, block, 1);
1609 if (!rgd) { 1719 if (!rgd) {
1610 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 1720 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
1611 return; 1721 return;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ceec9106cdf4..b4b10f4de25f 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -11,6 +11,7 @@
11#define __RGRP_DOT_H__ 11#define __RGRP_DOT_H__
12 12
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/uaccess.h>
14 15
15struct gfs2_rgrpd; 16struct gfs2_rgrpd;
16struct gfs2_sbd; 17struct gfs2_sbd;
@@ -18,7 +19,7 @@ struct gfs2_holder;
18 19
19extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); 20extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
20 21
21extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); 22extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
22extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); 23extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
23extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); 24extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
24 25
@@ -62,8 +63,9 @@ extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
62extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 63extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
63extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); 64extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
64extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); 65extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
65extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 66extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
66 struct buffer_head *bh, 67 struct buffer_head *bh,
67 const struct gfs2_bitmap *bi); 68 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
69extern int gfs2_fitrim(struct file *filp, void __user *argp);
68 70
69#endif /* __RGRP_DOT_H__ */ 71#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4553ce515f62..6172fa77ad59 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1417,7 +1417,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1417 if (error) 1417 if (error)
1418 goto out; 1418 goto out;
1419 1419
1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1421 if (!rgd) { 1421 if (!rgd) {
1422 gfs2_consist_inode(ip); 1422 gfs2_consist_inode(ip);
1423 error = -EIO; 1423 error = -EIO;
@@ -1557,6 +1557,7 @@ out:
1557 end_writeback(inode); 1557 end_writeback(inode);
1558 gfs2_dir_hash_inval(ip); 1558 gfs2_dir_hash_inval(ip);
1559 ip->i_gl->gl_object = NULL; 1559 ip->i_gl->gl_object = NULL;
1560 flush_delayed_work_sync(&ip->i_gl->gl_work);
1560 gfs2_glock_add_to_lru(ip->i_gl); 1561 gfs2_glock_add_to_lru(ip->i_gl);
1561 gfs2_glock_put(ip->i_gl); 1562 gfs2_glock_put(ip->i_gl);
1562 ip->i_gl = NULL; 1563 ip->i_gl = NULL;
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 5d07609ec57d..dfa89cd75534 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -11,6 +11,7 @@
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h> 13#include <linux/writeback.h>
14#include <linux/ktime.h>
14#include "incore.h" 15#include "incore.h"
15#include "glock.h" 16#include "glock.h"
16 17
@@ -43,7 +44,8 @@
43 {(1UL << GLF_FROZEN), "F" }, \ 44 {(1UL << GLF_FROZEN), "F" }, \
44 {(1UL << GLF_QUEUED), "q" }, \ 45 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \ 46 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" }) 47 {(1UL << GLF_OBJECT), "o" }, \
48 {(1UL << GLF_BLOCKING), "b" })
47 49
48#ifndef NUMPTY 50#ifndef NUMPTY
49#define NUMPTY 51#define NUMPTY
@@ -236,6 +238,62 @@ TRACE_EVENT(gfs2_glock_queue,
236 glock_trace_name(__entry->state)) 238 glock_trace_name(__entry->state))
237); 239);
238 240
241/* DLM sends a reply to GFS2 */
242TRACE_EVENT(gfs2_glock_lock_time,
243
244 TP_PROTO(const struct gfs2_glock *gl, s64 tdiff),
245
246 TP_ARGS(gl, tdiff),
247
248 TP_STRUCT__entry(
249 __field( dev_t, dev )
250 __field( u64, glnum )
251 __field( u32, gltype )
252 __field( int, status )
253 __field( char, flags )
254 __field( s64, tdiff )
255 __field( s64, srtt )
256 __field( s64, srttvar )
257 __field( s64, srttb )
258 __field( s64, srttvarb )
259 __field( s64, sirt )
260 __field( s64, sirtvar )
261 __field( s64, dcount )
262 __field( s64, qcount )
263 ),
264
265 TP_fast_assign(
266 __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
267 __entry->glnum = gl->gl_name.ln_number;
268 __entry->gltype = gl->gl_name.ln_type;
269 __entry->status = gl->gl_lksb.sb_status;
270 __entry->flags = gl->gl_lksb.sb_flags;
271 __entry->tdiff = tdiff;
272 __entry->srtt = gl->gl_stats.stats[GFS2_LKS_SRTT];
273 __entry->srttvar = gl->gl_stats.stats[GFS2_LKS_SRTTVAR];
274 __entry->srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
275 __entry->srttvarb = gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
276 __entry->sirt = gl->gl_stats.stats[GFS2_LKS_SIRT];
277 __entry->sirtvar = gl->gl_stats.stats[GFS2_LKS_SIRTVAR];
278 __entry->dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
279 __entry->qcount = gl->gl_stats.stats[GFS2_LKS_QCOUNT];
280 ),
281
282 TP_printk("%u,%u glock %d:%lld status:%d flags:%02x tdiff:%lld srtt:%lld/%lld srttb:%lld/%lld sirt:%lld/%lld dcnt:%lld qcnt:%lld",
283 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
284 (unsigned long long)__entry->glnum,
285 __entry->status, __entry->flags,
286 (long long)__entry->tdiff,
287 (long long)__entry->srtt,
288 (long long)__entry->srttvar,
289 (long long)__entry->srttb,
290 (long long)__entry->srttvarb,
291 (long long)__entry->sirt,
292 (long long)__entry->sirtvar,
293 (long long)__entry->dcount,
294 (long long)__entry->qcount)
295);
296
239/* Section 2 - Log/journal 297/* Section 2 - Log/journal
240 * 298 *
241 * Objectives: 299 * Objectives:
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 53511291fe36..9e7765e8e7b0 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,6 +25,7 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
25struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 25struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; 26struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
27struct kmem_cache *gfs2_quotad_cachep __read_mostly; 27struct kmem_cache *gfs2_quotad_cachep __read_mostly;
28mempool_t *gfs2_bh_pool __read_mostly;
28 29
29void gfs2_assert_i(struct gfs2_sbd *sdp) 30void gfs2_assert_i(struct gfs2_sbd *sdp)
30{ 31{
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index b432e04600de..a4ce76c67dbb 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -10,6 +10,8 @@
10#ifndef __UTIL_DOT_H__ 10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__ 11#define __UTIL_DOT_H__
12 12
13#include <linux/mempool.h>
14
13#include "incore.h" 15#include "incore.h"
14 16
15#define fs_printk(level, fs, fmt, arg...) \ 17#define fs_printk(level, fs, fmt, arg...) \
@@ -150,6 +152,7 @@ extern struct kmem_cache *gfs2_inode_cachep;
150extern struct kmem_cache *gfs2_bufdata_cachep; 152extern struct kmem_cache *gfs2_bufdata_cachep;
151extern struct kmem_cache *gfs2_rgrpd_cachep; 153extern struct kmem_cache *gfs2_rgrpd_cachep;
152extern struct kmem_cache *gfs2_quotad_cachep; 154extern struct kmem_cache *gfs2_quotad_cachep;
155extern mempool_t *gfs2_bh_pool;
153 156
154static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, 157static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
155 unsigned int *p) 158 unsigned int *p)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index e9636591b5d5..2e5ba425cae7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -251,7 +251,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
251 if (!blks) 251 if (!blks)
252 return 0; 252 return 0;
253 253
254 rgd = gfs2_blk2rgrpd(sdp, bn); 254 rgd = gfs2_blk2rgrpd(sdp, bn, 1);
255 if (!rgd) { 255 if (!rgd) {
256 gfs2_consist_inode(ip); 256 gfs2_consist_inode(ip);
257 return -EIO; 257 return -EIO;
@@ -1439,7 +1439,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1439 struct gfs2_holder gh; 1439 struct gfs2_holder gh;
1440 int error; 1440 int error;
1441 1441
1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); 1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1);
1443 if (!rgd) { 1443 if (!rgd) {
1444 gfs2_consist_inode(ip); 1444 gfs2_consist_inode(ip);
1445 return -EIO; 1445 return -EIO;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8137fb3e6780..7b4c537d6e13 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -430,15 +430,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
430 430
431 sb->s_d_op = &hfs_dentry_operations; 431 sb->s_d_op = &hfs_dentry_operations;
432 res = -ENOMEM; 432 res = -ENOMEM;
433 sb->s_root = d_alloc_root(root_inode); 433 sb->s_root = d_make_root(root_inode);
434 if (!sb->s_root) 434 if (!sb->s_root)
435 goto bail_iput; 435 goto bail_no_root;
436 436
437 /* everything's okay */ 437 /* everything's okay */
438 return 0; 438 return 0;
439 439
440bail_iput:
441 iput(root_inode);
442bail_no_root: 440bail_no_root:
443 printk(KERN_ERR "hfs: get root inode failed.\n"); 441 printk(KERN_ERR "hfs: get root inode failed.\n");
444bail: 442bail:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 21a5b7fc6db4..4e75ac646fea 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -317,6 +317,11 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
317 317
318 318
319/* 319/*
320 * hfs+-specific ioctl for making the filesystem bootable
321 */
322#define HFSPLUS_IOC_BLESS _IO('h', 0x80)
323
324/*
320 * Functions in any *.c used in other files 325 * Functions in any *.c used in other files
321 */ 326 */
322 327
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 927cdd6d5bf5..921967e5abb1 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -117,7 +117,7 @@ struct hfsplus_vh {
117 __be32 write_count; 117 __be32 write_count;
118 __be64 encodings_bmp; 118 __be64 encodings_bmp;
119 119
120 u8 finder_info[32]; 120 u32 finder_info[8];
121 121
122 struct hfsplus_fork_raw alloc_file; 122 struct hfsplus_fork_raw alloc_file;
123 struct hfsplus_fork_raw ext_file; 123 struct hfsplus_fork_raw ext_file;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6643b242bdd7..82b69ee4dacc 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -193,6 +193,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir,
193 mutex_init(&hip->extents_lock); 193 mutex_init(&hip->extents_lock);
194 hip->extent_state = 0; 194 hip->extent_state = 0;
195 hip->flags = 0; 195 hip->flags = 0;
196 hip->userflags = 0;
196 set_bit(HFSPLUS_I_RSRC, &hip->flags); 197 set_bit(HFSPLUS_I_RSRC, &hip->flags);
197 198
198 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 199 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
@@ -400,6 +401,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
400 atomic_set(&hip->opencnt, 0); 401 atomic_set(&hip->opencnt, 0);
401 hip->extent_state = 0; 402 hip->extent_state = 0;
402 hip->flags = 0; 403 hip->flags = 0;
404 hip->userflags = 0;
403 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); 405 memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
404 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); 406 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
405 hip->alloc_blocks = 0; 407 hip->alloc_blocks = 0;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index f66c7655b3f7..c640ba57074b 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -20,6 +20,38 @@
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21#include "hfsplus_fs.h" 21#include "hfsplus_fs.h"
22 22
23/*
24 * "Blessing" an HFS+ filesystem writes metadata to the superblock informing
25 * the platform firmware which file to boot from
26 */
27static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
28{
29 struct dentry *dentry = file->f_path.dentry;
30 struct inode *inode = dentry->d_inode;
31 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
32 struct hfsplus_vh *vh = sbi->s_vhdr;
33 struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
34
35 if (!capable(CAP_SYS_ADMIN))
36 return -EPERM;
37
38 mutex_lock(&sbi->vh_mutex);
39
40 /* Directory containing the bootable system */
41 vh->finder_info[0] = bvh->finder_info[0] =
42 cpu_to_be32(parent_ino(dentry));
43
44 /* Bootloader */
45 vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino);
46
47 /* Per spec, the OS X system folder - same as finder_info[0] here */
48 vh->finder_info[5] = bvh->finder_info[5] =
49 cpu_to_be32(parent_ino(dentry));
50
51 mutex_unlock(&sbi->vh_mutex);
52 return 0;
53}
54
23static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) 55static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
24{ 56{
25 struct inode *inode = file->f_path.dentry->d_inode; 57 struct inode *inode = file->f_path.dentry->d_inode;
@@ -108,6 +140,8 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
108 return hfsplus_ioctl_getflags(file, argp); 140 return hfsplus_ioctl_getflags(file, argp);
109 case HFSPLUS_IOC_EXT2_SETFLAGS: 141 case HFSPLUS_IOC_EXT2_SETFLAGS:
110 return hfsplus_ioctl_setflags(file, argp); 142 return hfsplus_ioctl_setflags(file, argp);
143 case HFSPLUS_IOC_BLESS:
144 return hfsplus_ioctl_bless(file, argp);
111 default: 145 default:
112 return -ENOTTY; 146 return -ENOTTY;
113 } 147 }
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 427682ca9e48..ceb1c281eefb 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -465,6 +465,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
465 goto out_put_alloc_file; 465 goto out_put_alloc_file;
466 } 466 }
467 467
468 sb->s_d_op = &hfsplus_dentry_operations;
469 sb->s_root = d_make_root(root);
470 if (!sb->s_root) {
471 err = -ENOMEM;
472 goto out_put_alloc_file;
473 }
474
468 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 475 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
469 str.name = HFSP_HIDDENDIR_NAME; 476 str.name = HFSP_HIDDENDIR_NAME;
470 err = hfs_find_init(sbi->cat_tree, &fd); 477 err = hfs_find_init(sbi->cat_tree, &fd);
@@ -515,13 +522,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
515 } 522 }
516 } 523 }
517 524
518 sb->s_d_op = &hfsplus_dentry_operations;
519 sb->s_root = d_alloc_root(root);
520 if (!sb->s_root) {
521 err = -ENOMEM;
522 goto out_put_hidden_dir;
523 }
524
525 unload_nls(sbi->nls); 525 unload_nls(sbi->nls);
526 sbi->nls = nls; 526 sbi->nls = nls;
527 return 0; 527 return 0;
@@ -529,7 +529,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
529out_put_hidden_dir: 529out_put_hidden_dir:
530 iput(sbi->hidden_dir); 530 iput(sbi->hidden_dir);
531out_put_root: 531out_put_root:
532 iput(root); 532 dput(sb->s_root);
533 sb->s_root = NULL;
533out_put_alloc_file: 534out_put_alloc_file:
534 iput(sbi->alloc_file); 535 iput(sbi->alloc_file);
535out_close_cat_tree: 536out_close_cat_tree:
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index e130bd46d671..588d45885a6f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -966,9 +966,9 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
966 } 966 }
967 967
968 err = -ENOMEM; 968 err = -ENOMEM;
969 sb->s_root = d_alloc_root(root_inode); 969 sb->s_root = d_make_root(root_inode);
970 if (sb->s_root == NULL) 970 if (sb->s_root == NULL)
971 goto out_put; 971 goto out;
972 972
973 return 0; 973 return 0;
974 974
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 3690467c944e..54f6eccb79d9 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -625,11 +625,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
625 hpfs_init_inode(root); 625 hpfs_init_inode(root);
626 hpfs_read_inode(root); 626 hpfs_read_inode(root);
627 unlock_new_inode(root); 627 unlock_new_inode(root);
628 s->s_root = d_alloc_root(root); 628 s->s_root = d_make_root(root);
629 if (!s->s_root) { 629 if (!s->s_root)
630 iput(root);
631 goto bail0; 630 goto bail0;
632 }
633 631
634 /* 632 /*
635 * find the root directory's . pointer & finish filling in the inode 633 * find the root directory's . pointer & finish filling in the inode
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index d92f4ce80925..a80e45a690ac 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -726,17 +726,12 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
726 726
727 err = -ENOMEM; 727 err = -ENOMEM;
728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root)); 728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root));
729 if (!root_inode) 729 sb->s_root = d_make_root(root_inode);
730 goto out_mntput;
731
732 sb->s_root = d_alloc_root(root_inode);
733 if (!sb->s_root) 730 if (!sb->s_root)
734 goto out_iput; 731 goto out_mntput;
735 732
736 return 0; 733 return 0;
737 734
738 out_iput:
739 iput(root_inode);
740 out_mntput: 735 out_mntput:
741 mntput(proc_mnt); 736 mntput(proc_mnt);
742 out: 737 out:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1e85a7ac0217..ea251749d9d5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -41,6 +41,25 @@ const struct file_operations hugetlbfs_file_operations;
41static const struct inode_operations hugetlbfs_dir_inode_operations; 41static const struct inode_operations hugetlbfs_dir_inode_operations;
42static const struct inode_operations hugetlbfs_inode_operations; 42static const struct inode_operations hugetlbfs_inode_operations;
43 43
44struct hugetlbfs_config {
45 uid_t uid;
46 gid_t gid;
47 umode_t mode;
48 long nr_blocks;
49 long nr_inodes;
50 struct hstate *hstate;
51};
52
53struct hugetlbfs_inode_info {
54 struct shared_policy policy;
55 struct inode vfs_inode;
56};
57
58static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
59{
60 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
61}
62
44static struct backing_dev_info hugetlbfs_backing_dev_info = { 63static struct backing_dev_info hugetlbfs_backing_dev_info = {
45 .name = "hugetlbfs", 64 .name = "hugetlbfs",
46 .ra_pages = 0, /* No readahead */ 65 .ra_pages = 0, /* No readahead */
@@ -154,10 +173,12 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
154 return addr; 173 return addr;
155 } 174 }
156 175
157 start_addr = mm->free_area_cache; 176 if (len > mm->cached_hole_size)
158 177 start_addr = mm->free_area_cache;
159 if (len <= mm->cached_hole_size) 178 else {
160 start_addr = TASK_UNMAPPED_BASE; 179 start_addr = TASK_UNMAPPED_BASE;
180 mm->cached_hole_size = 0;
181 }
161 182
162full_search: 183full_search:
163 addr = ALIGN(start_addr, huge_page_size(h)); 184 addr = ALIGN(start_addr, huge_page_size(h));
@@ -171,13 +192,18 @@ full_search:
171 */ 192 */
172 if (start_addr != TASK_UNMAPPED_BASE) { 193 if (start_addr != TASK_UNMAPPED_BASE) {
173 start_addr = TASK_UNMAPPED_BASE; 194 start_addr = TASK_UNMAPPED_BASE;
195 mm->cached_hole_size = 0;
174 goto full_search; 196 goto full_search;
175 } 197 }
176 return -ENOMEM; 198 return -ENOMEM;
177 } 199 }
178 200
179 if (!vma || addr + len <= vma->vm_start) 201 if (!vma || addr + len <= vma->vm_start) {
202 mm->free_area_cache = addr + len;
180 return addr; 203 return addr;
204 }
205 if (addr + mm->cached_hole_size < vma->vm_start)
206 mm->cached_hole_size = vma->vm_start - addr;
181 addr = ALIGN(vma->vm_end, huge_page_size(h)); 207 addr = ALIGN(vma->vm_end, huge_page_size(h));
182 } 208 }
183} 209}
@@ -238,17 +264,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
238 loff_t isize; 264 loff_t isize;
239 ssize_t retval = 0; 265 ssize_t retval = 0;
240 266
241 mutex_lock(&inode->i_mutex);
242
243 /* validate length */ 267 /* validate length */
244 if (len == 0) 268 if (len == 0)
245 goto out; 269 goto out;
246 270
247 isize = i_size_read(inode);
248 if (!isize)
249 goto out;
250
251 end_index = (isize - 1) >> huge_page_shift(h);
252 for (;;) { 271 for (;;) {
253 struct page *page; 272 struct page *page;
254 unsigned long nr, ret; 273 unsigned long nr, ret;
@@ -256,18 +275,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
256 275
257 /* nr is the maximum number of bytes to copy from this page */ 276 /* nr is the maximum number of bytes to copy from this page */
258 nr = huge_page_size(h); 277 nr = huge_page_size(h);
278 isize = i_size_read(inode);
279 if (!isize)
280 goto out;
281 end_index = (isize - 1) >> huge_page_shift(h);
259 if (index >= end_index) { 282 if (index >= end_index) {
260 if (index > end_index) 283 if (index > end_index)
261 goto out; 284 goto out;
262 nr = ((isize - 1) & ~huge_page_mask(h)) + 1; 285 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
263 if (nr <= offset) { 286 if (nr <= offset)
264 goto out; 287 goto out;
265 }
266 } 288 }
267 nr = nr - offset; 289 nr = nr - offset;
268 290
269 /* Find the page */ 291 /* Find the page */
270 page = find_get_page(mapping, index); 292 page = find_lock_page(mapping, index);
271 if (unlikely(page == NULL)) { 293 if (unlikely(page == NULL)) {
272 /* 294 /*
273 * We have a HOLE, zero out the user-buffer for the 295 * We have a HOLE, zero out the user-buffer for the
@@ -279,17 +301,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
279 else 301 else
280 ra = 0; 302 ra = 0;
281 } else { 303 } else {
304 unlock_page(page);
305
282 /* 306 /*
283 * We have the page, copy it to user space buffer. 307 * We have the page, copy it to user space buffer.
284 */ 308 */
285 ra = hugetlbfs_read_actor(page, offset, buf, len, nr); 309 ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
286 ret = ra; 310 ret = ra;
311 page_cache_release(page);
287 } 312 }
288 if (ra < 0) { 313 if (ra < 0) {
289 if (retval == 0) 314 if (retval == 0)
290 retval = ra; 315 retval = ra;
291 if (page)
292 page_cache_release(page);
293 goto out; 316 goto out;
294 } 317 }
295 318
@@ -299,16 +322,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
299 index += offset >> huge_page_shift(h); 322 index += offset >> huge_page_shift(h);
300 offset &= ~huge_page_mask(h); 323 offset &= ~huge_page_mask(h);
301 324
302 if (page)
303 page_cache_release(page);
304
305 /* short read or no more work */ 325 /* short read or no more work */
306 if ((ret != nr) || (len == 0)) 326 if ((ret != nr) || (len == 0))
307 break; 327 break;
308 } 328 }
309out: 329out:
310 *ppos = ((loff_t)index << huge_page_shift(h)) + offset; 330 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
311 mutex_unlock(&inode->i_mutex);
312 return retval; 331 return retval;
313} 332}
314 333
@@ -607,9 +626,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
607 spin_lock(&sbinfo->stat_lock); 626 spin_lock(&sbinfo->stat_lock);
608 /* If no limits set, just report 0 for max/free/used 627 /* If no limits set, just report 0 for max/free/used
609 * blocks, like simple_statfs() */ 628 * blocks, like simple_statfs() */
610 if (sbinfo->max_blocks >= 0) { 629 if (sbinfo->spool) {
611 buf->f_blocks = sbinfo->max_blocks; 630 long free_pages;
612 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 631
632 spin_lock(&sbinfo->spool->lock);
633 buf->f_blocks = sbinfo->spool->max_hpages;
634 free_pages = sbinfo->spool->max_hpages
635 - sbinfo->spool->used_hpages;
636 buf->f_bavail = buf->f_bfree = free_pages;
637 spin_unlock(&sbinfo->spool->lock);
613 buf->f_files = sbinfo->max_inodes; 638 buf->f_files = sbinfo->max_inodes;
614 buf->f_ffree = sbinfo->free_inodes; 639 buf->f_ffree = sbinfo->free_inodes;
615 } 640 }
@@ -625,6 +650,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
625 650
626 if (sbi) { 651 if (sbi) {
627 sb->s_fs_info = NULL; 652 sb->s_fs_info = NULL;
653
654 if (sbi->spool)
655 hugepage_put_subpool(sbi->spool);
656
628 kfree(sbi); 657 kfree(sbi);
629 } 658 }
630} 659}
@@ -831,8 +860,6 @@ bad_val:
831static int 860static int
832hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 861hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
833{ 862{
834 struct inode * inode;
835 struct dentry * root;
836 int ret; 863 int ret;
837 struct hugetlbfs_config config; 864 struct hugetlbfs_config config;
838 struct hugetlbfs_sb_info *sbinfo; 865 struct hugetlbfs_sb_info *sbinfo;
@@ -855,60 +882,31 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
855 sb->s_fs_info = sbinfo; 882 sb->s_fs_info = sbinfo;
856 sbinfo->hstate = config.hstate; 883 sbinfo->hstate = config.hstate;
857 spin_lock_init(&sbinfo->stat_lock); 884 spin_lock_init(&sbinfo->stat_lock);
858 sbinfo->max_blocks = config.nr_blocks;
859 sbinfo->free_blocks = config.nr_blocks;
860 sbinfo->max_inodes = config.nr_inodes; 885 sbinfo->max_inodes = config.nr_inodes;
861 sbinfo->free_inodes = config.nr_inodes; 886 sbinfo->free_inodes = config.nr_inodes;
887 sbinfo->spool = NULL;
888 if (config.nr_blocks != -1) {
889 sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
890 if (!sbinfo->spool)
891 goto out_free;
892 }
862 sb->s_maxbytes = MAX_LFS_FILESIZE; 893 sb->s_maxbytes = MAX_LFS_FILESIZE;
863 sb->s_blocksize = huge_page_size(config.hstate); 894 sb->s_blocksize = huge_page_size(config.hstate);
864 sb->s_blocksize_bits = huge_page_shift(config.hstate); 895 sb->s_blocksize_bits = huge_page_shift(config.hstate);
865 sb->s_magic = HUGETLBFS_MAGIC; 896 sb->s_magic = HUGETLBFS_MAGIC;
866 sb->s_op = &hugetlbfs_ops; 897 sb->s_op = &hugetlbfs_ops;
867 sb->s_time_gran = 1; 898 sb->s_time_gran = 1;
868 inode = hugetlbfs_get_root(sb, &config); 899 sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
869 if (!inode) 900 if (!sb->s_root)
870 goto out_free;
871
872 root = d_alloc_root(inode);
873 if (!root) {
874 iput(inode);
875 goto out_free; 901 goto out_free;
876 }
877 sb->s_root = root;
878 return 0; 902 return 0;
879out_free: 903out_free:
904 if (sbinfo->spool)
905 kfree(sbinfo->spool);
880 kfree(sbinfo); 906 kfree(sbinfo);
881 return -ENOMEM; 907 return -ENOMEM;
882} 908}
883 909
884int hugetlb_get_quota(struct address_space *mapping, long delta)
885{
886 int ret = 0;
887 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
888
889 if (sbinfo->free_blocks > -1) {
890 spin_lock(&sbinfo->stat_lock);
891 if (sbinfo->free_blocks - delta >= 0)
892 sbinfo->free_blocks -= delta;
893 else
894 ret = -ENOMEM;
895 spin_unlock(&sbinfo->stat_lock);
896 }
897
898 return ret;
899}
900
901void hugetlb_put_quota(struct address_space *mapping, long delta)
902{
903 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
904
905 if (sbinfo->free_blocks > -1) {
906 spin_lock(&sbinfo->stat_lock);
907 sbinfo->free_blocks += delta;
908 spin_unlock(&sbinfo->stat_lock);
909 }
910}
911
912static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, 910static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
913 int flags, const char *dev_name, void *data) 911 int flags, const char *dev_name, void *data)
914{ 912{
@@ -928,8 +926,8 @@ static int can_do_hugetlb_shm(void)
928 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 926 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
929} 927}
930 928
931struct file *hugetlb_file_setup(const char *name, size_t size, 929struct file *hugetlb_file_setup(const char *name, unsigned long addr,
932 vm_flags_t acctflag, 930 size_t size, vm_flags_t acctflag,
933 struct user_struct **user, int creat_flags) 931 struct user_struct **user, int creat_flags)
934{ 932{
935 int error = -ENOMEM; 933 int error = -ENOMEM;
@@ -938,6 +936,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
938 struct path path; 936 struct path path;
939 struct dentry *root; 937 struct dentry *root;
940 struct qstr quick_string; 938 struct qstr quick_string;
939 struct hstate *hstate;
940 unsigned long num_pages;
941 941
942 *user = NULL; 942 *user = NULL;
943 if (!hugetlbfs_vfsmount) 943 if (!hugetlbfs_vfsmount)
@@ -946,7 +946,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
946 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { 946 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
947 *user = current_user(); 947 *user = current_user();
948 if (user_shm_lock(size, *user)) { 948 if (user_shm_lock(size, *user)) {
949 printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n"); 949 task_lock(current);
950 printk_once(KERN_WARNING
951 "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
952 current->comm, current->pid);
953 task_unlock(current);
950 } else { 954 } else {
951 *user = NULL; 955 *user = NULL;
952 return ERR_PTR(-EPERM); 956 return ERR_PTR(-EPERM);
@@ -967,10 +971,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
967 if (!inode) 971 if (!inode)
968 goto out_dentry; 972 goto out_dentry;
969 973
974 hstate = hstate_inode(inode);
975 size += addr & ~huge_page_mask(hstate);
976 num_pages = ALIGN(size, huge_page_size(hstate)) >>
977 huge_page_shift(hstate);
970 error = -ENOMEM; 978 error = -ENOMEM;
971 if (hugetlb_reserve_pages(inode, 0, 979 if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag))
972 size >> huge_page_shift(hstate_inode(inode)), NULL,
973 acctflag))
974 goto out_inode; 980 goto out_inode;
975 981
976 d_instantiate(path.dentry, inode); 982 d_instantiate(path.dentry, inode);
@@ -1006,6 +1012,7 @@ static int __init init_hugetlbfs_fs(void)
1006 if (error) 1012 if (error)
1007 return error; 1013 return error;
1008 1014
1015 error = -ENOMEM;
1009 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 1016 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
1010 sizeof(struct hugetlbfs_inode_info), 1017 sizeof(struct hugetlbfs_inode_info),
1011 0, 0, init_once); 1018 0, 0, init_once);
@@ -1024,10 +1031,10 @@ static int __init init_hugetlbfs_fs(void)
1024 } 1031 }
1025 1032
1026 error = PTR_ERR(vfsmount); 1033 error = PTR_ERR(vfsmount);
1034 unregister_filesystem(&hugetlbfs_fs_type);
1027 1035
1028 out: 1036 out:
1029 if (error) 1037 kmem_cache_destroy(hugetlbfs_inode_cachep);
1030 kmem_cache_destroy(hugetlbfs_inode_cachep);
1031 out2: 1038 out2:
1032 bdi_destroy(&hugetlbfs_backing_dev_info); 1039 bdi_destroy(&hugetlbfs_backing_dev_info);
1033 return error; 1040 return error;
diff --git a/fs/inode.c b/fs/inode.c
index fb10d86ffad7..9f4f5fecc096 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2,29 +2,19 @@
2 * (C) 1997 Linus Torvalds 2 * (C) 1997 Linus Torvalds
3 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) 3 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
4 */ 4 */
5#include <linux/export.h>
5#include <linux/fs.h> 6#include <linux/fs.h>
6#include <linux/mm.h> 7#include <linux/mm.h>
7#include <linux/dcache.h>
8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/writeback.h>
11#include <linux/module.h>
12#include <linux/backing-dev.h> 8#include <linux/backing-dev.h>
13#include <linux/wait.h>
14#include <linux/rwsem.h>
15#include <linux/hash.h> 9#include <linux/hash.h>
16#include <linux/swap.h> 10#include <linux/swap.h>
17#include <linux/security.h> 11#include <linux/security.h>
18#include <linux/pagemap.h>
19#include <linux/cdev.h> 12#include <linux/cdev.h>
20#include <linux/bootmem.h> 13#include <linux/bootmem.h>
21#include <linux/fsnotify.h> 14#include <linux/fsnotify.h>
22#include <linux/mount.h> 15#include <linux/mount.h>
23#include <linux/async.h>
24#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
25#include <linux/prefetch.h> 17#include <linux/prefetch.h>
26#include <linux/ima.h>
27#include <linux/cred.h>
28#include <linux/buffer_head.h> /* for inode_has_buffers */ 18#include <linux/buffer_head.h> /* for inode_has_buffers */
29#include <linux/ratelimit.h> 19#include <linux/ratelimit.h>
30#include "internal.h" 20#include "internal.h"
@@ -938,8 +928,7 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode)
938 struct file_system_type *type = inode->i_sb->s_type; 928 struct file_system_type *type = inode->i_sb->s_type;
939 929
940 /* Set new key only if filesystem hasn't already changed it */ 930 /* Set new key only if filesystem hasn't already changed it */
941 if (!lockdep_match_class(&inode->i_mutex, 931 if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) {
942 &type->i_mutex_key)) {
943 /* 932 /*
944 * ensure nobody is actually holding i_mutex 933 * ensure nobody is actually holding i_mutex
945 */ 934 */
@@ -966,6 +955,7 @@ void unlock_new_inode(struct inode *inode)
966 spin_lock(&inode->i_lock); 955 spin_lock(&inode->i_lock);
967 WARN_ON(!(inode->i_state & I_NEW)); 956 WARN_ON(!(inode->i_state & I_NEW));
968 inode->i_state &= ~I_NEW; 957 inode->i_state &= ~I_NEW;
958 smp_mb();
969 wake_up_bit(&inode->i_state, __I_NEW); 959 wake_up_bit(&inode->i_state, __I_NEW);
970 spin_unlock(&inode->i_lock); 960 spin_unlock(&inode->i_lock);
971} 961}
@@ -1369,17 +1359,6 @@ int generic_delete_inode(struct inode *inode)
1369EXPORT_SYMBOL(generic_delete_inode); 1359EXPORT_SYMBOL(generic_delete_inode);
1370 1360
1371/* 1361/*
1372 * Normal UNIX filesystem behaviour: delete the
1373 * inode when the usage count drops to zero, and
1374 * i_nlink is zero.
1375 */
1376int generic_drop_inode(struct inode *inode)
1377{
1378 return !inode->i_nlink || inode_unhashed(inode);
1379}
1380EXPORT_SYMBOL_GPL(generic_drop_inode);
1381
1382/*
1383 * Called when we're dropping the last reference 1362 * Called when we're dropping the last reference
1384 * to an inode. 1363 * to an inode.
1385 * 1364 *
@@ -1510,9 +1489,10 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1510 * This function automatically handles read only file systems and media, 1489 * This function automatically handles read only file systems and media,
1511 * as well as the "noatime" flag and inode specific "noatime" markers. 1490 * as well as the "noatime" flag and inode specific "noatime" markers.
1512 */ 1491 */
1513void touch_atime(struct vfsmount *mnt, struct dentry *dentry) 1492void touch_atime(struct path *path)
1514{ 1493{
1515 struct inode *inode = dentry->d_inode; 1494 struct vfsmount *mnt = path->mnt;
1495 struct inode *inode = path->dentry->d_inode;
1516 struct timespec now; 1496 struct timespec now;
1517 1497
1518 if (inode->i_flags & S_NOATIME) 1498 if (inode->i_flags & S_NOATIME)
@@ -1651,7 +1631,7 @@ __setup("ihash_entries=", set_ihash_entries);
1651 */ 1631 */
1652void __init inode_init_early(void) 1632void __init inode_init_early(void)
1653{ 1633{
1654 int loop; 1634 unsigned int loop;
1655 1635
1656 /* If hashes are distributed across NUMA nodes, defer 1636 /* If hashes are distributed across NUMA nodes, defer
1657 * hash allocation until vmalloc space is available. 1637 * hash allocation until vmalloc space is available.
@@ -1669,13 +1649,13 @@ void __init inode_init_early(void)
1669 &i_hash_mask, 1649 &i_hash_mask,
1670 0); 1650 0);
1671 1651
1672 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1652 for (loop = 0; loop < (1U << i_hash_shift); loop++)
1673 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1653 INIT_HLIST_HEAD(&inode_hashtable[loop]);
1674} 1654}
1675 1655
1676void __init inode_init(void) 1656void __init inode_init(void)
1677{ 1657{
1678 int loop; 1658 unsigned int loop;
1679 1659
1680 /* inode slab cache */ 1660 /* inode slab cache */
1681 inode_cachep = kmem_cache_create("inode_cache", 1661 inode_cachep = kmem_cache_create("inode_cache",
@@ -1699,7 +1679,7 @@ void __init inode_init(void)
1699 &i_hash_mask, 1679 &i_hash_mask,
1700 0); 1680 0);
1701 1681
1702 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1682 for (loop = 0; loop < (1U << i_hash_shift); loop++)
1703 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1683 INIT_HLIST_HEAD(&inode_hashtable[loop]);
1704} 1684}
1705 1685
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bd62c76fb5df..29037c365ba4 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -947,9 +947,8 @@ root_found:
947 s->s_d_op = &isofs_dentry_ops[table]; 947 s->s_d_op = &isofs_dentry_ops[table];
948 948
949 /* get the root dentry */ 949 /* get the root dentry */
950 s->s_root = d_alloc_root(inode); 950 s->s_root = d_make_root(inode);
951 if (!(s->s_root)) { 951 if (!(s->s_root)) {
952 iput(inode);
953 error = -ENOMEM; 952 error = -ENOMEM;
954 goto out_no_inode; 953 goto out_no_inode;
955 } 954 }
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 59c09f9541b5..0971e9217808 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -129,6 +129,8 @@ static int kjournald(void *arg)
129 setup_timer(&journal->j_commit_timer, commit_timeout, 129 setup_timer(&journal->j_commit_timer, commit_timeout,
130 (unsigned long)current); 130 (unsigned long)current);
131 131
132 set_freezable();
133
132 /* Record that the journal thread is running */ 134 /* Record that the journal thread is running */
133 journal->j_task = current; 135 journal->j_task = current;
134 wake_up(&journal->j_wait_done_commit); 136 wake_up(&journal->j_wait_done_commit);
@@ -328,7 +330,7 @@ repeat:
328 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 330 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
329 } 331 }
330 332
331 mapped_data = kmap_atomic(new_page, KM_USER0); 333 mapped_data = kmap_atomic(new_page);
332 /* 334 /*
333 * Check for escaping 335 * Check for escaping
334 */ 336 */
@@ -337,7 +339,7 @@ repeat:
337 need_copy_out = 1; 339 need_copy_out = 1;
338 do_escape = 1; 340 do_escape = 1;
339 } 341 }
340 kunmap_atomic(mapped_data, KM_USER0); 342 kunmap_atomic(mapped_data);
341 343
342 /* 344 /*
343 * Do we need to do a data copy? 345 * Do we need to do a data copy?
@@ -354,9 +356,9 @@ repeat:
354 } 356 }
355 357
356 jh_in->b_frozen_data = tmp; 358 jh_in->b_frozen_data = tmp;
357 mapped_data = kmap_atomic(new_page, KM_USER0); 359 mapped_data = kmap_atomic(new_page);
358 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 360 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
359 kunmap_atomic(mapped_data, KM_USER0); 361 kunmap_atomic(mapped_data);
360 362
361 new_page = virt_to_page(tmp); 363 new_page = virt_to_page(tmp);
362 new_offset = offset_in_page(tmp); 364 new_offset = offset_in_page(tmp);
@@ -368,9 +370,9 @@ repeat:
368 * copying, we can finally do so. 370 * copying, we can finally do so.
369 */ 371 */
370 if (do_escape) { 372 if (do_escape) {
371 mapped_data = kmap_atomic(new_page, KM_USER0); 373 mapped_data = kmap_atomic(new_page);
372 *((unsigned int *)(mapped_data + new_offset)) = 0; 374 *((unsigned int *)(mapped_data + new_offset)) = 0;
373 kunmap_atomic(mapped_data, KM_USER0); 375 kunmap_atomic(mapped_data);
374 } 376 }
375 377
376 set_bh_page(new_bh, new_page, new_offset); 378 set_bh_page(new_bh, new_page, new_offset);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 7fce94b04bc3..b2a7e5244e39 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -718,9 +718,9 @@ done:
718 "Possible IO failure.\n"); 718 "Possible IO failure.\n");
719 page = jh2bh(jh)->b_page; 719 page = jh2bh(jh)->b_page;
720 offset = offset_in_page(jh2bh(jh)->b_data); 720 offset = offset_in_page(jh2bh(jh)->b_data);
721 source = kmap_atomic(page, KM_USER0); 721 source = kmap_atomic(page);
722 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 722 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
723 kunmap_atomic(source, KM_USER0); 723 kunmap_atomic(source);
724 } 724 }
725 jbd_unlock_bh_state(bh); 725 jbd_unlock_bh_state(bh);
726 726
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5069b8475150..c067a8cae63b 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -286,10 +286,10 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
286 char *addr; 286 char *addr;
287 __u32 checksum; 287 __u32 checksum;
288 288
289 addr = kmap_atomic(page, KM_USER0); 289 addr = kmap_atomic(page);
290 checksum = crc32_be(crc32_sum, 290 checksum = crc32_be(crc32_sum,
291 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); 291 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
292 kunmap_atomic(addr, KM_USER0); 292 kunmap_atomic(addr);
293 293
294 return checksum; 294 return checksum;
295} 295}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c0a5f9f1b127..839377e3d624 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -139,6 +139,8 @@ static int kjournald2(void *arg)
139 setup_timer(&journal->j_commit_timer, commit_timeout, 139 setup_timer(&journal->j_commit_timer, commit_timeout,
140 (unsigned long)current); 140 (unsigned long)current);
141 141
142 set_freezable();
143
142 /* Record that the journal thread is running */ 144 /* Record that the journal thread is running */
143 journal->j_task = current; 145 journal->j_task = current;
144 wake_up(&journal->j_wait_done_commit); 146 wake_up(&journal->j_wait_done_commit);
@@ -345,7 +347,7 @@ repeat:
345 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 347 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
346 } 348 }
347 349
348 mapped_data = kmap_atomic(new_page, KM_USER0); 350 mapped_data = kmap_atomic(new_page);
349 /* 351 /*
350 * Fire data frozen trigger if data already wasn't frozen. Do this 352 * Fire data frozen trigger if data already wasn't frozen. Do this
351 * before checking for escaping, as the trigger may modify the magic 353 * before checking for escaping, as the trigger may modify the magic
@@ -364,7 +366,7 @@ repeat:
364 need_copy_out = 1; 366 need_copy_out = 1;
365 do_escape = 1; 367 do_escape = 1;
366 } 368 }
367 kunmap_atomic(mapped_data, KM_USER0); 369 kunmap_atomic(mapped_data);
368 370
369 /* 371 /*
370 * Do we need to do a data copy? 372 * Do we need to do a data copy?
@@ -385,9 +387,9 @@ repeat:
385 } 387 }
386 388
387 jh_in->b_frozen_data = tmp; 389 jh_in->b_frozen_data = tmp;
388 mapped_data = kmap_atomic(new_page, KM_USER0); 390 mapped_data = kmap_atomic(new_page);
389 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 391 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
390 kunmap_atomic(mapped_data, KM_USER0); 392 kunmap_atomic(mapped_data);
391 393
392 new_page = virt_to_page(tmp); 394 new_page = virt_to_page(tmp);
393 new_offset = offset_in_page(tmp); 395 new_offset = offset_in_page(tmp);
@@ -406,9 +408,9 @@ repeat:
406 * copying, we can finally do so. 408 * copying, we can finally do so.
407 */ 409 */
408 if (do_escape) { 410 if (do_escape) {
409 mapped_data = kmap_atomic(new_page, KM_USER0); 411 mapped_data = kmap_atomic(new_page);
410 *((unsigned int *)(mapped_data + new_offset)) = 0; 412 *((unsigned int *)(mapped_data + new_offset)) = 0;
411 kunmap_atomic(mapped_data, KM_USER0); 413 kunmap_atomic(mapped_data);
412 } 414 }
413 415
414 set_bh_page(new_bh, new_page, new_offset); 416 set_bh_page(new_bh, new_page, new_offset);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 35ae096bed5d..e5aba56e1fd5 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -783,12 +783,12 @@ done:
783 "Possible IO failure.\n"); 783 "Possible IO failure.\n");
784 page = jh2bh(jh)->b_page; 784 page = jh2bh(jh)->b_page;
785 offset = offset_in_page(jh2bh(jh)->b_data); 785 offset = offset_in_page(jh2bh(jh)->b_data);
786 source = kmap_atomic(page, KM_USER0); 786 source = kmap_atomic(page);
787 /* Fire data frozen trigger just before we copy the data */ 787 /* Fire data frozen trigger just before we copy the data */
788 jbd2_buffer_frozen_trigger(jh, source + offset, 788 jbd2_buffer_frozen_trigger(jh, source + offset,
789 jh->b_triggers); 789 jh->b_triggers);
790 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 790 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
791 kunmap_atomic(source, KM_USER0); 791 kunmap_atomic(source);
792 792
793 /* 793 /*
794 * Now that the frozen data is saved off, we need to store 794 * Now that the frozen data is saved off, we need to store
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 5b6c9d1a2fb9..96ed3c9ec3fc 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -340,7 +340,7 @@ int jffs2_unregister_compressor(struct jffs2_compressor *comp)
340 340
341 if (comp->usecount) { 341 if (comp->usecount) {
342 spin_unlock(&jffs2_compressor_list_lock); 342 spin_unlock(&jffs2_compressor_list_lock);
343 printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n"); 343 printk(KERN_WARNING "JFFS2: Compressor module is in use. Unregister failed.\n");
344 return -1; 344 return -1;
345 } 345 }
346 list_del(&comp->list); 346 list_del(&comp->list);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2e0123867cb1..c0d5c9d770da 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -561,9 +561,9 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
561 ret = -ENOMEM; 561 ret = -ENOMEM;
562 562
563 D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n")); 563 D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n"));
564 sb->s_root = d_alloc_root(root_i); 564 sb->s_root = d_make_root(root_i);
565 if (!sb->s_root) 565 if (!sb->s_root)
566 goto out_root_i; 566 goto out_root;
567 567
568 sb->s_maxbytes = 0xFFFFFFFF; 568 sb->s_maxbytes = 0xFFFFFFFF;
569 sb->s_blocksize = PAGE_CACHE_SIZE; 569 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -573,8 +573,6 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
573 jffs2_start_garbage_collect_thread(c); 573 jffs2_start_garbage_collect_thread(c);
574 return 0; 574 return 0;
575 575
576 out_root_i:
577 iput(root_i);
578out_root: 576out_root:
579 jffs2_free_ino_caches(c); 577 jffs2_free_ino_caches(c);
580 jffs2_free_raw_node_refs(c); 578 jffs2_free_raw_node_refs(c);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 5f7c160ea64f..07c91ca6017d 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -220,12 +220,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
220 220
221 dquot_initialize(dip); 221 dquot_initialize(dip);
222 222
223 /* link count overflow on parent directory ? */
224 if (dip->i_nlink == JFS_LINK_MAX) {
225 rc = -EMLINK;
226 goto out1;
227 }
228
229 /* 223 /*
230 * search parent directory for entry/freespace 224 * search parent directory for entry/freespace
231 * (dtSearch() returns parent directory page pinned) 225 * (dtSearch() returns parent directory page pinned)
@@ -806,9 +800,6 @@ static int jfs_link(struct dentry *old_dentry,
806 jfs_info("jfs_link: %s %s", old_dentry->d_name.name, 800 jfs_info("jfs_link: %s %s", old_dentry->d_name.name,
807 dentry->d_name.name); 801 dentry->d_name.name);
808 802
809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK;
811
812 dquot_initialize(dir); 803 dquot_initialize(dir);
813 804
814 tid = txBegin(ip->i_sb, 0); 805 tid = txBegin(ip->i_sb, 0);
@@ -1138,10 +1129,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1138 rc = -ENOTEMPTY; 1129 rc = -ENOTEMPTY;
1139 goto out3; 1130 goto out3;
1140 } 1131 }
1141 } else if ((new_dir != old_dir) &&
1142 (new_dir->i_nlink == JFS_LINK_MAX)) {
1143 rc = -EMLINK;
1144 goto out3;
1145 } 1132 }
1146 } else if (new_ip) { 1133 } else if (new_ip) {
1147 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); 1134 IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 682bca642f38..4a82950f412f 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -441,6 +441,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
441 return -ENOMEM; 441 return -ENOMEM;
442 442
443 sb->s_fs_info = sbi; 443 sb->s_fs_info = sbi;
444 sb->s_max_links = JFS_LINK_MAX;
444 sbi->sb = sb; 445 sbi->sb = sb;
445 sbi->uid = sbi->gid = sbi->umask = -1; 446 sbi->uid = sbi->gid = sbi->umask = -1;
446 447
@@ -521,7 +522,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
521 ret = PTR_ERR(inode); 522 ret = PTR_ERR(inode);
522 goto out_no_rw; 523 goto out_no_rw;
523 } 524 }
524 sb->s_root = d_alloc_root(inode); 525 sb->s_root = d_make_root(inode);
525 if (!sb->s_root) 526 if (!sb->s_root)
526 goto out_no_root; 527 goto out_no_root;
527 528
@@ -539,7 +540,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
539 540
540out_no_root: 541out_no_root:
541 jfs_err("jfs_read_super: get root dentry failed"); 542 jfs_err("jfs_read_super: get root dentry failed");
542 iput(inode);
543 543
544out_no_rw: 544out_no_rw:
545 rc = jfs_umount(sb); 545 rc = jfs_umount(sb);
@@ -860,8 +860,14 @@ static int __init init_jfs_fs(void)
860 jfs_proc_init(); 860 jfs_proc_init();
861#endif 861#endif
862 862
863 return register_filesystem(&jfs_fs_type); 863 rc = register_filesystem(&jfs_fs_type);
864 if (!rc)
865 return 0;
864 866
867#ifdef PROC_FS_JFS
868 jfs_proc_clean();
869#endif
870 kthread_stop(jfsSyncThread);
865kill_committask: 871kill_committask:
866 for (i = 0; i < commit_threads; i++) 872 for (i = 0; i < commit_threads; i++)
867 kthread_stop(jfsCommitThread[i]); 873 kthread_stop(jfsCommitThread[i]);
diff --git a/fs/libfs.c b/fs/libfs.c
index 5b2dbb3ba4fc..722e0d5ba182 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -491,11 +491,9 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
491 inode->i_op = &simple_dir_inode_operations; 491 inode->i_op = &simple_dir_inode_operations;
492 inode->i_fop = &simple_dir_operations; 492 inode->i_fop = &simple_dir_operations;
493 set_nlink(inode, 2); 493 set_nlink(inode, 2);
494 root = d_alloc_root(inode); 494 root = d_make_root(inode);
495 if (!root) { 495 if (!root)
496 iput(inode);
497 return -ENOMEM; 496 return -ENOMEM;
498 }
499 for (i = 0; !files->name || files->name[0]; i++, files++) { 497 for (i = 0; !files->name || files->name[0]; i++, files++) {
500 if (!files->name) 498 if (!files->name)
501 continue; 499 continue;
@@ -536,7 +534,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
536 spin_lock(&pin_fs_lock); 534 spin_lock(&pin_fs_lock);
537 if (unlikely(!*mount)) { 535 if (unlikely(!*mount)) {
538 spin_unlock(&pin_fs_lock); 536 spin_unlock(&pin_fs_lock);
539 mnt = vfs_kern_mount(type, 0, type->name, NULL); 537 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL);
540 if (IS_ERR(mnt)) 538 if (IS_ERR(mnt))
541 return PTR_ERR(mnt); 539 return PTR_ERR(mnt);
542 spin_lock(&pin_fs_lock); 540 spin_lock(&pin_fs_lock);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 3de7a32cadbe..bea5d1b9954b 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -177,17 +177,17 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
177 (filler_t *)logfs_readpage, NULL); 177 (filler_t *)logfs_readpage, NULL);
178 if (IS_ERR(page)) 178 if (IS_ERR(page))
179 return page; 179 return page;
180 dd = kmap_atomic(page, KM_USER0); 180 dd = kmap_atomic(page);
181 BUG_ON(dd->namelen == 0); 181 BUG_ON(dd->namelen == 0);
182 182
183 if (name->len != be16_to_cpu(dd->namelen) || 183 if (name->len != be16_to_cpu(dd->namelen) ||
184 memcmp(name->name, dd->name, name->len)) { 184 memcmp(name->name, dd->name, name->len)) {
185 kunmap_atomic(dd, KM_USER0); 185 kunmap_atomic(dd);
186 page_cache_release(page); 186 page_cache_release(page);
187 continue; 187 continue;
188 } 188 }
189 189
190 kunmap_atomic(dd, KM_USER0); 190 kunmap_atomic(dd);
191 return page; 191 return page;
192 } 192 }
193 return NULL; 193 return NULL;
@@ -365,9 +365,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
365 return NULL; 365 return NULL;
366 } 366 }
367 index = page->index; 367 index = page->index;
368 dd = kmap_atomic(page, KM_USER0); 368 dd = kmap_atomic(page);
369 ino = be64_to_cpu(dd->ino); 369 ino = be64_to_cpu(dd->ino);
370 kunmap_atomic(dd, KM_USER0); 370 kunmap_atomic(dd);
371 page_cache_release(page); 371 page_cache_release(page);
372 372
373 inode = logfs_iget(dir->i_sb, ino); 373 inode = logfs_iget(dir->i_sb, ino);
@@ -402,12 +402,12 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
402 if (!page) 402 if (!page)
403 return -ENOMEM; 403 return -ENOMEM;
404 404
405 dd = kmap_atomic(page, KM_USER0); 405 dd = kmap_atomic(page);
406 memset(dd, 0, sizeof(*dd)); 406 memset(dd, 0, sizeof(*dd));
407 dd->ino = cpu_to_be64(inode->i_ino); 407 dd->ino = cpu_to_be64(inode->i_ino);
408 dd->type = logfs_type(inode); 408 dd->type = logfs_type(inode);
409 logfs_set_name(dd, &dentry->d_name); 409 logfs_set_name(dd, &dentry->d_name);
410 kunmap_atomic(dd, KM_USER0); 410 kunmap_atomic(dd);
411 411
412 err = logfs_write_buf(dir, page, WF_LOCK); 412 err = logfs_write_buf(dir, page, WF_LOCK);
413 unlock_page(page); 413 unlock_page(page);
@@ -558,9 +558,6 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
558{ 558{
559 struct inode *inode = old_dentry->d_inode; 559 struct inode *inode = old_dentry->d_inode;
560 560
561 if (inode->i_nlink >= LOGFS_LINK_MAX)
562 return -EMLINK;
563
564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 561 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
565 ihold(inode); 562 ihold(inode);
566 inc_nlink(inode); 563 inc_nlink(inode);
@@ -579,9 +576,9 @@ static int logfs_get_dd(struct inode *dir, struct dentry *dentry,
579 if (IS_ERR(page)) 576 if (IS_ERR(page))
580 return PTR_ERR(page); 577 return PTR_ERR(page);
581 *pos = page->index; 578 *pos = page->index;
582 map = kmap_atomic(page, KM_USER0); 579 map = kmap_atomic(page);
583 memcpy(dd, map, sizeof(*dd)); 580 memcpy(dd, map, sizeof(*dd));
584 kunmap_atomic(map, KM_USER0); 581 kunmap_atomic(map);
585 page_cache_release(page); 582 page_cache_release(page);
586 return 0; 583 return 0;
587} 584}
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 4153e65b0148..e3ab5e5a904c 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -517,9 +517,9 @@ static int indirect_write_alias(struct super_block *sb,
517 517
518 ino = page->mapping->host->i_ino; 518 ino = page->mapping->host->i_ino;
519 logfs_unpack_index(page->index, &bix, &level); 519 logfs_unpack_index(page->index, &bix, &level);
520 child = kmap_atomic(page, KM_USER0); 520 child = kmap_atomic(page);
521 val = child[pos]; 521 val = child[pos];
522 kunmap_atomic(child, KM_USER0); 522 kunmap_atomic(child);
523 err = write_one_alias(sb, ino, bix, level, pos, val); 523 err = write_one_alias(sb, ino, bix, level, pos, val);
524 if (err) 524 if (err)
525 return err; 525 return err;
@@ -673,9 +673,9 @@ static void alloc_indirect_block(struct inode *inode, struct page *page,
673 alloc_data_block(inode, page); 673 alloc_data_block(inode, page);
674 674
675 block = logfs_block(page); 675 block = logfs_block(page);
676 array = kmap_atomic(page, KM_USER0); 676 array = kmap_atomic(page);
677 initialize_block_counters(page, block, array, page_is_empty); 677 initialize_block_counters(page, block, array, page_is_empty);
678 kunmap_atomic(array, KM_USER0); 678 kunmap_atomic(array);
679} 679}
680 680
681static void block_set_pointer(struct page *page, int index, u64 ptr) 681static void block_set_pointer(struct page *page, int index, u64 ptr)
@@ -685,10 +685,10 @@ static void block_set_pointer(struct page *page, int index, u64 ptr)
685 u64 oldptr; 685 u64 oldptr;
686 686
687 BUG_ON(!block); 687 BUG_ON(!block);
688 array = kmap_atomic(page, KM_USER0); 688 array = kmap_atomic(page);
689 oldptr = be64_to_cpu(array[index]); 689 oldptr = be64_to_cpu(array[index]);
690 array[index] = cpu_to_be64(ptr); 690 array[index] = cpu_to_be64(ptr);
691 kunmap_atomic(array, KM_USER0); 691 kunmap_atomic(array);
692 SetPageUptodate(page); 692 SetPageUptodate(page);
693 693
694 block->full += !!(ptr & LOGFS_FULLY_POPULATED) 694 block->full += !!(ptr & LOGFS_FULLY_POPULATED)
@@ -701,9 +701,9 @@ static u64 block_get_pointer(struct page *page, int index)
701 __be64 *block; 701 __be64 *block;
702 u64 ptr; 702 u64 ptr;
703 703
704 block = kmap_atomic(page, KM_USER0); 704 block = kmap_atomic(page);
705 ptr = be64_to_cpu(block[index]); 705 ptr = be64_to_cpu(block[index]);
706 kunmap_atomic(block, KM_USER0); 706 kunmap_atomic(block);
707 return ptr; 707 return ptr;
708} 708}
709 709
@@ -850,7 +850,7 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
850 } 850 }
851 851
852 slot = get_bits(bix, SUBLEVEL(level)); 852 slot = get_bits(bix, SUBLEVEL(level));
853 rblock = kmap_atomic(page, KM_USER0); 853 rblock = kmap_atomic(page);
854 while (slot < LOGFS_BLOCK_FACTOR) { 854 while (slot < LOGFS_BLOCK_FACTOR) {
855 if (data && (rblock[slot] != 0)) 855 if (data && (rblock[slot] != 0))
856 break; 856 break;
@@ -861,12 +861,12 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
861 bix &= ~(increment - 1); 861 bix &= ~(increment - 1);
862 } 862 }
863 if (slot >= LOGFS_BLOCK_FACTOR) { 863 if (slot >= LOGFS_BLOCK_FACTOR) {
864 kunmap_atomic(rblock, KM_USER0); 864 kunmap_atomic(rblock);
865 logfs_put_read_page(page); 865 logfs_put_read_page(page);
866 return bix; 866 return bix;
867 } 867 }
868 bofs = be64_to_cpu(rblock[slot]); 868 bofs = be64_to_cpu(rblock[slot]);
869 kunmap_atomic(rblock, KM_USER0); 869 kunmap_atomic(rblock);
870 logfs_put_read_page(page); 870 logfs_put_read_page(page);
871 if (!bofs) { 871 if (!bofs) {
872 BUG_ON(data); 872 BUG_ON(data);
@@ -1961,9 +1961,9 @@ int logfs_read_inode(struct inode *inode)
1961 if (IS_ERR(page)) 1961 if (IS_ERR(page))
1962 return PTR_ERR(page); 1962 return PTR_ERR(page);
1963 1963
1964 di = kmap_atomic(page, KM_USER0); 1964 di = kmap_atomic(page);
1965 logfs_disk_to_inode(di, inode); 1965 logfs_disk_to_inode(di, inode);
1966 kunmap_atomic(di, KM_USER0); 1966 kunmap_atomic(di);
1967 move_page_to_inode(inode, page); 1967 move_page_to_inode(inode, page);
1968 page_cache_release(page); 1968 page_cache_release(page);
1969 return 0; 1969 return 0;
@@ -1982,9 +1982,9 @@ static struct page *inode_to_page(struct inode *inode)
1982 if (!page) 1982 if (!page)
1983 return NULL; 1983 return NULL;
1984 1984
1985 di = kmap_atomic(page, KM_USER0); 1985 di = kmap_atomic(page);
1986 logfs_inode_to_disk(inode, di); 1986 logfs_inode_to_disk(inode, di);
1987 kunmap_atomic(di, KM_USER0); 1987 kunmap_atomic(di);
1988 move_inode_to_page(page, inode); 1988 move_inode_to_page(page, inode);
1989 return page; 1989 return page;
1990} 1990}
@@ -2041,13 +2041,13 @@ static void logfs_mod_segment_entry(struct super_block *sb, u32 segno,
2041 2041
2042 if (write) 2042 if (write)
2043 alloc_indirect_block(inode, page, 0); 2043 alloc_indirect_block(inode, page, 0);
2044 se = kmap_atomic(page, KM_USER0); 2044 se = kmap_atomic(page);
2045 change_se(se + child_no, arg); 2045 change_se(se + child_no, arg);
2046 if (write) { 2046 if (write) {
2047 logfs_set_alias(sb, logfs_block(page), child_no); 2047 logfs_set_alias(sb, logfs_block(page), child_no);
2048 BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize); 2048 BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize);
2049 } 2049 }
2050 kunmap_atomic(se, KM_USER0); 2050 kunmap_atomic(se);
2051 2051
2052 logfs_put_write_page(page); 2052 logfs_put_write_page(page);
2053} 2053}
@@ -2245,10 +2245,10 @@ int logfs_inode_write(struct inode *inode, const void *buf, size_t count,
2245 if (!page) 2245 if (!page)
2246 return -ENOMEM; 2246 return -ENOMEM;
2247 2247
2248 pagebuf = kmap_atomic(page, KM_USER0); 2248 pagebuf = kmap_atomic(page);
2249 memcpy(pagebuf, buf, count); 2249 memcpy(pagebuf, buf, count);
2250 flush_dcache_page(page); 2250 flush_dcache_page(page);
2251 kunmap_atomic(pagebuf, KM_USER0); 2251 kunmap_atomic(pagebuf);
2252 2252
2253 if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE) 2253 if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE)
2254 i_size_write(inode, pos + LOGFS_BLOCKSIZE); 2254 i_size_write(inode, pos + LOGFS_BLOCKSIZE);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index ab798ed1cc88..e28d090c98d6 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -543,9 +543,9 @@ void move_page_to_btree(struct page *page)
543 BUG_ON(!item); /* mempool empty */ 543 BUG_ON(!item); /* mempool empty */
544 memset(item, 0, sizeof(*item)); 544 memset(item, 0, sizeof(*item));
545 545
546 child = kmap_atomic(page, KM_USER0); 546 child = kmap_atomic(page);
547 item->val = child[pos]; 547 item->val = child[pos];
548 kunmap_atomic(child, KM_USER0); 548 kunmap_atomic(child);
549 item->child_no = pos; 549 item->child_no = pos;
550 list_add(&item->list, &block->item_list); 550 list_add(&item->list, &block->item_list);
551 } 551 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c9ee7f5d1caf..97bca623d893 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -315,11 +315,9 @@ static int logfs_get_sb_final(struct super_block *sb)
315 if (IS_ERR(rootdir)) 315 if (IS_ERR(rootdir))
316 goto fail; 316 goto fail;
317 317
318 sb->s_root = d_alloc_root(rootdir); 318 sb->s_root = d_make_root(rootdir);
319 if (!sb->s_root) { 319 if (!sb->s_root)
320 iput(rootdir);
321 goto fail; 320 goto fail;
322 }
323 321
324 /* at that point we know that ->put_super() will be called */ 322 /* at that point we know that ->put_super() will be called */
325 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 323 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
@@ -542,6 +540,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
542 * the filesystem incompatible with 32bit systems. 540 * the filesystem incompatible with 32bit systems.
543 */ 541 */
544 sb->s_maxbytes = (1ull << 43) - 1; 542 sb->s_maxbytes = (1ull << 43) - 1;
543 sb->s_max_links = LOGFS_LINK_MAX;
545 sb->s_op = &logfs_super_operations; 544 sb->s_op = &logfs_super_operations;
546 sb->s_flags = flags | MS_NOATIME; 545 sb->s_flags = flags | MS_NOATIME;
547 546
@@ -627,7 +626,10 @@ static int __init logfs_init(void)
627 if (ret) 626 if (ret)
628 goto out2; 627 goto out2;
629 628
630 return register_filesystem(&logfs_fs_type); 629 ret = register_filesystem(&logfs_fs_type);
630 if (!ret)
631 return 0;
632 logfs_destroy_inode_cache();
631out2: 633out2:
632 logfs_compr_exit(); 634 logfs_compr_exit();
633out1: 635out1:
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 085a9262c692..685b2d981b87 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -335,7 +335,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
335 goto fail; 335 goto fail;
336 } 336 }
337 337
338 kaddr = kmap_atomic(page, KM_USER0); 338 kaddr = kmap_atomic(page);
339 memset(kaddr, 0, PAGE_CACHE_SIZE); 339 memset(kaddr, 0, PAGE_CACHE_SIZE);
340 340
341 if (sbi->s_version == MINIX_V3) { 341 if (sbi->s_version == MINIX_V3) {
@@ -355,7 +355,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
355 de->inode = dir->i_ino; 355 de->inode = dir->i_ino;
356 strcpy(de->name, ".."); 356 strcpy(de->name, "..");
357 } 357 }
358 kunmap_atomic(kaddr, KM_USER0); 358 kunmap_atomic(kaddr);
359 359
360 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize); 360 err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
361fail: 361fail:
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fa8b612b8ce2..fcb05d2c6b5f 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -190,24 +190,24 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
190 sbi->s_version = MINIX_V1; 190 sbi->s_version = MINIX_V1;
191 sbi->s_dirsize = 16; 191 sbi->s_dirsize = 16;
192 sbi->s_namelen = 14; 192 sbi->s_namelen = 14;
193 sbi->s_link_max = MINIX_LINK_MAX; 193 s->s_max_links = MINIX_LINK_MAX;
194 } else if (s->s_magic == MINIX_SUPER_MAGIC2) { 194 } else if (s->s_magic == MINIX_SUPER_MAGIC2) {
195 sbi->s_version = MINIX_V1; 195 sbi->s_version = MINIX_V1;
196 sbi->s_dirsize = 32; 196 sbi->s_dirsize = 32;
197 sbi->s_namelen = 30; 197 sbi->s_namelen = 30;
198 sbi->s_link_max = MINIX_LINK_MAX; 198 s->s_max_links = MINIX_LINK_MAX;
199 } else if (s->s_magic == MINIX2_SUPER_MAGIC) { 199 } else if (s->s_magic == MINIX2_SUPER_MAGIC) {
200 sbi->s_version = MINIX_V2; 200 sbi->s_version = MINIX_V2;
201 sbi->s_nzones = ms->s_zones; 201 sbi->s_nzones = ms->s_zones;
202 sbi->s_dirsize = 16; 202 sbi->s_dirsize = 16;
203 sbi->s_namelen = 14; 203 sbi->s_namelen = 14;
204 sbi->s_link_max = MINIX2_LINK_MAX; 204 s->s_max_links = MINIX2_LINK_MAX;
205 } else if (s->s_magic == MINIX2_SUPER_MAGIC2) { 205 } else if (s->s_magic == MINIX2_SUPER_MAGIC2) {
206 sbi->s_version = MINIX_V2; 206 sbi->s_version = MINIX_V2;
207 sbi->s_nzones = ms->s_zones; 207 sbi->s_nzones = ms->s_zones;
208 sbi->s_dirsize = 32; 208 sbi->s_dirsize = 32;
209 sbi->s_namelen = 30; 209 sbi->s_namelen = 30;
210 sbi->s_link_max = MINIX2_LINK_MAX; 210 s->s_max_links = MINIX2_LINK_MAX;
211 } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) { 211 } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) {
212 m3s = (struct minix3_super_block *) bh->b_data; 212 m3s = (struct minix3_super_block *) bh->b_data;
213 s->s_magic = m3s->s_magic; 213 s->s_magic = m3s->s_magic;
@@ -221,9 +221,9 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
221 sbi->s_dirsize = 64; 221 sbi->s_dirsize = 64;
222 sbi->s_namelen = 60; 222 sbi->s_namelen = 60;
223 sbi->s_version = MINIX_V3; 223 sbi->s_version = MINIX_V3;
224 sbi->s_link_max = MINIX2_LINK_MAX;
225 sbi->s_mount_state = MINIX_VALID_FS; 224 sbi->s_mount_state = MINIX_VALID_FS;
226 sb_set_blocksize(s, m3s->s_blocksize); 225 sb_set_blocksize(s, m3s->s_blocksize);
226 s->s_max_links = MINIX2_LINK_MAX;
227 } else 227 } else
228 goto out_no_fs; 228 goto out_no_fs;
229 229
@@ -254,14 +254,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
254 minix_set_bit(0,sbi->s_imap[0]->b_data); 254 minix_set_bit(0,sbi->s_imap[0]->b_data);
255 minix_set_bit(0,sbi->s_zmap[0]->b_data); 255 minix_set_bit(0,sbi->s_zmap[0]->b_data);
256 256
257 /* set up enough so that it can read an inode */
258 s->s_op = &minix_sops;
259 root_inode = minix_iget(s, MINIX_ROOT_INO);
260 if (IS_ERR(root_inode)) {
261 ret = PTR_ERR(root_inode);
262 goto out_no_root;
263 }
264
265 /* Apparently minix can create filesystems that allocate more blocks for 257 /* Apparently minix can create filesystems that allocate more blocks for
266 * the bitmaps than needed. We simply ignore that, but verify it didn't 258 * the bitmaps than needed. We simply ignore that, but verify it didn't
267 * create one with not enough blocks and bail out if so. 259 * create one with not enough blocks and bail out if so.
@@ -270,7 +262,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
270 if (sbi->s_imap_blocks < block) { 262 if (sbi->s_imap_blocks < block) {
271 printk("MINIX-fs: file system does not have enough " 263 printk("MINIX-fs: file system does not have enough "
272 "imap blocks allocated. Refusing to mount\n"); 264 "imap blocks allocated. Refusing to mount\n");
273 goto out_iput; 265 goto out_no_bitmap;
274 } 266 }
275 267
276 block = minix_blocks_needed( 268 block = minix_blocks_needed(
@@ -279,13 +271,21 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
279 if (sbi->s_zmap_blocks < block) { 271 if (sbi->s_zmap_blocks < block) {
280 printk("MINIX-fs: file system does not have enough " 272 printk("MINIX-fs: file system does not have enough "
281 "zmap blocks allocated. Refusing to mount.\n"); 273 "zmap blocks allocated. Refusing to mount.\n");
282 goto out_iput; 274 goto out_no_bitmap;
275 }
276
277 /* set up enough so that it can read an inode */
278 s->s_op = &minix_sops;
279 root_inode = minix_iget(s, MINIX_ROOT_INO);
280 if (IS_ERR(root_inode)) {
281 ret = PTR_ERR(root_inode);
282 goto out_no_root;
283 } 283 }
284 284
285 ret = -ENOMEM; 285 ret = -ENOMEM;
286 s->s_root = d_alloc_root(root_inode); 286 s->s_root = d_make_root(root_inode);
287 if (!s->s_root) 287 if (!s->s_root)
288 goto out_iput; 288 goto out_no_root;
289 289
290 if (!(s->s_flags & MS_RDONLY)) { 290 if (!(s->s_flags & MS_RDONLY)) {
291 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 291 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
@@ -301,10 +301,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
301 301
302 return 0; 302 return 0;
303 303
304out_iput:
305 iput(root_inode);
306 goto out_freemap;
307
308out_no_root: 304out_no_root:
309 if (!silent) 305 if (!silent)
310 printk("MINIX-fs: get root inode failed\n"); 306 printk("MINIX-fs: get root inode failed\n");
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index c889ef0aa571..1ebd11854622 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -34,7 +34,6 @@ struct minix_sb_info {
34 unsigned long s_max_size; 34 unsigned long s_max_size;
35 int s_dirsize; 35 int s_dirsize;
36 int s_namelen; 36 int s_namelen;
37 int s_link_max;
38 struct buffer_head ** s_imap; 37 struct buffer_head ** s_imap;
39 struct buffer_head ** s_zmap; 38 struct buffer_head ** s_zmap;
40 struct buffer_head * s_sbh; 39 struct buffer_head * s_sbh;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 2f76e38c2065..2d0ee1786305 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -94,9 +94,6 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
94{ 94{
95 struct inode *inode = old_dentry->d_inode; 95 struct inode *inode = old_dentry->d_inode;
96 96
97 if (inode->i_nlink >= minix_sb(inode->i_sb)->s_link_max)
98 return -EMLINK;
99
100 inode->i_ctime = CURRENT_TIME_SEC; 97 inode->i_ctime = CURRENT_TIME_SEC;
101 inode_inc_link_count(inode); 98 inode_inc_link_count(inode);
102 ihold(inode); 99 ihold(inode);
@@ -106,10 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
106static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) 103static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
107{ 104{
108 struct inode * inode; 105 struct inode * inode;
109 int err = -EMLINK; 106 int err;
110
111 if (dir->i_nlink >= minix_sb(dir->i_sb)->s_link_max)
112 goto out;
113 107
114 inode_inc_link_count(dir); 108 inode_inc_link_count(dir);
115 109
@@ -181,7 +175,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
181static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, 175static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
182 struct inode * new_dir, struct dentry *new_dentry) 176 struct inode * new_dir, struct dentry *new_dentry)
183{ 177{
184 struct minix_sb_info * info = minix_sb(old_dir->i_sb);
185 struct inode * old_inode = old_dentry->d_inode; 178 struct inode * old_inode = old_dentry->d_inode;
186 struct inode * new_inode = new_dentry->d_inode; 179 struct inode * new_inode = new_dentry->d_inode;
187 struct page * dir_page = NULL; 180 struct page * dir_page = NULL;
@@ -219,11 +212,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
219 drop_nlink(new_inode); 212 drop_nlink(new_inode);
220 inode_dec_link_count(new_inode); 213 inode_dec_link_count(new_inode);
221 } else { 214 } else {
222 if (dir_de) {
223 err = -EMLINK;
224 if (new_dir->i_nlink >= info->s_link_max)
225 goto out_dir;
226 }
227 err = minix_add_link(new_dentry, old_inode); 215 err = minix_add_link(new_dentry, old_inode);
228 if (err) 216 if (err)
229 goto out_dir; 217 goto out_dir;
diff --git a/fs/namei.c b/fs/namei.c
index 208c6aa4a989..a94a7f9a03ea 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -642,7 +642,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
642 cond_resched(); 642 cond_resched();
643 current->total_link_count++; 643 current->total_link_count++;
644 644
645 touch_atime(link->mnt, dentry); 645 touch_atime(link);
646 nd_set_link(nd, NULL); 646 nd_set_link(nd, NULL);
647 647
648 error = security_inode_follow_link(link->dentry, nd); 648 error = security_inode_follow_link(link->dentry, nd);
@@ -1095,8 +1095,10 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr
1095 struct dentry *old; 1095 struct dentry *old;
1096 1096
1097 /* Don't create child dentry for a dead directory. */ 1097 /* Don't create child dentry for a dead directory. */
1098 if (unlikely(IS_DEADDIR(inode))) 1098 if (unlikely(IS_DEADDIR(inode))) {
1099 dput(dentry);
1099 return ERR_PTR(-ENOENT); 1100 return ERR_PTR(-ENOENT);
1101 }
1100 1102
1101 old = inode->i_op->lookup(inode, dentry, nd); 1103 old = inode->i_op->lookup(inode, dentry, nd);
1102 if (unlikely(old)) { 1104 if (unlikely(old)) {
@@ -1373,6 +1375,162 @@ static inline int can_lookup(struct inode *inode)
1373} 1375}
1374 1376
1375/* 1377/*
1378 * We can do the critical dentry name comparison and hashing
1379 * operations one word at a time, but we are limited to:
1380 *
1381 * - Architectures with fast unaligned word accesses. We could
1382 * do a "get_unaligned()" if this helps and is sufficiently
1383 * fast.
1384 *
1385 * - Little-endian machines (so that we can generate the mask
1386 * of low bytes efficiently). Again, we *could* do a byte
1387 * swapping load on big-endian architectures if that is not
1388 * expensive enough to make the optimization worthless.
1389 *
1390 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
1391 * do not trap on the (extremely unlikely) case of a page
1392 * crossing operation.
1393 *
1394 * - Furthermore, we need an efficient 64-bit compile for the
1395 * 64-bit case in order to generate the "number of bytes in
1396 * the final mask". Again, that could be replaced with a
1397 * efficient population count instruction or similar.
1398 */
1399#ifdef CONFIG_DCACHE_WORD_ACCESS
1400
1401#ifdef CONFIG_64BIT
1402
1403/*
1404 * Jan Achrenius on G+: microoptimized version of
1405 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
1406 * that works for the bytemasks without having to
1407 * mask them first.
1408 */
1409static inline long count_masked_bytes(unsigned long mask)
1410{
1411 return mask*0x0001020304050608 >> 56;
1412}
1413
1414static inline unsigned int fold_hash(unsigned long hash)
1415{
1416 hash += hash >> (8*sizeof(int));
1417 return hash;
1418}
1419
1420#else /* 32-bit case */
1421
1422/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
1423static inline long count_masked_bytes(long mask)
1424{
1425 /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
1426 long a = (0x0ff0001+mask) >> 23;
1427 /* Fix the 1 for 00 case */
1428 return a & mask;
1429}
1430
1431#define fold_hash(x) (x)
1432
1433#endif
1434
1435unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1436{
1437 unsigned long a, mask;
1438 unsigned long hash = 0;
1439
1440 for (;;) {
1441 a = *(unsigned long *)name;
1442 hash *= 9;
1443 if (len < sizeof(unsigned long))
1444 break;
1445 hash += a;
1446 name += sizeof(unsigned long);
1447 len -= sizeof(unsigned long);
1448 if (!len)
1449 goto done;
1450 }
1451 mask = ~(~0ul << len*8);
1452 hash += mask & a;
1453done:
1454 return fold_hash(hash);
1455}
1456EXPORT_SYMBOL(full_name_hash);
1457
1458#ifdef CONFIG_64BIT
1459#define ONEBYTES 0x0101010101010101ul
1460#define SLASHBYTES 0x2f2f2f2f2f2f2f2ful
1461#define HIGHBITS 0x8080808080808080ul
1462#else
1463#define ONEBYTES 0x01010101ul
1464#define SLASHBYTES 0x2f2f2f2ful
1465#define HIGHBITS 0x80808080ul
1466#endif
1467
1468/* Return the high bit set in the first byte that is a zero */
1469static inline unsigned long has_zero(unsigned long a)
1470{
1471 return ((a - ONEBYTES) & ~a) & HIGHBITS;
1472}
1473
1474/*
1475 * Calculate the length and hash of the path component, and
1476 * return the length of the component;
1477 */
1478static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1479{
1480 unsigned long a, mask, hash, len;
1481
1482 hash = a = 0;
1483 len = -sizeof(unsigned long);
1484 do {
1485 hash = (hash + a) * 9;
1486 len += sizeof(unsigned long);
1487 a = *(unsigned long *)(name+len);
1488 /* Do we have any NUL or '/' bytes in this word? */
1489 mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
1490 } while (!mask);
1491
1492 /* The mask *below* the first high bit set */
1493 mask = (mask - 1) & ~mask;
1494 mask >>= 7;
1495 hash += a & mask;
1496 *hashp = fold_hash(hash);
1497
1498 return len + count_masked_bytes(mask);
1499}
1500
1501#else
1502
1503unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1504{
1505 unsigned long hash = init_name_hash();
1506 while (len--)
1507 hash = partial_name_hash(*name++, hash);
1508 return end_name_hash(hash);
1509}
1510EXPORT_SYMBOL(full_name_hash);
1511
1512/*
1513 * We know there's a real path component here of at least
1514 * one character.
1515 */
1516static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1517{
1518 unsigned long hash = init_name_hash();
1519 unsigned long len = 0, c;
1520
1521 c = (unsigned char)*name;
1522 do {
1523 len++;
1524 hash = partial_name_hash(c, hash);
1525 c = (unsigned char)name[len];
1526 } while (c && c != '/');
1527 *hashp = end_name_hash(hash);
1528 return len;
1529}
1530
1531#endif
1532
1533/*
1376 * Name resolution. 1534 * Name resolution.
1377 * This is the basic name resolution function, turning a pathname into 1535 * This is the basic name resolution function, turning a pathname into
1378 * the final dentry. We expect 'base' to be positive and a directory. 1536 * the final dentry. We expect 'base' to be positive and a directory.
@@ -1392,31 +1550,22 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1392 1550
1393 /* At this point we know we have a real path component. */ 1551 /* At this point we know we have a real path component. */
1394 for(;;) { 1552 for(;;) {
1395 unsigned long hash;
1396 struct qstr this; 1553 struct qstr this;
1397 unsigned int c; 1554 long len;
1398 int type; 1555 int type;
1399 1556
1400 err = may_lookup(nd); 1557 err = may_lookup(nd);
1401 if (err) 1558 if (err)
1402 break; 1559 break;
1403 1560
1561 len = hash_name(name, &this.hash);
1404 this.name = name; 1562 this.name = name;
1405 c = *(const unsigned char *)name; 1563 this.len = len;
1406
1407 hash = init_name_hash();
1408 do {
1409 name++;
1410 hash = partial_name_hash(c, hash);
1411 c = *(const unsigned char *)name;
1412 } while (c && (c != '/'));
1413 this.len = name - (const char *) this.name;
1414 this.hash = end_name_hash(hash);
1415 1564
1416 type = LAST_NORM; 1565 type = LAST_NORM;
1417 if (this.name[0] == '.') switch (this.len) { 1566 if (name[0] == '.') switch (len) {
1418 case 2: 1567 case 2:
1419 if (this.name[1] == '.') { 1568 if (name[1] == '.') {
1420 type = LAST_DOTDOT; 1569 type = LAST_DOTDOT;
1421 nd->flags |= LOOKUP_JUMPED; 1570 nd->flags |= LOOKUP_JUMPED;
1422 } 1571 }
@@ -1435,12 +1584,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1435 } 1584 }
1436 } 1585 }
1437 1586
1438 /* remove trailing slashes? */ 1587 if (!name[len])
1439 if (!c)
1440 goto last_component; 1588 goto last_component;
1441 while (*++name == '/'); 1589 /*
1442 if (!*name) 1590 * If it wasn't NUL, we know it was '/'. Skip that
1591 * slash, and continue until no more slashes.
1592 */
1593 do {
1594 len++;
1595 } while (unlikely(name[len] == '/'));
1596 if (!name[len])
1443 goto last_component; 1597 goto last_component;
1598 name += len;
1444 1599
1445 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); 1600 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1446 if (err < 0) 1601 if (err < 0)
@@ -1773,24 +1928,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1773struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1928struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1774{ 1929{
1775 struct qstr this; 1930 struct qstr this;
1776 unsigned long hash;
1777 unsigned int c; 1931 unsigned int c;
1778 1932
1779 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1933 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1780 1934
1781 this.name = name; 1935 this.name = name;
1782 this.len = len; 1936 this.len = len;
1937 this.hash = full_name_hash(name, len);
1783 if (!len) 1938 if (!len)
1784 return ERR_PTR(-EACCES); 1939 return ERR_PTR(-EACCES);
1785 1940
1786 hash = init_name_hash();
1787 while (len--) { 1941 while (len--) {
1788 c = *(const unsigned char *)name++; 1942 c = *(const unsigned char *)name++;
1789 if (c == '/' || c == '\0') 1943 if (c == '/' || c == '\0')
1790 return ERR_PTR(-EACCES); 1944 return ERR_PTR(-EACCES);
1791 hash = partial_name_hash(c, hash);
1792 } 1945 }
1793 this.hash = end_name_hash(hash);
1794 /* 1946 /*
1795 * See if the low-level filesystem might want 1947 * See if the low-level filesystem might want
1796 * to use its own hash.. 1948 * to use its own hash..
@@ -2138,7 +2290,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2138 /* sayonara */ 2290 /* sayonara */
2139 error = complete_walk(nd); 2291 error = complete_walk(nd);
2140 if (error) 2292 if (error)
2141 return ERR_PTR(-ECHILD); 2293 return ERR_PTR(error);
2142 2294
2143 error = -ENOTDIR; 2295 error = -ENOTDIR;
2144 if (nd->flags & LOOKUP_DIRECTORY) { 2296 if (nd->flags & LOOKUP_DIRECTORY) {
@@ -2237,7 +2389,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2237 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ 2389 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
2238 error = complete_walk(nd); 2390 error = complete_walk(nd);
2239 if (error) 2391 if (error)
2240 goto exit; 2392 return ERR_PTR(error);
2241 error = -EISDIR; 2393 error = -EISDIR;
2242 if (S_ISDIR(nd->inode->i_mode)) 2394 if (S_ISDIR(nd->inode->i_mode))
2243 goto exit; 2395 goto exit;
@@ -2545,6 +2697,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
2545int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 2697int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2546{ 2698{
2547 int error = may_create(dir, dentry); 2699 int error = may_create(dir, dentry);
2700 unsigned max_links = dir->i_sb->s_max_links;
2548 2701
2549 if (error) 2702 if (error)
2550 return error; 2703 return error;
@@ -2557,6 +2710,9 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2557 if (error) 2710 if (error)
2558 return error; 2711 return error;
2559 2712
2713 if (max_links && dir->i_nlink >= max_links)
2714 return -EMLINK;
2715
2560 error = dir->i_op->mkdir(dir, dentry, mode); 2716 error = dir->i_op->mkdir(dir, dentry, mode);
2561 if (!error) 2717 if (!error)
2562 fsnotify_mkdir(dir, dentry); 2718 fsnotify_mkdir(dir, dentry);
@@ -2887,6 +3043,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
2887int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 3043int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
2888{ 3044{
2889 struct inode *inode = old_dentry->d_inode; 3045 struct inode *inode = old_dentry->d_inode;
3046 unsigned max_links = dir->i_sb->s_max_links;
2890 int error; 3047 int error;
2891 3048
2892 if (!inode) 3049 if (!inode)
@@ -2917,6 +3074,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2917 /* Make sure we don't allow creating hardlink to an unlinked file */ 3074 /* Make sure we don't allow creating hardlink to an unlinked file */
2918 if (inode->i_nlink == 0) 3075 if (inode->i_nlink == 0)
2919 error = -ENOENT; 3076 error = -ENOENT;
3077 else if (max_links && inode->i_nlink >= max_links)
3078 error = -EMLINK;
2920 else 3079 else
2921 error = dir->i_op->link(old_dentry, dir, new_dentry); 3080 error = dir->i_op->link(old_dentry, dir, new_dentry);
2922 mutex_unlock(&inode->i_mutex); 3081 mutex_unlock(&inode->i_mutex);
@@ -3026,6 +3185,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3026{ 3185{
3027 int error = 0; 3186 int error = 0;
3028 struct inode *target = new_dentry->d_inode; 3187 struct inode *target = new_dentry->d_inode;
3188 unsigned max_links = new_dir->i_sb->s_max_links;
3029 3189
3030 /* 3190 /*
3031 * If we are going to change the parent - check write permissions, 3191 * If we are going to change the parent - check write permissions,
@@ -3049,6 +3209,11 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3049 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) 3209 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
3050 goto out; 3210 goto out;
3051 3211
3212 error = -EMLINK;
3213 if (max_links && !target && new_dir != old_dir &&
3214 new_dir->i_nlink >= max_links)
3215 goto out;
3216
3052 if (target) 3217 if (target)
3053 shrink_dcache_parent(new_dentry); 3218 shrink_dcache_parent(new_dentry);
3054 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 3219 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3347,9 +3512,9 @@ retry:
3347 if (err) 3512 if (err)
3348 goto fail; 3513 goto fail;
3349 3514
3350 kaddr = kmap_atomic(page, KM_USER0); 3515 kaddr = kmap_atomic(page);
3351 memcpy(kaddr, symname, len-1); 3516 memcpy(kaddr, symname, len-1);
3352 kunmap_atomic(kaddr, KM_USER0); 3517 kunmap_atomic(kaddr);
3353 3518
3354 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, 3519 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
3355 page, fsdata); 3520 page, fsdata);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 3d1e34f8a68e..49df0e7f8379 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -716,13 +716,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
716 if (!root_inode) 716 if (!root_inode)
717 goto out_disconnect; 717 goto out_disconnect;
718 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber); 718 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
719 sb->s_root = d_alloc_root(root_inode); 719 sb->s_root = d_make_root(root_inode);
720 if (!sb->s_root) 720 if (!sb->s_root)
721 goto out_no_root; 721 goto out_disconnect;
722 return 0; 722 return 0;
723 723
724out_no_root:
725 iput(root_inode);
726out_disconnect: 724out_disconnect:
727 ncp_lock_server(server); 725 ncp_lock_server(server);
728 ncp_disconnect(server); 726 ncp_disconnect(server);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 31778f74357d..d4f772ebd1ef 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -36,6 +36,7 @@
36#include <linux/inet.h> 36#include <linux/inet.h>
37#include <linux/in6.h> 37#include <linux/in6.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/idr.h>
39#include <net/ipv6.h> 40#include <net/ipv6.h>
40#include <linux/nfs_xdr.h> 41#include <linux/nfs_xdr.h>
41#include <linux/sunrpc/bc_xprt.h> 42#include <linux/sunrpc/bc_xprt.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fd9a872fada0..32aa6917265a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -260,10 +260,10 @@ void nfs_readdir_clear_array(struct page *page)
260 struct nfs_cache_array *array; 260 struct nfs_cache_array *array;
261 int i; 261 int i;
262 262
263 array = kmap_atomic(page, KM_USER0); 263 array = kmap_atomic(page);
264 for (i = 0; i < array->size; i++) 264 for (i = 0; i < array->size; i++)
265 kfree(array->array[i].string.name); 265 kfree(array->array[i].string.name);
266 kunmap_atomic(array, KM_USER0); 266 kunmap_atomic(array);
267} 267}
268 268
269/* 269/*
@@ -1870,11 +1870,11 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1870 if (!page) 1870 if (!page)
1871 return -ENOMEM; 1871 return -ENOMEM;
1872 1872
1873 kaddr = kmap_atomic(page, KM_USER0); 1873 kaddr = kmap_atomic(page);
1874 memcpy(kaddr, symname, pathlen); 1874 memcpy(kaddr, symname, pathlen);
1875 if (pathlen < PAGE_SIZE) 1875 if (pathlen < PAGE_SIZE)
1876 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); 1876 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
1877 kunmap_atomic(kaddr, KM_USER0); 1877 kunmap_atomic(kaddr);
1878 1878
1879 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); 1879 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1880 if (error != 0) { 1880 if (error != 0) {
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index dcb61548887f..801d6d830787 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -49,11 +49,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
49{ 49{
50 /* The mntroot acts as the dummy root dentry for this superblock */ 50 /* The mntroot acts as the dummy root dentry for this superblock */
51 if (sb->s_root == NULL) { 51 if (sb->s_root == NULL) {
52 sb->s_root = d_alloc_root(inode); 52 sb->s_root = d_make_root(inode);
53 if (sb->s_root == NULL) { 53 if (sb->s_root == NULL)
54 iput(inode);
55 return -ENOMEM; 54 return -ENOMEM;
56 }
57 ihold(inode); 55 ihold(inode);
58 /* 56 /*
59 * Ensure that this dentry is invisible to d_find_alias(). 57 * Ensure that this dentry is invisible to d_find_alias().
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 2c05f1991e1e..a1bbf7780dfc 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -198,6 +198,7 @@ int nfs_idmap_init(void)
198 if (ret < 0) 198 if (ret < 0)
199 goto failed_put_key; 199 goto failed_put_key;
200 200
201 set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
201 cred->thread_keyring = keyring; 202 cred->thread_keyring = keyring;
202 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; 203 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
203 id_resolver_cache = cred; 204 id_resolver_cache = cred;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0c849c98fe4..caf92d05c3a9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -193,7 +193,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
193 * when talking to the server, we always send cookie 0 193 * when talking to the server, we always send cookie 0
194 * instead of 1 or 2. 194 * instead of 1 or 2.
195 */ 195 */
196 start = p = kmap_atomic(*readdir->pages, KM_USER0); 196 start = p = kmap_atomic(*readdir->pages);
197 197
198 if (cookie == 0) { 198 if (cookie == 0) {
199 *p++ = xdr_one; /* next */ 199 *p++ = xdr_one; /* next */
@@ -221,7 +221,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
221 221
222 readdir->pgbase = (char *)p - (char *)start; 222 readdir->pgbase = (char *)p - (char *)start;
223 readdir->count -= readdir->pgbase; 223 readdir->count -= readdir->pgbase;
224 kunmap_atomic(start, KM_USER0); 224 kunmap_atomic(start);
225} 225}
226 226
227static int nfs4_wait_clnt_recover(struct nfs_client *clp) 227static int nfs4_wait_clnt_recover(struct nfs_client *clp)
@@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3575 } 3575 }
3576 if (npages > 1) { 3576 if (npages > 1) {
3577 /* for decoding across pages */ 3577 /* for decoding across pages */
3578 args.acl_scratch = alloc_page(GFP_KERNEL); 3578 res.acl_scratch = alloc_page(GFP_KERNEL);
3579 if (!args.acl_scratch) 3579 if (!res.acl_scratch)
3580 goto out_free; 3580 goto out_free;
3581 } 3581 }
3582 args.acl_len = npages * PAGE_SIZE; 3582 args.acl_len = npages * PAGE_SIZE;
@@ -3612,8 +3612,8 @@ out_free:
3612 for (i = 0; i < npages; i++) 3612 for (i = 0; i < npages; i++)
3613 if (pages[i]) 3613 if (pages[i])
3614 __free_page(pages[i]); 3614 __free_page(pages[i]);
3615 if (args.acl_scratch) 3615 if (res.acl_scratch)
3616 __free_page(args.acl_scratch); 3616 __free_page(res.acl_scratch);
3617 return ret; 3617 return ret;
3618} 3618}
3619 3619
@@ -4883,8 +4883,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4883 clp->cl_rpcclient->cl_auth->au_flavor); 4883 clp->cl_rpcclient->cl_auth->au_flavor);
4884 4884
4885 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); 4885 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
4886 if (unlikely(!res.server_scope)) 4886 if (unlikely(!res.server_scope)) {
4887 return -ENOMEM; 4887 status = -ENOMEM;
4888 goto out;
4889 }
4888 4890
4889 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 4891 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4890 if (!status) 4892 if (!status)
@@ -4901,12 +4903,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4901 clp->server_scope = NULL; 4903 clp->server_scope = NULL;
4902 } 4904 }
4903 4905
4904 if (!clp->server_scope) 4906 if (!clp->server_scope) {
4905 clp->server_scope = res.server_scope; 4907 clp->server_scope = res.server_scope;
4906 else 4908 goto out;
4907 kfree(res.server_scope); 4909 }
4908 } 4910 }
4909 4911 kfree(res.server_scope);
4912out:
4910 dprintk("<-- %s status= %d\n", __func__, status); 4913 dprintk("<-- %s status= %d\n", __func__, status);
4911 return status; 4914 return status;
4912} 4915}
@@ -5008,37 +5011,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
5008 return status; 5011 return status;
5009} 5012}
5010 5013
5014static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
5015{
5016 return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags);
5017}
5018
5019static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
5020 struct nfs4_slot *new,
5021 u32 max_slots,
5022 u32 ivalue)
5023{
5024 struct nfs4_slot *old = NULL;
5025 u32 i;
5026
5027 spin_lock(&tbl->slot_tbl_lock);
5028 if (new) {
5029 old = tbl->slots;
5030 tbl->slots = new;
5031 tbl->max_slots = max_slots;
5032 }
5033 tbl->highest_used_slotid = -1; /* no slot is currently used */
5034 for (i = 0; i < tbl->max_slots; i++)
5035 tbl->slots[i].seq_nr = ivalue;
5036 spin_unlock(&tbl->slot_tbl_lock);
5037 kfree(old);
5038}
5039
5011/* 5040/*
5012 * Reset a slot table 5041 * (re)Initialise a slot table
5013 */ 5042 */
5014static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, 5043static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
5015 int ivalue) 5044 u32 ivalue)
5016{ 5045{
5017 struct nfs4_slot *new = NULL; 5046 struct nfs4_slot *new = NULL;
5018 int i; 5047 int ret = -ENOMEM;
5019 int ret = 0;
5020 5048
5021 dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, 5049 dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
5022 max_reqs, tbl->max_slots); 5050 max_reqs, tbl->max_slots);
5023 5051
5024 /* Does the newly negotiated max_reqs match the existing slot table? */ 5052 /* Does the newly negotiated max_reqs match the existing slot table? */
5025 if (max_reqs != tbl->max_slots) { 5053 if (max_reqs != tbl->max_slots) {
5026 ret = -ENOMEM; 5054 new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
5027 new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
5028 GFP_NOFS);
5029 if (!new) 5055 if (!new)
5030 goto out; 5056 goto out;
5031 ret = 0;
5032 kfree(tbl->slots);
5033 } 5057 }
5034 spin_lock(&tbl->slot_tbl_lock); 5058 ret = 0;
5035 if (new) { 5059
5036 tbl->slots = new; 5060 nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
5037 tbl->max_slots = max_reqs;
5038 }
5039 for (i = 0; i < tbl->max_slots; ++i)
5040 tbl->slots[i].seq_nr = ivalue;
5041 spin_unlock(&tbl->slot_tbl_lock);
5042 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, 5061 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
5043 tbl, tbl->slots, tbl->max_slots); 5062 tbl, tbl->slots, tbl->max_slots);
5044out: 5063out:
@@ -5061,36 +5080,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session)
5061} 5080}
5062 5081
5063/* 5082/*
5064 * Initialize slot table
5065 */
5066static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
5067 int max_slots, int ivalue)
5068{
5069 struct nfs4_slot *slot;
5070 int ret = -ENOMEM;
5071
5072 BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
5073
5074 dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
5075
5076 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
5077 if (!slot)
5078 goto out;
5079 ret = 0;
5080
5081 spin_lock(&tbl->slot_tbl_lock);
5082 tbl->max_slots = max_slots;
5083 tbl->slots = slot;
5084 tbl->highest_used_slotid = -1; /* no slot is currently used */
5085 spin_unlock(&tbl->slot_tbl_lock);
5086 dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
5087 tbl, tbl->slots, tbl->max_slots);
5088out:
5089 dprintk("<-- %s: return %d\n", __func__, ret);
5090 return ret;
5091}
5092
5093/*
5094 * Initialize or reset the forechannel and backchannel tables 5083 * Initialize or reset the forechannel and backchannel tables
5095 */ 5084 */
5096static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) 5085static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
@@ -5101,25 +5090,16 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
5101 dprintk("--> %s\n", __func__); 5090 dprintk("--> %s\n", __func__);
5102 /* Fore channel */ 5091 /* Fore channel */
5103 tbl = &ses->fc_slot_table; 5092 tbl = &ses->fc_slot_table;
5104 if (tbl->slots == NULL) { 5093 status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
5105 status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1); 5094 if (status) /* -ENOMEM */
5106 if (status) /* -ENOMEM */ 5095 return status;
5107 return status;
5108 } else {
5109 status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
5110 if (status)
5111 return status;
5112 }
5113 /* Back channel */ 5096 /* Back channel */
5114 tbl = &ses->bc_slot_table; 5097 tbl = &ses->bc_slot_table;
5115 if (tbl->slots == NULL) { 5098 status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
5116 status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0); 5099 if (status && tbl->slots == NULL)
5117 if (status) 5100 /* Fore and back channel share a connection so get
5118 /* Fore and back channel share a connection so get 5101 * both slot tables or neither */
5119 * both slot tables or neither */ 5102 nfs4_destroy_slot_tables(ses);
5120 nfs4_destroy_slot_tables(ses);
5121 } else
5122 status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
5123 return status; 5103 return status;
5124} 5104}
5125 5105
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a53f33b4ac3a..45392032e7bd 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1132,6 +1132,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1132{ 1132{
1133 struct nfs_client *clp = server->nfs_client; 1133 struct nfs_client *clp = server->nfs_client;
1134 1134
1135 if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
1136 nfs_async_inode_return_delegation(state->inode, &state->stateid);
1135 nfs4_state_mark_reclaim_nograce(clp, state); 1137 nfs4_state_mark_reclaim_nograce(clp, state);
1136 nfs4_schedule_state_manager(clp); 1138 nfs4_schedule_state_manager(clp);
1137} 1139}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 95e92e438407..33bd8d0f745d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
2522 2522
2523 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, 2523 xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
2524 args->acl_pages, args->acl_pgbase, args->acl_len); 2524 args->acl_pages, args->acl_pgbase, args->acl_len);
2525 xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
2526 2525
2527 encode_nops(&hdr); 2526 encode_nops(&hdr);
2528} 2527}
@@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6032 struct compound_hdr hdr; 6031 struct compound_hdr hdr;
6033 int status; 6032 int status;
6034 6033
6034 if (res->acl_scratch != NULL) {
6035 void *p = page_address(res->acl_scratch);
6036 xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
6037 }
6035 status = decode_compound_hdr(xdr, &hdr); 6038 status = decode_compound_hdr(xdr, &hdr);
6036 if (status) 6039 if (status)
6037 goto out; 6040 goto out;
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index ce7f0758d84c..9559ce468732 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -72,7 +72,7 @@ int nfsd_fault_inject_init(void)
72{ 72{
73 unsigned int i; 73 unsigned int i;
74 struct nfsd_fault_inject_op *op; 74 struct nfsd_fault_inject_op *op;
75 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 75 umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
76 76
77 debug_dir = debugfs_create_dir("nfsd", NULL); 77 debug_dir = debugfs_create_dir("nfsd", NULL);
78 if (!debug_dir) 78 if (!debug_dir)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index edf6d3ed8777..e59f71d0cf73 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1541,30 +1541,31 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1541__be32 1541__be32
1542nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) 1542nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1543{ 1543{
1544 struct dentry *dentry;
1545 struct inode *inode; 1544 struct inode *inode;
1546 mm_segment_t oldfs; 1545 mm_segment_t oldfs;
1547 __be32 err; 1546 __be32 err;
1548 int host_err; 1547 int host_err;
1548 struct path path;
1549 1549
1550 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); 1550 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1551 if (err) 1551 if (err)
1552 goto out; 1552 goto out;
1553 1553
1554 dentry = fhp->fh_dentry; 1554 path.mnt = fhp->fh_export->ex_path.mnt;
1555 inode = dentry->d_inode; 1555 path.dentry = fhp->fh_dentry;
1556 inode = path.dentry->d_inode;
1556 1557
1557 err = nfserr_inval; 1558 err = nfserr_inval;
1558 if (!inode->i_op->readlink) 1559 if (!inode->i_op->readlink)
1559 goto out; 1560 goto out;
1560 1561
1561 touch_atime(fhp->fh_export->ex_path.mnt, dentry); 1562 touch_atime(&path);
1562 /* N.B. Why does this call need a get_fs()?? 1563 /* N.B. Why does this call need a get_fs()??
1563 * Remove the set_fs and watch the fireworks:-) --okir 1564 * Remove the set_fs and watch the fireworks:-) --okir
1564 */ 1565 */
1565 1566
1566 oldfs = get_fs(); set_fs(KERNEL_DS); 1567 oldfs = get_fs(); set_fs(KERNEL_DS);
1567 host_err = inode->i_op->readlink(dentry, buf, *lenp); 1568 host_err = inode->i_op->readlink(path.dentry, buf, *lenp);
1568 set_fs(oldfs); 1569 set_fs(oldfs);
1569 1570
1570 if (host_err < 0) 1571 if (host_err < 0)
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index c9b342c8b503..dab5c4c6dfaf 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -218,11 +218,11 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
218 kaddr, 1); 218 kaddr, 1);
219 mark_buffer_dirty(cp_bh); 219 mark_buffer_dirty(cp_bh);
220 220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 221 kaddr = kmap_atomic(header_bh->b_page);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr); 223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1); 224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0); 225 kunmap_atomic(kaddr);
226 mark_buffer_dirty(header_bh); 226 mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile); 227 nilfs_mdt_mark_dirty(cpfile);
228 } 228 }
@@ -313,7 +313,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
313 continue; 313 continue;
314 } 314 }
315 315
316 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 316 kaddr = kmap_atomic(cp_bh->b_page);
317 cp = nilfs_cpfile_block_get_checkpoint( 317 cp = nilfs_cpfile_block_get_checkpoint(
318 cpfile, cno, cp_bh, kaddr); 318 cpfile, cno, cp_bh, kaddr);
319 nicps = 0; 319 nicps = 0;
@@ -334,7 +334,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
334 cpfile, cp_bh, kaddr, nicps); 334 cpfile, cp_bh, kaddr, nicps);
335 if (count == 0) { 335 if (count == 0) {
336 /* make hole */ 336 /* make hole */
337 kunmap_atomic(kaddr, KM_USER0); 337 kunmap_atomic(kaddr);
338 brelse(cp_bh); 338 brelse(cp_bh);
339 ret = 339 ret =
340 nilfs_cpfile_delete_checkpoint_block( 340 nilfs_cpfile_delete_checkpoint_block(
@@ -349,18 +349,18 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
349 } 349 }
350 } 350 }
351 351
352 kunmap_atomic(kaddr, KM_USER0); 352 kunmap_atomic(kaddr);
353 brelse(cp_bh); 353 brelse(cp_bh);
354 } 354 }
355 355
356 if (tnicps > 0) { 356 if (tnicps > 0) {
357 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 357 kaddr = kmap_atomic(header_bh->b_page);
358 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 358 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
359 kaddr); 359 kaddr);
360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); 360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
361 mark_buffer_dirty(header_bh); 361 mark_buffer_dirty(header_bh);
362 nilfs_mdt_mark_dirty(cpfile); 362 nilfs_mdt_mark_dirty(cpfile);
363 kunmap_atomic(kaddr, KM_USER0); 363 kunmap_atomic(kaddr);
364 } 364 }
365 365
366 brelse(header_bh); 366 brelse(header_bh);
@@ -408,7 +408,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
408 continue; /* skip hole */ 408 continue; /* skip hole */
409 } 409 }
410 410
411 kaddr = kmap_atomic(bh->b_page, KM_USER0); 411 kaddr = kmap_atomic(bh->b_page);
412 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 412 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
413 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { 413 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
414 if (!nilfs_checkpoint_invalid(cp)) { 414 if (!nilfs_checkpoint_invalid(cp)) {
@@ -418,7 +418,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
418 n++; 418 n++;
419 } 419 }
420 } 420 }
421 kunmap_atomic(kaddr, KM_USER0); 421 kunmap_atomic(kaddr);
422 brelse(bh); 422 brelse(bh);
423 } 423 }
424 424
@@ -451,10 +451,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
451 ret = nilfs_cpfile_get_header_block(cpfile, &bh); 451 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
452 if (ret < 0) 452 if (ret < 0)
453 goto out; 453 goto out;
454 kaddr = kmap_atomic(bh->b_page, KM_USER0); 454 kaddr = kmap_atomic(bh->b_page);
455 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); 455 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
456 curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); 456 curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
457 kunmap_atomic(kaddr, KM_USER0); 457 kunmap_atomic(kaddr);
458 brelse(bh); 458 brelse(bh);
459 if (curr == 0) { 459 if (curr == 0) {
460 ret = 0; 460 ret = 0;
@@ -472,7 +472,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
472 ret = 0; /* No snapshots (started from a hole block) */ 472 ret = 0; /* No snapshots (started from a hole block) */
473 goto out; 473 goto out;
474 } 474 }
475 kaddr = kmap_atomic(bh->b_page, KM_USER0); 475 kaddr = kmap_atomic(bh->b_page);
476 while (n < nci) { 476 while (n < nci) {
477 cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); 477 cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
478 curr = ~(__u64)0; /* Terminator */ 478 curr = ~(__u64)0; /* Terminator */
@@ -488,7 +488,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
488 488
489 next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); 489 next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
490 if (curr_blkoff != next_blkoff) { 490 if (curr_blkoff != next_blkoff) {
491 kunmap_atomic(kaddr, KM_USER0); 491 kunmap_atomic(kaddr);
492 brelse(bh); 492 brelse(bh);
493 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 493 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
494 0, &bh); 494 0, &bh);
@@ -496,12 +496,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
496 WARN_ON(ret == -ENOENT); 496 WARN_ON(ret == -ENOENT);
497 goto out; 497 goto out;
498 } 498 }
499 kaddr = kmap_atomic(bh->b_page, KM_USER0); 499 kaddr = kmap_atomic(bh->b_page);
500 } 500 }
501 curr = next; 501 curr = next;
502 curr_blkoff = next_blkoff; 502 curr_blkoff = next_blkoff;
503 } 503 }
504 kunmap_atomic(kaddr, KM_USER0); 504 kunmap_atomic(kaddr);
505 brelse(bh); 505 brelse(bh);
506 *cnop = curr; 506 *cnop = curr;
507 ret = n; 507 ret = n;
@@ -592,24 +592,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
592 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 592 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
593 if (ret < 0) 593 if (ret < 0)
594 goto out_sem; 594 goto out_sem;
595 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 595 kaddr = kmap_atomic(cp_bh->b_page);
596 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 596 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
597 if (nilfs_checkpoint_invalid(cp)) { 597 if (nilfs_checkpoint_invalid(cp)) {
598 ret = -ENOENT; 598 ret = -ENOENT;
599 kunmap_atomic(kaddr, KM_USER0); 599 kunmap_atomic(kaddr);
600 goto out_cp; 600 goto out_cp;
601 } 601 }
602 if (nilfs_checkpoint_snapshot(cp)) { 602 if (nilfs_checkpoint_snapshot(cp)) {
603 ret = 0; 603 ret = 0;
604 kunmap_atomic(kaddr, KM_USER0); 604 kunmap_atomic(kaddr);
605 goto out_cp; 605 goto out_cp;
606 } 606 }
607 kunmap_atomic(kaddr, KM_USER0); 607 kunmap_atomic(kaddr);
608 608
609 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); 609 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
610 if (ret < 0) 610 if (ret < 0)
611 goto out_cp; 611 goto out_cp;
612 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 612 kaddr = kmap_atomic(header_bh->b_page);
613 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 613 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
614 list = &header->ch_snapshot_list; 614 list = &header->ch_snapshot_list;
615 curr_bh = header_bh; 615 curr_bh = header_bh;
@@ -621,13 +621,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
621 prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); 621 prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
622 curr = prev; 622 curr = prev;
623 if (curr_blkoff != prev_blkoff) { 623 if (curr_blkoff != prev_blkoff) {
624 kunmap_atomic(kaddr, KM_USER0); 624 kunmap_atomic(kaddr);
625 brelse(curr_bh); 625 brelse(curr_bh);
626 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 626 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
627 0, &curr_bh); 627 0, &curr_bh);
628 if (ret < 0) 628 if (ret < 0)
629 goto out_header; 629 goto out_header;
630 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); 630 kaddr = kmap_atomic(curr_bh->b_page);
631 } 631 }
632 curr_blkoff = prev_blkoff; 632 curr_blkoff = prev_blkoff;
633 cp = nilfs_cpfile_block_get_checkpoint( 633 cp = nilfs_cpfile_block_get_checkpoint(
@@ -635,7 +635,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
635 list = &cp->cp_snapshot_list; 635 list = &cp->cp_snapshot_list;
636 prev = le64_to_cpu(list->ssl_prev); 636 prev = le64_to_cpu(list->ssl_prev);
637 } 637 }
638 kunmap_atomic(kaddr, KM_USER0); 638 kunmap_atomic(kaddr);
639 639
640 if (prev != 0) { 640 if (prev != 0) {
641 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, 641 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
@@ -647,29 +647,29 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
647 get_bh(prev_bh); 647 get_bh(prev_bh);
648 } 648 }
649 649
650 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); 650 kaddr = kmap_atomic(curr_bh->b_page);
651 list = nilfs_cpfile_block_get_snapshot_list( 651 list = nilfs_cpfile_block_get_snapshot_list(
652 cpfile, curr, curr_bh, kaddr); 652 cpfile, curr, curr_bh, kaddr);
653 list->ssl_prev = cpu_to_le64(cno); 653 list->ssl_prev = cpu_to_le64(cno);
654 kunmap_atomic(kaddr, KM_USER0); 654 kunmap_atomic(kaddr);
655 655
656 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 656 kaddr = kmap_atomic(cp_bh->b_page);
657 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 657 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
658 cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); 658 cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
659 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); 659 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
660 nilfs_checkpoint_set_snapshot(cp); 660 nilfs_checkpoint_set_snapshot(cp);
661 kunmap_atomic(kaddr, KM_USER0); 661 kunmap_atomic(kaddr);
662 662
663 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); 663 kaddr = kmap_atomic(prev_bh->b_page);
664 list = nilfs_cpfile_block_get_snapshot_list( 664 list = nilfs_cpfile_block_get_snapshot_list(
665 cpfile, prev, prev_bh, kaddr); 665 cpfile, prev, prev_bh, kaddr);
666 list->ssl_next = cpu_to_le64(cno); 666 list->ssl_next = cpu_to_le64(cno);
667 kunmap_atomic(kaddr, KM_USER0); 667 kunmap_atomic(kaddr);
668 668
669 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 669 kaddr = kmap_atomic(header_bh->b_page);
670 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 670 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
671 le64_add_cpu(&header->ch_nsnapshots, 1); 671 le64_add_cpu(&header->ch_nsnapshots, 1);
672 kunmap_atomic(kaddr, KM_USER0); 672 kunmap_atomic(kaddr);
673 673
674 mark_buffer_dirty(prev_bh); 674 mark_buffer_dirty(prev_bh);
675 mark_buffer_dirty(curr_bh); 675 mark_buffer_dirty(curr_bh);
@@ -710,23 +710,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
710 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 710 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
711 if (ret < 0) 711 if (ret < 0)
712 goto out_sem; 712 goto out_sem;
713 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 713 kaddr = kmap_atomic(cp_bh->b_page);
714 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 714 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
715 if (nilfs_checkpoint_invalid(cp)) { 715 if (nilfs_checkpoint_invalid(cp)) {
716 ret = -ENOENT; 716 ret = -ENOENT;
717 kunmap_atomic(kaddr, KM_USER0); 717 kunmap_atomic(kaddr);
718 goto out_cp; 718 goto out_cp;
719 } 719 }
720 if (!nilfs_checkpoint_snapshot(cp)) { 720 if (!nilfs_checkpoint_snapshot(cp)) {
721 ret = 0; 721 ret = 0;
722 kunmap_atomic(kaddr, KM_USER0); 722 kunmap_atomic(kaddr);
723 goto out_cp; 723 goto out_cp;
724 } 724 }
725 725
726 list = &cp->cp_snapshot_list; 726 list = &cp->cp_snapshot_list;
727 next = le64_to_cpu(list->ssl_next); 727 next = le64_to_cpu(list->ssl_next);
728 prev = le64_to_cpu(list->ssl_prev); 728 prev = le64_to_cpu(list->ssl_prev);
729 kunmap_atomic(kaddr, KM_USER0); 729 kunmap_atomic(kaddr);
730 730
731 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); 731 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
732 if (ret < 0) 732 if (ret < 0)
@@ -750,29 +750,29 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
750 get_bh(prev_bh); 750 get_bh(prev_bh);
751 } 751 }
752 752
753 kaddr = kmap_atomic(next_bh->b_page, KM_USER0); 753 kaddr = kmap_atomic(next_bh->b_page);
754 list = nilfs_cpfile_block_get_snapshot_list( 754 list = nilfs_cpfile_block_get_snapshot_list(
755 cpfile, next, next_bh, kaddr); 755 cpfile, next, next_bh, kaddr);
756 list->ssl_prev = cpu_to_le64(prev); 756 list->ssl_prev = cpu_to_le64(prev);
757 kunmap_atomic(kaddr, KM_USER0); 757 kunmap_atomic(kaddr);
758 758
759 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); 759 kaddr = kmap_atomic(prev_bh->b_page);
760 list = nilfs_cpfile_block_get_snapshot_list( 760 list = nilfs_cpfile_block_get_snapshot_list(
761 cpfile, prev, prev_bh, kaddr); 761 cpfile, prev, prev_bh, kaddr);
762 list->ssl_next = cpu_to_le64(next); 762 list->ssl_next = cpu_to_le64(next);
763 kunmap_atomic(kaddr, KM_USER0); 763 kunmap_atomic(kaddr);
764 764
765 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); 765 kaddr = kmap_atomic(cp_bh->b_page);
766 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); 766 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
767 cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); 767 cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
768 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); 768 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
769 nilfs_checkpoint_clear_snapshot(cp); 769 nilfs_checkpoint_clear_snapshot(cp);
770 kunmap_atomic(kaddr, KM_USER0); 770 kunmap_atomic(kaddr);
771 771
772 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 772 kaddr = kmap_atomic(header_bh->b_page);
773 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); 773 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
774 le64_add_cpu(&header->ch_nsnapshots, -1); 774 le64_add_cpu(&header->ch_nsnapshots, -1);
775 kunmap_atomic(kaddr, KM_USER0); 775 kunmap_atomic(kaddr);
776 776
777 mark_buffer_dirty(next_bh); 777 mark_buffer_dirty(next_bh);
778 mark_buffer_dirty(prev_bh); 778 mark_buffer_dirty(prev_bh);
@@ -829,13 +829,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
829 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); 829 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
830 if (ret < 0) 830 if (ret < 0)
831 goto out; 831 goto out;
832 kaddr = kmap_atomic(bh->b_page, KM_USER0); 832 kaddr = kmap_atomic(bh->b_page);
833 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 833 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
834 if (nilfs_checkpoint_invalid(cp)) 834 if (nilfs_checkpoint_invalid(cp))
835 ret = -ENOENT; 835 ret = -ENOENT;
836 else 836 else
837 ret = nilfs_checkpoint_snapshot(cp); 837 ret = nilfs_checkpoint_snapshot(cp);
838 kunmap_atomic(kaddr, KM_USER0); 838 kunmap_atomic(kaddr);
839 brelse(bh); 839 brelse(bh);
840 840
841 out: 841 out:
@@ -912,12 +912,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
912 ret = nilfs_cpfile_get_header_block(cpfile, &bh); 912 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
913 if (ret < 0) 913 if (ret < 0)
914 goto out_sem; 914 goto out_sem;
915 kaddr = kmap_atomic(bh->b_page, KM_USER0); 915 kaddr = kmap_atomic(bh->b_page);
916 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); 916 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
917 cpstat->cs_cno = nilfs_mdt_cno(cpfile); 917 cpstat->cs_cno = nilfs_mdt_cno(cpfile);
918 cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); 918 cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
919 cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); 919 cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
920 kunmap_atomic(kaddr, KM_USER0); 920 kunmap_atomic(kaddr);
921 brelse(bh); 921 brelse(bh);
922 922
923 out_sem: 923 out_sem:
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index fcc2f869af16..b5c13f3576b9 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -85,13 +85,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
85 struct nilfs_dat_entry *entry; 85 struct nilfs_dat_entry *entry;
86 void *kaddr; 86 void *kaddr;
87 87
88 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 88 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
89 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 89 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
90 req->pr_entry_bh, kaddr); 90 req->pr_entry_bh, kaddr);
91 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 91 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
92 entry->de_end = cpu_to_le64(NILFS_CNO_MAX); 92 entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
93 entry->de_blocknr = cpu_to_le64(0); 93 entry->de_blocknr = cpu_to_le64(0);
94 kunmap_atomic(kaddr, KM_USER0); 94 kunmap_atomic(kaddr);
95 95
96 nilfs_palloc_commit_alloc_entry(dat, req); 96 nilfs_palloc_commit_alloc_entry(dat, req);
97 nilfs_dat_commit_entry(dat, req); 97 nilfs_dat_commit_entry(dat, req);
@@ -109,13 +109,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
109 struct nilfs_dat_entry *entry; 109 struct nilfs_dat_entry *entry;
110 void *kaddr; 110 void *kaddr;
111 111
112 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 112 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
113 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 113 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
114 req->pr_entry_bh, kaddr); 114 req->pr_entry_bh, kaddr);
115 entry->de_start = cpu_to_le64(NILFS_CNO_MIN); 115 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
116 entry->de_end = cpu_to_le64(NILFS_CNO_MIN); 116 entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
117 entry->de_blocknr = cpu_to_le64(0); 117 entry->de_blocknr = cpu_to_le64(0);
118 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr);
119 119
120 nilfs_dat_commit_entry(dat, req); 120 nilfs_dat_commit_entry(dat, req);
121 nilfs_palloc_commit_free_entry(dat, req); 121 nilfs_palloc_commit_free_entry(dat, req);
@@ -136,12 +136,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
136 struct nilfs_dat_entry *entry; 136 struct nilfs_dat_entry *entry;
137 void *kaddr; 137 void *kaddr;
138 138
139 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 139 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
140 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 140 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
141 req->pr_entry_bh, kaddr); 141 req->pr_entry_bh, kaddr);
142 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 142 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
143 entry->de_blocknr = cpu_to_le64(blocknr); 143 entry->de_blocknr = cpu_to_le64(blocknr);
144 kunmap_atomic(kaddr, KM_USER0); 144 kunmap_atomic(kaddr);
145 145
146 nilfs_dat_commit_entry(dat, req); 146 nilfs_dat_commit_entry(dat, req);
147} 147}
@@ -160,12 +160,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
160 return ret; 160 return ret;
161 } 161 }
162 162
163 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 163 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
164 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 164 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
165 req->pr_entry_bh, kaddr); 165 req->pr_entry_bh, kaddr);
166 start = le64_to_cpu(entry->de_start); 166 start = le64_to_cpu(entry->de_start);
167 blocknr = le64_to_cpu(entry->de_blocknr); 167 blocknr = le64_to_cpu(entry->de_blocknr);
168 kunmap_atomic(kaddr, KM_USER0); 168 kunmap_atomic(kaddr);
169 169
170 if (blocknr == 0) { 170 if (blocknr == 0) {
171 ret = nilfs_palloc_prepare_free_entry(dat, req); 171 ret = nilfs_palloc_prepare_free_entry(dat, req);
@@ -186,7 +186,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
186 sector_t blocknr; 186 sector_t blocknr;
187 void *kaddr; 187 void *kaddr;
188 188
189 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 189 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
190 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 190 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
191 req->pr_entry_bh, kaddr); 191 req->pr_entry_bh, kaddr);
192 end = start = le64_to_cpu(entry->de_start); 192 end = start = le64_to_cpu(entry->de_start);
@@ -196,7 +196,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
196 } 196 }
197 entry->de_end = cpu_to_le64(end); 197 entry->de_end = cpu_to_le64(end);
198 blocknr = le64_to_cpu(entry->de_blocknr); 198 blocknr = le64_to_cpu(entry->de_blocknr);
199 kunmap_atomic(kaddr, KM_USER0); 199 kunmap_atomic(kaddr);
200 200
201 if (blocknr == 0) 201 if (blocknr == 0)
202 nilfs_dat_commit_free(dat, req); 202 nilfs_dat_commit_free(dat, req);
@@ -211,12 +211,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
211 sector_t blocknr; 211 sector_t blocknr;
212 void *kaddr; 212 void *kaddr;
213 213
214 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); 214 kaddr = kmap_atomic(req->pr_entry_bh->b_page);
215 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 215 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
216 req->pr_entry_bh, kaddr); 216 req->pr_entry_bh, kaddr);
217 start = le64_to_cpu(entry->de_start); 217 start = le64_to_cpu(entry->de_start);
218 blocknr = le64_to_cpu(entry->de_blocknr); 218 blocknr = le64_to_cpu(entry->de_blocknr);
219 kunmap_atomic(kaddr, KM_USER0); 219 kunmap_atomic(kaddr);
220 220
221 if (start == nilfs_mdt_cno(dat) && blocknr == 0) 221 if (start == nilfs_mdt_cno(dat) && blocknr == 0)
222 nilfs_palloc_abort_free_entry(dat, req); 222 nilfs_palloc_abort_free_entry(dat, req);
@@ -346,20 +346,20 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
346 } 346 }
347 } 347 }
348 348
349 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 349 kaddr = kmap_atomic(entry_bh->b_page);
350 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 350 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
351 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { 351 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
352 printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, 352 printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
353 (unsigned long long)vblocknr, 353 (unsigned long long)vblocknr,
354 (unsigned long long)le64_to_cpu(entry->de_start), 354 (unsigned long long)le64_to_cpu(entry->de_start),
355 (unsigned long long)le64_to_cpu(entry->de_end)); 355 (unsigned long long)le64_to_cpu(entry->de_end));
356 kunmap_atomic(kaddr, KM_USER0); 356 kunmap_atomic(kaddr);
357 brelse(entry_bh); 357 brelse(entry_bh);
358 return -EINVAL; 358 return -EINVAL;
359 } 359 }
360 WARN_ON(blocknr == 0); 360 WARN_ON(blocknr == 0);
361 entry->de_blocknr = cpu_to_le64(blocknr); 361 entry->de_blocknr = cpu_to_le64(blocknr);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr);
363 363
364 mark_buffer_dirty(entry_bh); 364 mark_buffer_dirty(entry_bh);
365 nilfs_mdt_mark_dirty(dat); 365 nilfs_mdt_mark_dirty(dat);
@@ -409,7 +409,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
409 } 409 }
410 } 410 }
411 411
412 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 412 kaddr = kmap_atomic(entry_bh->b_page);
413 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); 413 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
414 blocknr = le64_to_cpu(entry->de_blocknr); 414 blocknr = le64_to_cpu(entry->de_blocknr);
415 if (blocknr == 0) { 415 if (blocknr == 0) {
@@ -419,7 +419,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
419 *blocknrp = blocknr; 419 *blocknrp = blocknr;
420 420
421 out: 421 out:
422 kunmap_atomic(kaddr, KM_USER0); 422 kunmap_atomic(kaddr);
423 brelse(entry_bh); 423 brelse(entry_bh);
424 return ret; 424 return ret;
425} 425}
@@ -440,7 +440,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
440 0, &entry_bh); 440 0, &entry_bh);
441 if (ret < 0) 441 if (ret < 0)
442 return ret; 442 return ret;
443 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); 443 kaddr = kmap_atomic(entry_bh->b_page);
444 /* last virtual block number in this block */ 444 /* last virtual block number in this block */
445 first = vinfo->vi_vblocknr; 445 first = vinfo->vi_vblocknr;
446 do_div(first, entries_per_block); 446 do_div(first, entries_per_block);
@@ -456,7 +456,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
456 vinfo->vi_end = le64_to_cpu(entry->de_end); 456 vinfo->vi_end = le64_to_cpu(entry->de_end);
457 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); 457 vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
458 } 458 }
459 kunmap_atomic(kaddr, KM_USER0); 459 kunmap_atomic(kaddr);
460 brelse(entry_bh); 460 brelse(entry_bh);
461 } 461 }
462 462
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index ca35b3a46d17..df1a7fb238d1 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -602,7 +602,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
602 unlock_page(page); 602 unlock_page(page);
603 goto fail; 603 goto fail;
604 } 604 }
605 kaddr = kmap_atomic(page, KM_USER0); 605 kaddr = kmap_atomic(page);
606 memset(kaddr, 0, chunk_size); 606 memset(kaddr, 0, chunk_size);
607 de = (struct nilfs_dir_entry *)kaddr; 607 de = (struct nilfs_dir_entry *)kaddr;
608 de->name_len = 1; 608 de->name_len = 1;
@@ -617,7 +617,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
617 de->inode = cpu_to_le64(parent->i_ino); 617 de->inode = cpu_to_le64(parent->i_ino);
618 memcpy(de->name, "..\0", 4); 618 memcpy(de->name, "..\0", 4);
619 nilfs_set_de_type(de, inode); 619 nilfs_set_de_type(de, inode);
620 kunmap_atomic(kaddr, KM_USER0); 620 kunmap_atomic(kaddr);
621 nilfs_commit_chunk(page, mapping, 0, chunk_size); 621 nilfs_commit_chunk(page, mapping, 0, chunk_size);
622fail: 622fail:
623 page_cache_release(page); 623 page_cache_release(page);
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 684d76300a80..5a48df79d674 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -122,11 +122,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
122 return ret; 122 return ret;
123 } 123 }
124 124
125 kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0); 125 kaddr = kmap_atomic(req.pr_entry_bh->b_page);
126 raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, 126 raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
127 req.pr_entry_bh, kaddr); 127 req.pr_entry_bh, kaddr);
128 raw_inode->i_flags = 0; 128 raw_inode->i_flags = 0;
129 kunmap_atomic(kaddr, KM_USER0); 129 kunmap_atomic(kaddr);
130 130
131 mark_buffer_dirty(req.pr_entry_bh); 131 mark_buffer_dirty(req.pr_entry_bh);
132 brelse(req.pr_entry_bh); 132 brelse(req.pr_entry_bh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 800e8d78a83b..f9897d09c693 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -58,12 +58,12 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
58 58
59 set_buffer_mapped(bh); 59 set_buffer_mapped(bh);
60 60
61 kaddr = kmap_atomic(bh->b_page, KM_USER0); 61 kaddr = kmap_atomic(bh->b_page);
62 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); 62 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
63 if (init_block) 63 if (init_block)
64 init_block(inode, bh, kaddr); 64 init_block(inode, bh, kaddr);
65 flush_dcache_page(bh->b_page); 65 flush_dcache_page(bh->b_page);
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1cd3f624dffc..fce2bbee66d4 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -193,9 +193,6 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
193 struct nilfs_transaction_info ti; 193 struct nilfs_transaction_info ti;
194 int err; 194 int err;
195 195
196 if (inode->i_nlink >= NILFS_LINK_MAX)
197 return -EMLINK;
198
199 err = nilfs_transaction_begin(dir->i_sb, &ti, 1); 196 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
200 if (err) 197 if (err)
201 return err; 198 return err;
@@ -219,9 +216,6 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
219 struct nilfs_transaction_info ti; 216 struct nilfs_transaction_info ti;
220 int err; 217 int err;
221 218
222 if (dir->i_nlink >= NILFS_LINK_MAX)
223 return -EMLINK;
224
225 err = nilfs_transaction_begin(dir->i_sb, &ti, 1); 219 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
226 if (err) 220 if (err)
227 return err; 221 return err;
@@ -400,11 +394,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
400 drop_nlink(new_inode); 394 drop_nlink(new_inode);
401 nilfs_mark_inode_dirty(new_inode); 395 nilfs_mark_inode_dirty(new_inode);
402 } else { 396 } else {
403 if (dir_de) {
404 err = -EMLINK;
405 if (new_dir->i_nlink >= NILFS_LINK_MAX)
406 goto out_dir;
407 }
408 err = nilfs_add_link(new_dentry, old_inode); 397 err = nilfs_add_link(new_dentry, old_inode);
409 if (err) 398 if (err)
410 goto out_dir; 399 goto out_dir;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 65221a04c6f0..3e7b2a0dc0c8 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -119,11 +119,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
119 struct page *spage = sbh->b_page, *dpage = dbh->b_page; 119 struct page *spage = sbh->b_page, *dpage = dbh->b_page;
120 struct buffer_head *bh; 120 struct buffer_head *bh;
121 121
122 kaddr0 = kmap_atomic(spage, KM_USER0); 122 kaddr0 = kmap_atomic(spage);
123 kaddr1 = kmap_atomic(dpage, KM_USER1); 123 kaddr1 = kmap_atomic(dpage);
124 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); 124 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
125 kunmap_atomic(kaddr1, KM_USER1); 125 kunmap_atomic(kaddr1);
126 kunmap_atomic(kaddr0, KM_USER0); 126 kunmap_atomic(kaddr0);
127 127
128 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; 128 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
129 dbh->b_blocknr = sbh->b_blocknr; 129 dbh->b_blocknr = sbh->b_blocknr;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index a604ac0331b2..f1626f5011c5 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -493,9 +493,9 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
493 if (unlikely(!bh_org)) 493 if (unlikely(!bh_org))
494 return -EIO; 494 return -EIO;
495 495
496 kaddr = kmap_atomic(page, KM_USER0); 496 kaddr = kmap_atomic(page);
497 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); 497 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
498 kunmap_atomic(kaddr, KM_USER0); 498 kunmap_atomic(kaddr);
499 brelse(bh_org); 499 brelse(bh_org);
500 return 0; 500 return 0;
501} 501}
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 850a7c0228fb..dc9a913784ab 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -227,9 +227,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
227 crc = crc32_le(crc, bh->b_data, bh->b_size); 227 crc = crc32_le(crc, bh->b_data, bh->b_size);
228 } 228 }
229 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 229 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
230 kaddr = kmap_atomic(bh->b_page, KM_USER0); 230 kaddr = kmap_atomic(bh->b_page);
231 crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); 231 crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
232 kunmap_atomic(kaddr, KM_USER0); 232 kunmap_atomic(kaddr);
233 } 233 }
234 raw_sum->ss_datasum = cpu_to_le32(crc); 234 raw_sum->ss_datasum = cpu_to_le32(crc);
235} 235}
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 0a0aba617d8a..c5b7653a4391 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -111,11 +111,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
111 struct nilfs_sufile_header *header; 111 struct nilfs_sufile_header *header;
112 void *kaddr; 112 void *kaddr;
113 113
114 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 114 kaddr = kmap_atomic(header_bh->b_page);
115 header = kaddr + bh_offset(header_bh); 115 header = kaddr + bh_offset(header_bh);
116 le64_add_cpu(&header->sh_ncleansegs, ncleanadd); 116 le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
118 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr);
119 119
120 mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
121} 121}
@@ -319,11 +319,11 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 ret = nilfs_sufile_get_header_block(sufile, &header_bh); 319 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
320 if (ret < 0) 320 if (ret < 0)
321 goto out_sem; 321 goto out_sem;
322 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 322 kaddr = kmap_atomic(header_bh->b_page);
323 header = kaddr + bh_offset(header_bh); 323 header = kaddr + bh_offset(header_bh);
324 ncleansegs = le64_to_cpu(header->sh_ncleansegs); 324 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
325 last_alloc = le64_to_cpu(header->sh_last_alloc); 325 last_alloc = le64_to_cpu(header->sh_last_alloc);
326 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr);
327 327
328 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax; 329 maxsegnum = sui->allocmax;
@@ -356,7 +356,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
356 &su_bh); 356 &su_bh);
357 if (ret < 0) 357 if (ret < 0)
358 goto out_header; 358 goto out_header;
359 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 359 kaddr = kmap_atomic(su_bh->b_page);
360 su = nilfs_sufile_block_get_segment_usage( 360 su = nilfs_sufile_block_get_segment_usage(
361 sufile, segnum, su_bh, kaddr); 361 sufile, segnum, su_bh, kaddr);
362 362
@@ -367,14 +367,14 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
367 continue; 367 continue;
368 /* found a clean segment */ 368 /* found a clean segment */
369 nilfs_segment_usage_set_dirty(su); 369 nilfs_segment_usage_set_dirty(su);
370 kunmap_atomic(kaddr, KM_USER0); 370 kunmap_atomic(kaddr);
371 371
372 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 372 kaddr = kmap_atomic(header_bh->b_page);
373 header = kaddr + bh_offset(header_bh); 373 header = kaddr + bh_offset(header_bh);
374 le64_add_cpu(&header->sh_ncleansegs, -1); 374 le64_add_cpu(&header->sh_ncleansegs, -1);
375 le64_add_cpu(&header->sh_ndirtysegs, 1); 375 le64_add_cpu(&header->sh_ndirtysegs, 1);
376 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
377 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr);
378 378
379 sui->ncleansegs--; 379 sui->ncleansegs--;
380 mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
@@ -385,7 +385,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
385 goto out_header; 385 goto out_header;
386 } 386 }
387 387
388 kunmap_atomic(kaddr, KM_USER0); 388 kunmap_atomic(kaddr);
389 brelse(su_bh); 389 brelse(su_bh);
390 } 390 }
391 391
@@ -407,16 +407,16 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
407 struct nilfs_segment_usage *su; 407 struct nilfs_segment_usage *su;
408 void *kaddr; 408 void *kaddr;
409 409
410 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 410 kaddr = kmap_atomic(su_bh->b_page);
411 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 411 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
412 if (unlikely(!nilfs_segment_usage_clean(su))) { 412 if (unlikely(!nilfs_segment_usage_clean(su))) {
413 printk(KERN_WARNING "%s: segment %llu must be clean\n", 413 printk(KERN_WARNING "%s: segment %llu must be clean\n",
414 __func__, (unsigned long long)segnum); 414 __func__, (unsigned long long)segnum);
415 kunmap_atomic(kaddr, KM_USER0); 415 kunmap_atomic(kaddr);
416 return; 416 return;
417 } 417 }
418 nilfs_segment_usage_set_dirty(su); 418 nilfs_segment_usage_set_dirty(su);
419 kunmap_atomic(kaddr, KM_USER0); 419 kunmap_atomic(kaddr);
420 420
421 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
422 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
@@ -433,11 +433,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
433 void *kaddr; 433 void *kaddr;
434 int clean, dirty; 434 int clean, dirty;
435 435
436 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 436 kaddr = kmap_atomic(su_bh->b_page);
437 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 437 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
438 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && 438 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
439 su->su_nblocks == cpu_to_le32(0)) { 439 su->su_nblocks == cpu_to_le32(0)) {
440 kunmap_atomic(kaddr, KM_USER0); 440 kunmap_atomic(kaddr);
441 return; 441 return;
442 } 442 }
443 clean = nilfs_segment_usage_clean(su); 443 clean = nilfs_segment_usage_clean(su);
@@ -447,7 +447,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
447 su->su_lastmod = cpu_to_le64(0); 447 su->su_lastmod = cpu_to_le64(0);
448 su->su_nblocks = cpu_to_le32(0); 448 su->su_nblocks = cpu_to_le32(0);
449 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); 449 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
450 kunmap_atomic(kaddr, KM_USER0); 450 kunmap_atomic(kaddr);
451 451
452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
453 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
@@ -464,12 +464,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
464 void *kaddr; 464 void *kaddr;
465 int sudirty; 465 int sudirty;
466 466
467 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 467 kaddr = kmap_atomic(su_bh->b_page);
468 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 468 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
469 if (nilfs_segment_usage_clean(su)) { 469 if (nilfs_segment_usage_clean(su)) {
470 printk(KERN_WARNING "%s: segment %llu is already clean\n", 470 printk(KERN_WARNING "%s: segment %llu is already clean\n",
471 __func__, (unsigned long long)segnum); 471 __func__, (unsigned long long)segnum);
472 kunmap_atomic(kaddr, KM_USER0); 472 kunmap_atomic(kaddr);
473 return; 473 return;
474 } 474 }
475 WARN_ON(nilfs_segment_usage_error(su)); 475 WARN_ON(nilfs_segment_usage_error(su));
@@ -477,7 +477,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
477 477
478 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
479 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
480 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr);
481 mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
482 482
483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
@@ -525,13 +525,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
525 if (ret < 0) 525 if (ret < 0)
526 goto out_sem; 526 goto out_sem;
527 527
528 kaddr = kmap_atomic(bh->b_page, KM_USER0); 528 kaddr = kmap_atomic(bh->b_page);
529 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); 529 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
530 WARN_ON(nilfs_segment_usage_error(su)); 530 WARN_ON(nilfs_segment_usage_error(su));
531 if (modtime) 531 if (modtime)
532 su->su_lastmod = cpu_to_le64(modtime); 532 su->su_lastmod = cpu_to_le64(modtime);
533 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
534 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr);
535 535
536 mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
537 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
@@ -572,7 +572,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
572 if (ret < 0) 572 if (ret < 0)
573 goto out_sem; 573 goto out_sem;
574 574
575 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 575 kaddr = kmap_atomic(header_bh->b_page);
576 header = kaddr + bh_offset(header_bh); 576 header = kaddr + bh_offset(header_bh);
577 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); 577 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
578 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); 578 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
@@ -582,7 +582,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
582 spin_lock(&nilfs->ns_last_segment_lock); 582 spin_lock(&nilfs->ns_last_segment_lock);
583 sustat->ss_prot_seq = nilfs->ns_prot_seq; 583 sustat->ss_prot_seq = nilfs->ns_prot_seq;
584 spin_unlock(&nilfs->ns_last_segment_lock); 584 spin_unlock(&nilfs->ns_last_segment_lock);
585 kunmap_atomic(kaddr, KM_USER0); 585 kunmap_atomic(kaddr);
586 brelse(header_bh); 586 brelse(header_bh);
587 587
588 out_sem: 588 out_sem:
@@ -598,15 +598,15 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
598 void *kaddr; 598 void *kaddr;
599 int suclean; 599 int suclean;
600 600
601 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 601 kaddr = kmap_atomic(su_bh->b_page);
602 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 602 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
603 if (nilfs_segment_usage_error(su)) { 603 if (nilfs_segment_usage_error(su)) {
604 kunmap_atomic(kaddr, KM_USER0); 604 kunmap_atomic(kaddr);
605 return; 605 return;
606 } 606 }
607 suclean = nilfs_segment_usage_clean(su); 607 suclean = nilfs_segment_usage_clean(su);
608 nilfs_segment_usage_set_error(su); 608 nilfs_segment_usage_set_error(su);
609 kunmap_atomic(kaddr, KM_USER0); 609 kunmap_atomic(kaddr);
610 610
611 if (suclean) { 611 if (suclean) {
612 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
@@ -675,7 +675,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
675 /* hole */ 675 /* hole */
676 continue; 676 continue;
677 } 677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 678 kaddr = kmap_atomic(su_bh->b_page);
679 su = nilfs_sufile_block_get_segment_usage( 679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr); 680 sufile, segnum, su_bh, kaddr);
681 su2 = su; 681 su2 = su;
@@ -684,7 +684,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) || 684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) { 685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY; 686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0); 687 kunmap_atomic(kaddr);
688 brelse(su_bh); 688 brelse(su_bh);
689 goto out_header; 689 goto out_header;
690 } 690 }
@@ -696,7 +696,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
696 nc++; 696 nc++;
697 } 697 }
698 } 698 }
699 kunmap_atomic(kaddr, KM_USER0); 699 kunmap_atomic(kaddr);
700 if (nc > 0) { 700 if (nc > 0) {
701 mark_buffer_dirty(su_bh); 701 mark_buffer_dirty(su_bh);
702 ncleaned += nc; 702 ncleaned += nc;
@@ -772,10 +772,10 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
772 sui->ncleansegs -= nsegs - newnsegs; 772 sui->ncleansegs -= nsegs - newnsegs;
773 } 773 }
774 774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 775 kaddr = kmap_atomic(header_bh->b_page);
776 header = kaddr + bh_offset(header_bh); 776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); 777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0); 778 kunmap_atomic(kaddr);
779 779
780 mark_buffer_dirty(header_bh); 780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile); 781 nilfs_mdt_mark_dirty(sufile);
@@ -840,7 +840,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
840 continue; 840 continue;
841 } 841 }
842 842
843 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 843 kaddr = kmap_atomic(su_bh->b_page);
844 su = nilfs_sufile_block_get_segment_usage( 844 su = nilfs_sufile_block_get_segment_usage(
845 sufile, segnum, su_bh, kaddr); 845 sufile, segnum, su_bh, kaddr);
846 for (j = 0; j < n; 846 for (j = 0; j < n;
@@ -853,7 +853,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
853 si->sui_flags |= 853 si->sui_flags |=
854 (1UL << NILFS_SEGMENT_USAGE_ACTIVE); 854 (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
855 } 855 }
856 kunmap_atomic(kaddr, KM_USER0); 856 kunmap_atomic(kaddr);
857 brelse(su_bh); 857 brelse(su_bh);
858 } 858 }
859 ret = nsegs; 859 ret = nsegs;
@@ -902,10 +902,10 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
902 goto failed; 902 goto failed;
903 903
904 sui = NILFS_SUI(sufile); 904 sui = NILFS_SUI(sufile);
905 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 905 kaddr = kmap_atomic(header_bh->b_page);
906 header = kaddr + bh_offset(header_bh); 906 header = kaddr + bh_offset(header_bh);
907 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); 907 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
908 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr);
909 brelse(header_bh); 909 brelse(header_bh);
910 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; 911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 08e3d4f9df18..1099a76cee59 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -917,9 +917,8 @@ static int nilfs_get_root_dentry(struct super_block *sb,
917 if (root->cno == NILFS_CPTREE_CURRENT_CNO) { 917 if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
918 dentry = d_find_alias(inode); 918 dentry = d_find_alias(inode);
919 if (!dentry) { 919 if (!dentry) {
920 dentry = d_alloc_root(inode); 920 dentry = d_make_root(inode);
921 if (!dentry) { 921 if (!dentry) {
922 iput(inode);
923 ret = -ENOMEM; 922 ret = -ENOMEM;
924 goto failed_dentry; 923 goto failed_dentry;
925 } 924 }
@@ -1059,6 +1058,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1059 sb->s_export_op = &nilfs_export_ops; 1058 sb->s_export_op = &nilfs_export_ops;
1060 sb->s_root = NULL; 1059 sb->s_root = NULL;
1061 sb->s_time_gran = 1; 1060 sb->s_time_gran = 1;
1061 sb->s_max_links = NILFS_LINK_MAX;
1062 1062
1063 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 1063 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
1064 sb->s_bdi = bdi ? : &default_backing_dev_info; 1064 sb->s_bdi = bdi ? : &default_backing_dev_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d32714094375..501b7f8b739f 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -409,6 +409,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); 409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
410 nilfs->ns_r_segments_percentage = 410 nilfs->ns_r_segments_percentage =
411 le32_to_cpu(sbp->s_r_segments_percentage); 411 le32_to_cpu(sbp->s_r_segments_percentage);
412 if (nilfs->ns_r_segments_percentage < 1 ||
413 nilfs->ns_r_segments_percentage > 99) {
414 printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n");
415 return -EINVAL;
416 }
417
412 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); 418 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
413 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); 419 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
414 return 0; 420 return 0;
@@ -515,6 +521,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
515 brelse(sbh[1]); 521 brelse(sbh[1]);
516 sbh[1] = NULL; 522 sbh[1] = NULL;
517 sbp[1] = NULL; 523 sbp[1] = NULL;
524 valid[1] = 0;
518 swp = 0; 525 swp = 0;
519 } 526 }
520 if (!valid[swp]) { 527 if (!valid[swp]) {
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 0b1e885b8cf8..fa9c05f97af4 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -94,11 +94,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
94 if (file_ofs < init_size) 94 if (file_ofs < init_size)
95 ofs = init_size - file_ofs; 95 ofs = init_size - file_ofs;
96 local_irq_save(flags); 96 local_irq_save(flags);
97 kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); 97 kaddr = kmap_atomic(page);
98 memset(kaddr + bh_offset(bh) + ofs, 0, 98 memset(kaddr + bh_offset(bh) + ofs, 0,
99 bh->b_size - ofs); 99 bh->b_size - ofs);
100 flush_dcache_page(page); 100 flush_dcache_page(page);
101 kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); 101 kunmap_atomic(kaddr);
102 local_irq_restore(flags); 102 local_irq_restore(flags);
103 } 103 }
104 } else { 104 } else {
@@ -147,11 +147,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
147 /* Should have been verified before we got here... */ 147 /* Should have been verified before we got here... */
148 BUG_ON(!recs); 148 BUG_ON(!recs);
149 local_irq_save(flags); 149 local_irq_save(flags);
150 kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); 150 kaddr = kmap_atomic(page);
151 for (i = 0; i < recs; i++) 151 for (i = 0; i < recs; i++)
152 post_read_mst_fixup((NTFS_RECORD*)(kaddr + 152 post_read_mst_fixup((NTFS_RECORD*)(kaddr +
153 i * rec_size), rec_size); 153 i * rec_size), rec_size);
154 kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); 154 kunmap_atomic(kaddr);
155 local_irq_restore(flags); 155 local_irq_restore(flags);
156 flush_dcache_page(page); 156 flush_dcache_page(page);
157 if (likely(page_uptodate && !PageError(page))) 157 if (likely(page_uptodate && !PageError(page)))
@@ -504,7 +504,7 @@ retry_readpage:
504 /* Race with shrinking truncate. */ 504 /* Race with shrinking truncate. */
505 attr_len = i_size; 505 attr_len = i_size;
506 } 506 }
507 addr = kmap_atomic(page, KM_USER0); 507 addr = kmap_atomic(page);
508 /* Copy the data to the page. */ 508 /* Copy the data to the page. */
509 memcpy(addr, (u8*)ctx->attr + 509 memcpy(addr, (u8*)ctx->attr +
510 le16_to_cpu(ctx->attr->data.resident.value_offset), 510 le16_to_cpu(ctx->attr->data.resident.value_offset),
@@ -512,7 +512,7 @@ retry_readpage:
512 /* Zero the remainder of the page. */ 512 /* Zero the remainder of the page. */
513 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 513 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
514 flush_dcache_page(page); 514 flush_dcache_page(page);
515 kunmap_atomic(addr, KM_USER0); 515 kunmap_atomic(addr);
516put_unm_err_out: 516put_unm_err_out:
517 ntfs_attr_put_search_ctx(ctx); 517 ntfs_attr_put_search_ctx(ctx);
518unm_err_out: 518unm_err_out:
@@ -746,14 +746,14 @@ lock_retry_remap:
746 unsigned long *bpos, *bend; 746 unsigned long *bpos, *bend;
747 747
748 /* Check if the buffer is zero. */ 748 /* Check if the buffer is zero. */
749 kaddr = kmap_atomic(page, KM_USER0); 749 kaddr = kmap_atomic(page);
750 bpos = (unsigned long *)(kaddr + bh_offset(bh)); 750 bpos = (unsigned long *)(kaddr + bh_offset(bh));
751 bend = (unsigned long *)((u8*)bpos + blocksize); 751 bend = (unsigned long *)((u8*)bpos + blocksize);
752 do { 752 do {
753 if (unlikely(*bpos)) 753 if (unlikely(*bpos))
754 break; 754 break;
755 } while (likely(++bpos < bend)); 755 } while (likely(++bpos < bend));
756 kunmap_atomic(kaddr, KM_USER0); 756 kunmap_atomic(kaddr);
757 if (bpos == bend) { 757 if (bpos == bend) {
758 /* 758 /*
759 * Buffer is zero and sparse, no need to write 759 * Buffer is zero and sparse, no need to write
@@ -1495,14 +1495,14 @@ retry_writepage:
1495 /* Shrinking cannot fail. */ 1495 /* Shrinking cannot fail. */
1496 BUG_ON(err); 1496 BUG_ON(err);
1497 } 1497 }
1498 addr = kmap_atomic(page, KM_USER0); 1498 addr = kmap_atomic(page);
1499 /* Copy the data from the page to the mft record. */ 1499 /* Copy the data from the page to the mft record. */
1500 memcpy((u8*)ctx->attr + 1500 memcpy((u8*)ctx->attr +
1501 le16_to_cpu(ctx->attr->data.resident.value_offset), 1501 le16_to_cpu(ctx->attr->data.resident.value_offset),
1502 addr, attr_len); 1502 addr, attr_len);
1503 /* Zero out of bounds area in the page cache page. */ 1503 /* Zero out of bounds area in the page cache page. */
1504 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 1504 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
1505 kunmap_atomic(addr, KM_USER0); 1505 kunmap_atomic(addr);
1506 flush_dcache_page(page); 1506 flush_dcache_page(page);
1507 flush_dcache_mft_record_page(ctx->ntfs_ino); 1507 flush_dcache_mft_record_page(ctx->ntfs_ino);
1508 /* We are done with the page. */ 1508 /* We are done with the page. */
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index f14fde2b03d6..a27e3fecefaf 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. 2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2007 Anton Altaparmakov 4 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -345,10 +345,10 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
345 unsigned long flags; 345 unsigned long flags;
346 bool is_retry = false; 346 bool is_retry = false;
347 347
348 BUG_ON(!ni);
348 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", 349 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
349 ni->mft_no, (unsigned long long)vcn, 350 ni->mft_no, (unsigned long long)vcn,
350 write_locked ? "write" : "read"); 351 write_locked ? "write" : "read");
351 BUG_ON(!ni);
352 BUG_ON(!NInoNonResident(ni)); 352 BUG_ON(!NInoNonResident(ni));
353 BUG_ON(vcn < 0); 353 BUG_ON(vcn < 0);
354 if (!ni->runlist.rl) { 354 if (!ni->runlist.rl) {
@@ -469,9 +469,9 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
469 int err = 0; 469 int err = 0;
470 bool is_retry = false; 470 bool is_retry = false;
471 471
472 BUG_ON(!ni);
472 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.", 473 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
473 ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out"); 474 ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
474 BUG_ON(!ni);
475 BUG_ON(!NInoNonResident(ni)); 475 BUG_ON(!NInoNonResident(ni));
476 BUG_ON(vcn < 0); 476 BUG_ON(vcn < 0);
477 if (!ni->runlist.rl) { 477 if (!ni->runlist.rl) {
@@ -1656,12 +1656,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
1656 attr_size = le32_to_cpu(a->data.resident.value_length); 1656 attr_size = le32_to_cpu(a->data.resident.value_length);
1657 BUG_ON(attr_size != data_size); 1657 BUG_ON(attr_size != data_size);
1658 if (page && !PageUptodate(page)) { 1658 if (page && !PageUptodate(page)) {
1659 kaddr = kmap_atomic(page, KM_USER0); 1659 kaddr = kmap_atomic(page);
1660 memcpy(kaddr, (u8*)a + 1660 memcpy(kaddr, (u8*)a +
1661 le16_to_cpu(a->data.resident.value_offset), 1661 le16_to_cpu(a->data.resident.value_offset),
1662 attr_size); 1662 attr_size);
1663 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size); 1663 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
1664 kunmap_atomic(kaddr, KM_USER0); 1664 kunmap_atomic(kaddr);
1665 flush_dcache_page(page); 1665 flush_dcache_page(page);
1666 SetPageUptodate(page); 1666 SetPageUptodate(page);
1667 } 1667 }
@@ -1806,9 +1806,9 @@ undo_err_out:
1806 sizeof(a->data.resident.reserved)); 1806 sizeof(a->data.resident.reserved));
1807 /* Copy the data from the page back to the attribute value. */ 1807 /* Copy the data from the page back to the attribute value. */
1808 if (page) { 1808 if (page) {
1809 kaddr = kmap_atomic(page, KM_USER0); 1809 kaddr = kmap_atomic(page);
1810 memcpy((u8*)a + mp_ofs, kaddr, attr_size); 1810 memcpy((u8*)a + mp_ofs, kaddr, attr_size);
1811 kunmap_atomic(kaddr, KM_USER0); 1811 kunmap_atomic(kaddr);
1812 } 1812 }
1813 /* Setup the allocated size in the ntfs inode in case it changed. */ 1813 /* Setup the allocated size in the ntfs inode in case it changed. */
1814 write_lock_irqsave(&ni->size_lock, flags); 1814 write_lock_irqsave(&ni->size_lock, flags);
@@ -2540,10 +2540,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2540 size = PAGE_CACHE_SIZE; 2540 size = PAGE_CACHE_SIZE;
2541 if (idx == end) 2541 if (idx == end)
2542 size = end_ofs; 2542 size = end_ofs;
2543 kaddr = kmap_atomic(page, KM_USER0); 2543 kaddr = kmap_atomic(page);
2544 memset(kaddr + start_ofs, val, size - start_ofs); 2544 memset(kaddr + start_ofs, val, size - start_ofs);
2545 flush_dcache_page(page); 2545 flush_dcache_page(page);
2546 kunmap_atomic(kaddr, KM_USER0); 2546 kunmap_atomic(kaddr);
2547 set_page_dirty(page); 2547 set_page_dirty(page);
2548 page_cache_release(page); 2548 page_cache_release(page);
2549 balance_dirty_pages_ratelimited(mapping); 2549 balance_dirty_pages_ratelimited(mapping);
@@ -2561,10 +2561,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2561 "page (index 0x%lx).", idx); 2561 "page (index 0x%lx).", idx);
2562 return -ENOMEM; 2562 return -ENOMEM;
2563 } 2563 }
2564 kaddr = kmap_atomic(page, KM_USER0); 2564 kaddr = kmap_atomic(page);
2565 memset(kaddr, val, PAGE_CACHE_SIZE); 2565 memset(kaddr, val, PAGE_CACHE_SIZE);
2566 flush_dcache_page(page); 2566 flush_dcache_page(page);
2567 kunmap_atomic(kaddr, KM_USER0); 2567 kunmap_atomic(kaddr);
2568 /* 2568 /*
2569 * If the page has buffers, mark them uptodate since buffer 2569 * If the page has buffers, mark them uptodate since buffer
2570 * state and not page state is definitive in 2.6 kernels. 2570 * state and not page state is definitive in 2.6 kernels.
@@ -2598,10 +2598,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2598 "(error, index 0x%lx).", idx); 2598 "(error, index 0x%lx).", idx);
2599 return PTR_ERR(page); 2599 return PTR_ERR(page);
2600 } 2600 }
2601 kaddr = kmap_atomic(page, KM_USER0); 2601 kaddr = kmap_atomic(page);
2602 memset(kaddr, val, end_ofs); 2602 memset(kaddr, val, end_ofs);
2603 flush_dcache_page(page); 2603 flush_dcache_page(page);
2604 kunmap_atomic(kaddr, KM_USER0); 2604 kunmap_atomic(kaddr);
2605 set_page_dirty(page); 2605 set_page_dirty(page);
2606 page_cache_release(page); 2606 page_cache_release(page);
2607 balance_dirty_pages_ratelimited(mapping); 2607 balance_dirty_pages_ratelimited(mapping);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c587e2d27183..8639169221c7 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -704,7 +704,7 @@ map_buffer_cached:
704 u8 *kaddr; 704 u8 *kaddr;
705 unsigned pofs; 705 unsigned pofs;
706 706
707 kaddr = kmap_atomic(page, KM_USER0); 707 kaddr = kmap_atomic(page);
708 if (bh_pos < pos) { 708 if (bh_pos < pos) {
709 pofs = bh_pos & ~PAGE_CACHE_MASK; 709 pofs = bh_pos & ~PAGE_CACHE_MASK;
710 memset(kaddr + pofs, 0, pos - bh_pos); 710 memset(kaddr + pofs, 0, pos - bh_pos);
@@ -713,7 +713,7 @@ map_buffer_cached:
713 pofs = end & ~PAGE_CACHE_MASK; 713 pofs = end & ~PAGE_CACHE_MASK;
714 memset(kaddr + pofs, 0, bh_end - end); 714 memset(kaddr + pofs, 0, bh_end - end);
715 } 715 }
716 kunmap_atomic(kaddr, KM_USER0); 716 kunmap_atomic(kaddr);
717 flush_dcache_page(page); 717 flush_dcache_page(page);
718 } 718 }
719 continue; 719 continue;
@@ -1287,9 +1287,9 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
1287 len = PAGE_CACHE_SIZE - ofs; 1287 len = PAGE_CACHE_SIZE - ofs;
1288 if (len > bytes) 1288 if (len > bytes)
1289 len = bytes; 1289 len = bytes;
1290 addr = kmap_atomic(*pages, KM_USER0); 1290 addr = kmap_atomic(*pages);
1291 left = __copy_from_user_inatomic(addr + ofs, buf, len); 1291 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1292 kunmap_atomic(addr, KM_USER0); 1292 kunmap_atomic(addr);
1293 if (unlikely(left)) { 1293 if (unlikely(left)) {
1294 /* Do it the slow way. */ 1294 /* Do it the slow way. */
1295 addr = kmap(*pages); 1295 addr = kmap(*pages);
@@ -1401,10 +1401,10 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1401 len = PAGE_CACHE_SIZE - ofs; 1401 len = PAGE_CACHE_SIZE - ofs;
1402 if (len > bytes) 1402 if (len > bytes)
1403 len = bytes; 1403 len = bytes;
1404 addr = kmap_atomic(*pages, KM_USER0); 1404 addr = kmap_atomic(*pages);
1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, 1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1406 *iov, *iov_ofs, len); 1406 *iov, *iov_ofs, len);
1407 kunmap_atomic(addr, KM_USER0); 1407 kunmap_atomic(addr);
1408 if (unlikely(copied != len)) { 1408 if (unlikely(copied != len)) {
1409 /* Do it the slow way. */ 1409 /* Do it the slow way. */
1410 addr = kmap(*pages); 1410 addr = kmap(*pages);
@@ -1691,7 +1691,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
1691 BUG_ON(end > le32_to_cpu(a->length) - 1691 BUG_ON(end > le32_to_cpu(a->length) -
1692 le16_to_cpu(a->data.resident.value_offset)); 1692 le16_to_cpu(a->data.resident.value_offset));
1693 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 1693 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
1694 kaddr = kmap_atomic(page, KM_USER0); 1694 kaddr = kmap_atomic(page);
1695 /* Copy the received data from the page to the mft record. */ 1695 /* Copy the received data from the page to the mft record. */
1696 memcpy(kattr + pos, kaddr + pos, bytes); 1696 memcpy(kattr + pos, kaddr + pos, bytes);
1697 /* Update the attribute length if necessary. */ 1697 /* Update the attribute length if necessary. */
@@ -1713,7 +1713,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
1713 flush_dcache_page(page); 1713 flush_dcache_page(page);
1714 SetPageUptodate(page); 1714 SetPageUptodate(page);
1715 } 1715 }
1716 kunmap_atomic(kaddr, KM_USER0); 1716 kunmap_atomic(kaddr);
1717 /* Update initialized_size/i_size if necessary. */ 1717 /* Update initialized_size/i_size if necessary. */
1718 read_lock_irqsave(&ni->size_lock, flags); 1718 read_lock_irqsave(&ni->size_lock, flags);
1719 initialized_size = ni->initialized_size; 1719 initialized_size = ni->initialized_size;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index faece7190866..809c0e6d8e09 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2008,14 +2008,14 @@ typedef struct {
2008 * 2008 *
2009 * When a directory is small enough to fit inside the index root then this 2009 * When a directory is small enough to fit inside the index root then this
2010 * is the only attribute describing the directory. When the directory is too 2010 * is the only attribute describing the directory. When the directory is too
2011 * large to fit in the index root, on the other hand, two aditional attributes 2011 * large to fit in the index root, on the other hand, two additional attributes
2012 * are present: an index allocation attribute, containing sub-nodes of the B+ 2012 * are present: an index allocation attribute, containing sub-nodes of the B+
2013 * directory tree (see below), and a bitmap attribute, describing which virtual 2013 * directory tree (see below), and a bitmap attribute, describing which virtual
2014 * cluster numbers (vcns) in the index allocation attribute are in use by an 2014 * cluster numbers (vcns) in the index allocation attribute are in use by an
2015 * index block. 2015 * index block.
2016 * 2016 *
2017 * NOTE: The root directory (FILE_root) contains an entry for itself. Other 2017 * NOTE: The root directory (FILE_root) contains an entry for itself. Other
2018 * dircetories do not contain entries for themselves, though. 2018 * directories do not contain entries for themselves, though.
2019 */ 2019 */
2020typedef struct { 2020typedef struct {
2021 ATTR_TYPE type; /* Type of the indexed attribute. Is 2021 ATTR_TYPE type; /* Type of the indexed attribute. Is
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 382857f9c7db..3014a36a255b 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1367 ntfs_error(vol->sb, "Failed to merge runlists for mft " 1367 ntfs_error(vol->sb, "Failed to merge runlists for mft "
1368 "bitmap."); 1368 "bitmap.");
1369 if (ntfs_cluster_free_from_rl(vol, rl2)) { 1369 if (ntfs_cluster_free_from_rl(vol, rl2)) {
1370 ntfs_error(vol->sb, "Failed to dealocate " 1370 ntfs_error(vol->sb, "Failed to deallocate "
1371 "allocated cluster.%s", es); 1371 "allocated cluster.%s", es);
1372 NVolSetErrors(vol); 1372 NVolSetErrors(vol);
1373 } 1373 }
@@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1805 ntfs_error(vol->sb, "Failed to merge runlists for mft data " 1805 ntfs_error(vol->sb, "Failed to merge runlists for mft data "
1806 "attribute."); 1806 "attribute.");
1807 if (ntfs_cluster_free_from_rl(vol, rl2)) { 1807 if (ntfs_cluster_free_from_rl(vol, rl2)) {
1808 ntfs_error(vol->sb, "Failed to dealocate clusters " 1808 ntfs_error(vol->sb, "Failed to deallocate clusters "
1809 "from the mft data attribute.%s", es); 1809 "from the mft data attribute.%s", es);
1810 NVolSetErrors(vol); 1810 NVolSetErrors(vol);
1811 } 1811 }
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 5a4a8af5c406..b341492542ca 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. 2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2001,2002 Richard Russon 5 * Copyright (c) 2001,2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -1239,7 +1239,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1239{ 1239{
1240 MFT_REF mref; 1240 MFT_REF mref;
1241 struct inode *vi; 1241 struct inode *vi;
1242 ntfs_inode *ni;
1243 struct page *page; 1242 struct page *page;
1244 u32 *kaddr, *kend; 1243 u32 *kaddr, *kend;
1245 ntfs_name *name = NULL; 1244 ntfs_name *name = NULL;
@@ -1290,7 +1289,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1290 "is not the system volume.", i_size_read(vi)); 1289 "is not the system volume.", i_size_read(vi));
1291 goto iput_out; 1290 goto iput_out;
1292 } 1291 }
1293 ni = NTFS_I(vi);
1294 page = ntfs_map_page(vi->i_mapping, 0); 1292 page = ntfs_map_page(vi->i_mapping, 0);
1295 if (IS_ERR(page)) { 1293 if (IS_ERR(page)) {
1296 ntfs_error(vol->sb, "Failed to read from hiberfil.sys."); 1294 ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
@@ -2475,7 +2473,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2475 nr_free -= PAGE_CACHE_SIZE * 8; 2473 nr_free -= PAGE_CACHE_SIZE * 8;
2476 continue; 2474 continue;
2477 } 2475 }
2478 kaddr = kmap_atomic(page, KM_USER0); 2476 kaddr = kmap_atomic(page);
2479 /* 2477 /*
2480 * Subtract the number of set bits. If this 2478 * Subtract the number of set bits. If this
2481 * is the last page and it is partial we don't really care as 2479 * is the last page and it is partial we don't really care as
@@ -2485,7 +2483,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2485 */ 2483 */
2486 nr_free -= bitmap_weight(kaddr, 2484 nr_free -= bitmap_weight(kaddr,
2487 PAGE_CACHE_SIZE * BITS_PER_BYTE); 2485 PAGE_CACHE_SIZE * BITS_PER_BYTE);
2488 kunmap_atomic(kaddr, KM_USER0); 2486 kunmap_atomic(kaddr);
2489 page_cache_release(page); 2487 page_cache_release(page);
2490 } 2488 }
2491 ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); 2489 ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
@@ -2546,7 +2544,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2546 nr_free -= PAGE_CACHE_SIZE * 8; 2544 nr_free -= PAGE_CACHE_SIZE * 8;
2547 continue; 2545 continue;
2548 } 2546 }
2549 kaddr = kmap_atomic(page, KM_USER0); 2547 kaddr = kmap_atomic(page);
2550 /* 2548 /*
2551 * Subtract the number of set bits. If this 2549 * Subtract the number of set bits. If this
2552 * is the last page and it is partial we don't really care as 2550 * is the last page and it is partial we don't really care as
@@ -2556,7 +2554,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2556 */ 2554 */
2557 nr_free -= bitmap_weight(kaddr, 2555 nr_free -= bitmap_weight(kaddr,
2558 PAGE_CACHE_SIZE * BITS_PER_BYTE); 2556 PAGE_CACHE_SIZE * BITS_PER_BYTE);
2559 kunmap_atomic(kaddr, KM_USER0); 2557 kunmap_atomic(kaddr);
2560 page_cache_release(page); 2558 page_cache_release(page);
2561 } 2559 }
2562 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", 2560 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
@@ -2910,9 +2908,10 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2910 ntfs_error(sb, "Failed to load system files."); 2908 ntfs_error(sb, "Failed to load system files.");
2911 goto unl_upcase_iput_tmp_ino_err_out_now; 2909 goto unl_upcase_iput_tmp_ino_err_out_now;
2912 } 2910 }
2913 if ((sb->s_root = d_alloc_root(vol->root_ino))) { 2911
2914 /* We grab a reference, simulating an ntfs_iget(). */ 2912 /* We grab a reference, simulating an ntfs_iget(). */
2915 ihold(vol->root_ino); 2913 ihold(vol->root_ino);
2914 if ((sb->s_root = d_make_root(vol->root_ino))) {
2916 ntfs_debug("Exiting, status successful."); 2915 ntfs_debug("Exiting, status successful.");
2917 /* Release the default upcase if it has no users. */ 2916 /* Release the default upcase if it has no users. */
2918 mutex_lock(&ntfs_lock); 2917 mutex_lock(&ntfs_lock);
@@ -3160,6 +3159,8 @@ static int __init init_ntfs_fs(void)
3160 } 3159 }
3161 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); 3160 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
3162 3161
3162 /* Unregister the ntfs sysctls. */
3163 ntfs_sysctl(0);
3163sysctl_err_out: 3164sysctl_err_out:
3164 kmem_cache_destroy(ntfs_big_inode_cache); 3165 kmem_cache_destroy(ntfs_big_inode_cache);
3165big_inode_err_out: 3166big_inode_err_out:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 78b68af3b0e3..657743254eb9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -102,7 +102,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
102 * copy, the data is still good. */ 102 * copy, the data is still good. */
103 if (buffer_jbd(buffer_cache_bh) 103 if (buffer_jbd(buffer_cache_bh)
104 && ocfs2_inode_is_new(inode)) { 104 && ocfs2_inode_is_new(inode)) {
105 kaddr = kmap_atomic(bh_result->b_page, KM_USER0); 105 kaddr = kmap_atomic(bh_result->b_page);
106 if (!kaddr) { 106 if (!kaddr) {
107 mlog(ML_ERROR, "couldn't kmap!\n"); 107 mlog(ML_ERROR, "couldn't kmap!\n");
108 goto bail; 108 goto bail;
@@ -110,7 +110,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
110 memcpy(kaddr + (bh_result->b_size * iblock), 110 memcpy(kaddr + (bh_result->b_size * iblock),
111 buffer_cache_bh->b_data, 111 buffer_cache_bh->b_data,
112 bh_result->b_size); 112 bh_result->b_size);
113 kunmap_atomic(kaddr, KM_USER0); 113 kunmap_atomic(kaddr);
114 set_buffer_uptodate(bh_result); 114 set_buffer_uptodate(bh_result);
115 } 115 }
116 brelse(buffer_cache_bh); 116 brelse(buffer_cache_bh);
@@ -236,13 +236,13 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
236 return -EROFS; 236 return -EROFS;
237 } 237 }
238 238
239 kaddr = kmap_atomic(page, KM_USER0); 239 kaddr = kmap_atomic(page);
240 if (size) 240 if (size)
241 memcpy(kaddr, di->id2.i_data.id_data, size); 241 memcpy(kaddr, di->id2.i_data.id_data, size);
242 /* Clear the remaining part of the page */ 242 /* Clear the remaining part of the page */
243 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size); 243 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
244 flush_dcache_page(page); 244 flush_dcache_page(page);
245 kunmap_atomic(kaddr, KM_USER0); 245 kunmap_atomic(kaddr);
246 246
247 SetPageUptodate(page); 247 SetPageUptodate(page);
248 248
@@ -689,7 +689,7 @@ static void ocfs2_clear_page_regions(struct page *page,
689 689
690 ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); 690 ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);
691 691
692 kaddr = kmap_atomic(page, KM_USER0); 692 kaddr = kmap_atomic(page);
693 693
694 if (from || to) { 694 if (from || to) {
695 if (from > cluster_start) 695 if (from > cluster_start)
@@ -700,7 +700,7 @@ static void ocfs2_clear_page_regions(struct page *page,
700 memset(kaddr + cluster_start, 0, cluster_end - cluster_start); 700 memset(kaddr + cluster_start, 0, cluster_end - cluster_start);
701 } 701 }
702 702
703 kunmap_atomic(kaddr, KM_USER0); 703 kunmap_atomic(kaddr);
704} 704}
705 705
706/* 706/*
@@ -1981,9 +1981,9 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1981 } 1981 }
1982 } 1982 }
1983 1983
1984 kaddr = kmap_atomic(wc->w_target_page, KM_USER0); 1984 kaddr = kmap_atomic(wc->w_target_page);
1985 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); 1985 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1986 kunmap_atomic(kaddr, KM_USER0); 1986 kunmap_atomic(kaddr);
1987 1987
1988 trace_ocfs2_write_end_inline( 1988 trace_ocfs2_write_end_inline(
1989 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1989 (unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index abfac0d7ae9c..3b5825ef3193 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -582,24 +582,14 @@ static int dlmfs_fill_super(struct super_block * sb,
582 void * data, 582 void * data,
583 int silent) 583 int silent)
584{ 584{
585 struct inode * inode;
586 struct dentry * root;
587
588 sb->s_maxbytes = MAX_LFS_FILESIZE; 585 sb->s_maxbytes = MAX_LFS_FILESIZE;
589 sb->s_blocksize = PAGE_CACHE_SIZE; 586 sb->s_blocksize = PAGE_CACHE_SIZE;
590 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 587 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
591 sb->s_magic = DLMFS_MAGIC; 588 sb->s_magic = DLMFS_MAGIC;
592 sb->s_op = &dlmfs_ops; 589 sb->s_op = &dlmfs_ops;
593 inode = dlmfs_get_root_inode(sb); 590 sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
594 if (!inode) 591 if (!sb->s_root)
595 return -ENOMEM;
596
597 root = d_alloc_root(inode);
598 if (!root) {
599 iput(inode);
600 return -ENOMEM; 592 return -ENOMEM;
601 }
602 sb->s_root = root;
603 return 0; 593 return 0;
604} 594}
605 595
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index be244692550d..a9856e3eaaf0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1053,7 +1053,7 @@ static int ocfs2_rename(struct inode *old_dir,
1053 handle_t *handle = NULL; 1053 handle_t *handle = NULL;
1054 struct buffer_head *old_dir_bh = NULL; 1054 struct buffer_head *old_dir_bh = NULL;
1055 struct buffer_head *new_dir_bh = NULL; 1055 struct buffer_head *new_dir_bh = NULL;
1056 nlink_t old_dir_nlink = old_dir->i_nlink; 1056 u32 old_dir_nlink = old_dir->i_nlink;
1057 struct ocfs2_dinode *old_di; 1057 struct ocfs2_dinode *old_di;
1058 struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, }; 1058 struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
1059 struct ocfs2_dir_lookup_result target_lookup_res = { NULL, }; 1059 struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 604e12c4e979..68f4541c2db9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1154,19 +1154,19 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1154 } 1154 }
1155 1155
1156 status = ocfs2_mount_volume(sb); 1156 status = ocfs2_mount_volume(sb);
1157 if (osb->root_inode)
1158 inode = igrab(osb->root_inode);
1159
1160 if (status < 0) 1157 if (status < 0)
1161 goto read_super_error; 1158 goto read_super_error;
1162 1159
1160 if (osb->root_inode)
1161 inode = igrab(osb->root_inode);
1162
1163 if (!inode) { 1163 if (!inode) {
1164 status = -EIO; 1164 status = -EIO;
1165 mlog_errno(status); 1165 mlog_errno(status);
1166 goto read_super_error; 1166 goto read_super_error;
1167 } 1167 }
1168 1168
1169 root = d_alloc_root(inode); 1169 root = d_make_root(inode);
1170 if (!root) { 1170 if (!root) {
1171 status = -ENOMEM; 1171 status = -ENOMEM;
1172 mlog_errno(status); 1172 mlog_errno(status);
@@ -1220,9 +1220,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1220read_super_error: 1220read_super_error:
1221 brelse(bh); 1221 brelse(bh);
1222 1222
1223 if (inode)
1224 iput(inode);
1225
1226 if (osb) { 1223 if (osb) {
1227 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1224 atomic_set(&osb->vol_state, VOLUME_DISABLED);
1228 wake_up(&osb->osb_mount_event); 1225 wake_up(&osb->osb_mount_event);
@@ -1627,21 +1624,17 @@ static int __init ocfs2_init(void)
1627 init_waitqueue_head(&ocfs2__ioend_wq[i]); 1624 init_waitqueue_head(&ocfs2__ioend_wq[i]);
1628 1625
1629 status = init_ocfs2_uptodate_cache(); 1626 status = init_ocfs2_uptodate_cache();
1630 if (status < 0) { 1627 if (status < 0)
1631 mlog_errno(status); 1628 goto out1;
1632 goto leave;
1633 }
1634 1629
1635 status = ocfs2_initialize_mem_caches(); 1630 status = ocfs2_initialize_mem_caches();
1636 if (status < 0) { 1631 if (status < 0)
1637 mlog_errno(status); 1632 goto out2;
1638 goto leave;
1639 }
1640 1633
1641 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq"); 1634 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
1642 if (!ocfs2_wq) { 1635 if (!ocfs2_wq) {
1643 status = -ENOMEM; 1636 status = -ENOMEM;
1644 goto leave; 1637 goto out3;
1645 } 1638 }
1646 1639
1647 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 1640 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
@@ -1653,17 +1646,23 @@ static int __init ocfs2_init(void)
1653 ocfs2_set_locking_protocol(); 1646 ocfs2_set_locking_protocol();
1654 1647
1655 status = register_quota_format(&ocfs2_quota_format); 1648 status = register_quota_format(&ocfs2_quota_format);
1656leave: 1649 if (status < 0)
1657 if (status < 0) { 1650 goto out4;
1658 ocfs2_free_mem_caches(); 1651 status = register_filesystem(&ocfs2_fs_type);
1659 exit_ocfs2_uptodate_cache(); 1652 if (!status)
1660 mlog_errno(status); 1653 return 0;
1661 }
1662 1654
1663 if (status >= 0) { 1655 unregister_quota_format(&ocfs2_quota_format);
1664 return register_filesystem(&ocfs2_fs_type); 1656out4:
1665 } else 1657 destroy_workqueue(ocfs2_wq);
1666 return -1; 1658 debugfs_remove(ocfs2_debugfs_root);
1659out3:
1660 ocfs2_free_mem_caches();
1661out2:
1662 exit_ocfs2_uptodate_cache();
1663out1:
1664 mlog_errno(status);
1665 return status;
1667} 1666}
1668 1667
1669static void __exit ocfs2_exit(void) 1668static void __exit ocfs2_exit(void)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 6065bb0ba207..dbc842222589 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -539,11 +539,9 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
539 goto out_brelse_bh2; 539 goto out_brelse_bh2;
540 } 540 }
541 541
542 sb->s_root = d_alloc_root(root); 542 sb->s_root = d_make_root(root);
543 if (!sb->s_root) { 543 if (!sb->s_root)
544 iput(root);
545 goto out_brelse_bh2; 544 goto out_brelse_bh2;
546 }
547 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); 545 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
548 546
549 ret = 0; 547 ret = 0;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a88c03bc749d..bc49c975d501 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -408,13 +408,12 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
408 oi->type = op_inode_node; 408 oi->type = op_inode_node;
409 oi->u.node = of_find_node_by_path("/"); 409 oi->u.node = of_find_node_by_path("/");
410 410
411 s->s_root = d_alloc_root(root_inode); 411 s->s_root = d_make_root(root_inode);
412 if (!s->s_root) 412 if (!s->s_root)
413 goto out_no_root_dentry; 413 goto out_no_root_dentry;
414 return 0; 414 return 0;
415 415
416out_no_root_dentry: 416out_no_root_dentry:
417 iput(root_inode);
418 ret = -ENOMEM; 417 ret = -ENOMEM;
419out_no_root: 418out_no_root:
420 printk("openprom_fill_super: get root inode failed\n"); 419 printk("openprom_fill_super: get root inode failed\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index a932ced92a16..fe0502f9beb2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -230,7 +230,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
230{ 230{
231 if (atomic) { 231 if (atomic) {
232 buf->flags |= PIPE_BUF_FLAG_ATOMIC; 232 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
233 return kmap_atomic(buf->page, KM_USER0); 233 return kmap_atomic(buf->page);
234 } 234 }
235 235
236 return kmap(buf->page); 236 return kmap(buf->page);
@@ -251,7 +251,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
251{ 251{
252 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { 252 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
253 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; 253 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
254 kunmap_atomic(map_data, KM_USER0); 254 kunmap_atomic(map_data);
255 } else 255 } else
256 kunmap(buf->page); 256 kunmap(buf->page);
257} 257}
@@ -565,14 +565,14 @@ redo1:
565 iov_fault_in_pages_read(iov, chars); 565 iov_fault_in_pages_read(iov, chars);
566redo2: 566redo2:
567 if (atomic) 567 if (atomic)
568 src = kmap_atomic(page, KM_USER0); 568 src = kmap_atomic(page);
569 else 569 else
570 src = kmap(page); 570 src = kmap(page);
571 571
572 error = pipe_iov_copy_from_user(src, iov, chars, 572 error = pipe_iov_copy_from_user(src, iov, chars,
573 atomic); 573 atomic);
574 if (atomic) 574 if (atomic)
575 kunmap_atomic(src, KM_USER0); 575 kunmap_atomic(src);
576 else 576 else
577 kunmap(page); 577 kunmap(page);
578 578
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4548dd49b02..3b42c1418f31 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
1310 if (!p) 1310 if (!p)
1311 return -ESRCH; 1311 return -ESRCH;
1312 1312
1313 err = nice; 1313 err = proc_sched_autogroup_set_nice(p, nice);
1314 err = proc_sched_autogroup_set_nice(p, &err);
1315 if (err) 1314 if (err)
1316 count = err; 1315 count = err;
1317 1316
@@ -2990,9 +2989,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2990 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2989 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2991 ONE("stat", S_IRUGO, proc_tgid_stat), 2990 ONE("stat", S_IRUGO, proc_tgid_stat),
2992 ONE("statm", S_IRUGO, proc_pid_statm), 2991 ONE("statm", S_IRUGO, proc_pid_statm),
2993 REG("maps", S_IRUGO, proc_maps_operations), 2992 REG("maps", S_IRUGO, proc_pid_maps_operations),
2994#ifdef CONFIG_NUMA 2993#ifdef CONFIG_NUMA
2995 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2994 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
2996#endif 2995#endif
2997 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2996 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
2998 LNK("cwd", proc_cwd_link), 2997 LNK("cwd", proc_cwd_link),
@@ -3003,7 +3002,7 @@ static const struct pid_entry tgid_base_stuff[] = {
3003 REG("mountstats", S_IRUSR, proc_mountstats_operations), 3002 REG("mountstats", S_IRUSR, proc_mountstats_operations),
3004#ifdef CONFIG_PROC_PAGE_MONITOR 3003#ifdef CONFIG_PROC_PAGE_MONITOR
3005 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3004 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3006 REG("smaps", S_IRUGO, proc_smaps_operations), 3005 REG("smaps", S_IRUGO, proc_pid_smaps_operations),
3007 REG("pagemap", S_IRUGO, proc_pagemap_operations), 3006 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3008#endif 3007#endif
3009#ifdef CONFIG_SECURITY 3008#ifdef CONFIG_SECURITY
@@ -3349,9 +3348,9 @@ static const struct pid_entry tid_base_stuff[] = {
3349 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3348 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3350 ONE("stat", S_IRUGO, proc_tid_stat), 3349 ONE("stat", S_IRUGO, proc_tid_stat),
3351 ONE("statm", S_IRUGO, proc_pid_statm), 3350 ONE("statm", S_IRUGO, proc_pid_statm),
3352 REG("maps", S_IRUGO, proc_maps_operations), 3351 REG("maps", S_IRUGO, proc_tid_maps_operations),
3353#ifdef CONFIG_NUMA 3352#ifdef CONFIG_NUMA
3354 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 3353 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
3355#endif 3354#endif
3356 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3355 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
3357 LNK("cwd", proc_cwd_link), 3356 LNK("cwd", proc_cwd_link),
@@ -3361,7 +3360,7 @@ static const struct pid_entry tid_base_stuff[] = {
3361 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3360 REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
3362#ifdef CONFIG_PROC_PAGE_MONITOR 3361#ifdef CONFIG_PROC_PAGE_MONITOR
3363 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3362 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3364 REG("smaps", S_IRUGO, proc_smaps_operations), 3363 REG("smaps", S_IRUGO, proc_tid_smaps_operations),
3365 REG("pagemap", S_IRUGO, proc_pagemap_operations), 3364 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3366#endif 3365#endif
3367#ifdef CONFIG_SECURITY 3366#ifdef CONFIG_SECURITY
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 84fd3235a590..8461a7b82fdb 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -486,8 +486,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
486 486
487int proc_fill_super(struct super_block *s) 487int proc_fill_super(struct super_block *s)
488{ 488{
489 struct inode * root_inode;
490
491 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; 489 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
492 s->s_blocksize = 1024; 490 s->s_blocksize = 1024;
493 s->s_blocksize_bits = 10; 491 s->s_blocksize_bits = 10;
@@ -496,19 +494,11 @@ int proc_fill_super(struct super_block *s)
496 s->s_time_gran = 1; 494 s->s_time_gran = 1;
497 495
498 pde_get(&proc_root); 496 pde_get(&proc_root);
499 root_inode = proc_get_inode(s, &proc_root); 497 s->s_root = d_make_root(proc_get_inode(s, &proc_root));
500 if (!root_inode) 498 if (s->s_root)
501 goto out_no_root; 499 return 0;
502 root_inode->i_uid = 0;
503 root_inode->i_gid = 0;
504 s->s_root = d_alloc_root(root_inode);
505 if (!s->s_root)
506 goto out_no_root;
507 return 0;
508 500
509out_no_root:
510 printk("proc_read_super: get root inode failed\n"); 501 printk("proc_read_super: get root inode failed\n");
511 iput(root_inode);
512 pde_put(&proc_root); 502 pde_put(&proc_root);
513 return -ENOMEM; 503 return -ENOMEM;
514} 504}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 292577531ad1..c44efe19798f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -53,9 +53,12 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 53 struct pid *pid, struct task_struct *task);
54extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); 54extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
55 55
56extern const struct file_operations proc_maps_operations; 56extern const struct file_operations proc_pid_maps_operations;
57extern const struct file_operations proc_numa_maps_operations; 57extern const struct file_operations proc_tid_maps_operations;
58extern const struct file_operations proc_smaps_operations; 58extern const struct file_operations proc_pid_numa_maps_operations;
59extern const struct file_operations proc_tid_numa_maps_operations;
60extern const struct file_operations proc_pid_smaps_operations;
61extern const struct file_operations proc_tid_smaps_operations;
59extern const struct file_operations proc_clear_refs_operations; 62extern const struct file_operations proc_clear_refs_operations;
60extern const struct file_operations proc_pagemap_operations; 63extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 64extern const struct file_operations proc_net_operations;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d245cb23dd72..e5e69aff6c69 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -513,7 +513,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
513 513
514 n = copy_to_user(buffer, (char *)start, tsz); 514 n = copy_to_user(buffer, (char *)start, tsz);
515 /* 515 /*
516 * We cannot distingush between fault on source 516 * We cannot distinguish between fault on source
517 * and fault on destination. When this happens 517 * and fault on destination. When this happens
518 * we clear too and hope it will trigger the 518 * we clear too and hope it will trigger the
519 * EFAULT again. 519 * EFAULT again.
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 6d8e6a9e93ab..7fcd0d60a968 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -115,6 +115,8 @@ u64 stable_page_flags(struct page *page)
115 u |= 1 << KPF_COMPOUND_TAIL; 115 u |= 1 << KPF_COMPOUND_TAIL;
116 if (PageHuge(page)) 116 if (PageHuge(page))
117 u |= 1 << KPF_HUGE; 117 u |= 1 << KPF_HUGE;
118 else if (PageTransCompound(page))
119 u |= 1 << KPF_THP;
118 120
119 /* 121 /*
120 * Caveats on high order pages: page->_count will only be set 122 * Caveats on high order pages: page->_count will only be set
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index a6b62173d4c3..67bbf6e4e197 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -6,7 +6,9 @@
6#include <linux/poll.h> 6#include <linux/poll.h>
7#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
8#include <linux/security.h> 8#include <linux/security.h>
9#include <linux/sched.h>
9#include <linux/namei.h> 10#include <linux/namei.h>
11#include <linux/mm.h>
10#include "internal.h" 12#include "internal.h"
11 13
12static const struct dentry_operations proc_sys_dentry_operations; 14static const struct dentry_operations proc_sys_dentry_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7dcd2a250495..9694cc283511 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file,
209 return ret; 209 return ret;
210} 210}
211 211
212static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) 212static void
213show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
213{ 214{
214 struct mm_struct *mm = vma->vm_mm; 215 struct mm_struct *mm = vma->vm_mm;
215 struct file *file = vma->vm_file; 216 struct file *file = vma->vm_file;
217 struct proc_maps_private *priv = m->private;
218 struct task_struct *task = priv->task;
216 vm_flags_t flags = vma->vm_flags; 219 vm_flags_t flags = vma->vm_flags;
217 unsigned long ino = 0; 220 unsigned long ino = 0;
218 unsigned long long pgoff = 0; 221 unsigned long long pgoff = 0;
219 unsigned long start, end; 222 unsigned long start, end;
220 dev_t dev = 0; 223 dev_t dev = 0;
221 int len; 224 int len;
225 const char *name = NULL;
222 226
223 if (file) { 227 if (file) {
224 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 228 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
252 if (file) { 256 if (file) {
253 pad_len_spaces(m, len); 257 pad_len_spaces(m, len);
254 seq_path(m, &file->f_path, "\n"); 258 seq_path(m, &file->f_path, "\n");
255 } else { 259 goto done;
256 const char *name = arch_vma_name(vma); 260 }
257 if (!name) { 261
258 if (mm) { 262 name = arch_vma_name(vma);
259 if (vma->vm_start <= mm->brk && 263 if (!name) {
260 vma->vm_end >= mm->start_brk) { 264 pid_t tid;
261 name = "[heap]"; 265
262 } else if (vma->vm_start <= mm->start_stack && 266 if (!mm) {
263 vma->vm_end >= mm->start_stack) { 267 name = "[vdso]";
264 name = "[stack]"; 268 goto done;
265 } 269 }
270
271 if (vma->vm_start <= mm->brk &&
272 vma->vm_end >= mm->start_brk) {
273 name = "[heap]";
274 goto done;
275 }
276
277 tid = vm_is_stack(task, vma, is_pid);
278
279 if (tid != 0) {
280 /*
281 * Thread stack in /proc/PID/task/TID/maps or
282 * the main process stack.
283 */
284 if (!is_pid || (vma->vm_start <= mm->start_stack &&
285 vma->vm_end >= mm->start_stack)) {
286 name = "[stack]";
266 } else { 287 } else {
267 name = "[vdso]"; 288 /* Thread stack in /proc/PID/maps */
289 pad_len_spaces(m, len);
290 seq_printf(m, "[stack:%d]", tid);
268 } 291 }
269 } 292 }
270 if (name) { 293 }
271 pad_len_spaces(m, len); 294
272 seq_puts(m, name); 295done:
273 } 296 if (name) {
297 pad_len_spaces(m, len);
298 seq_puts(m, name);
274 } 299 }
275 seq_putc(m, '\n'); 300 seq_putc(m, '\n');
276} 301}
277 302
278static int show_map(struct seq_file *m, void *v) 303static int show_map(struct seq_file *m, void *v, int is_pid)
279{ 304{
280 struct vm_area_struct *vma = v; 305 struct vm_area_struct *vma = v;
281 struct proc_maps_private *priv = m->private; 306 struct proc_maps_private *priv = m->private;
282 struct task_struct *task = priv->task; 307 struct task_struct *task = priv->task;
283 308
284 show_map_vma(m, vma); 309 show_map_vma(m, vma, is_pid);
285 310
286 if (m->count < m->size) /* vma is copied successfully */ 311 if (m->count < m->size) /* vma is copied successfully */
287 m->version = (vma != get_gate_vma(task->mm)) 312 m->version = (vma != get_gate_vma(task->mm))
@@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v)
289 return 0; 314 return 0;
290} 315}
291 316
317static int show_pid_map(struct seq_file *m, void *v)
318{
319 return show_map(m, v, 1);
320}
321
322static int show_tid_map(struct seq_file *m, void *v)
323{
324 return show_map(m, v, 0);
325}
326
292static const struct seq_operations proc_pid_maps_op = { 327static const struct seq_operations proc_pid_maps_op = {
293 .start = m_start, 328 .start = m_start,
294 .next = m_next, 329 .next = m_next,
295 .stop = m_stop, 330 .stop = m_stop,
296 .show = show_map 331 .show = show_pid_map
297}; 332};
298 333
299static int maps_open(struct inode *inode, struct file *file) 334static const struct seq_operations proc_tid_maps_op = {
335 .start = m_start,
336 .next = m_next,
337 .stop = m_stop,
338 .show = show_tid_map
339};
340
341static int pid_maps_open(struct inode *inode, struct file *file)
300{ 342{
301 return do_maps_open(inode, file, &proc_pid_maps_op); 343 return do_maps_open(inode, file, &proc_pid_maps_op);
302} 344}
303 345
304const struct file_operations proc_maps_operations = { 346static int tid_maps_open(struct inode *inode, struct file *file)
305 .open = maps_open, 347{
348 return do_maps_open(inode, file, &proc_tid_maps_op);
349}
350
351const struct file_operations proc_pid_maps_operations = {
352 .open = pid_maps_open,
353 .read = seq_read,
354 .llseek = seq_lseek,
355 .release = seq_release_private,
356};
357
358const struct file_operations proc_tid_maps_operations = {
359 .open = tid_maps_open,
306 .read = seq_read, 360 .read = seq_read,
307 .llseek = seq_lseek, 361 .llseek = seq_lseek,
308 .release = seq_release_private, 362 .release = seq_release_private,
@@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
394 pte_t *pte; 448 pte_t *pte;
395 spinlock_t *ptl; 449 spinlock_t *ptl;
396 450
397 spin_lock(&walk->mm->page_table_lock); 451 if (pmd_trans_huge_lock(pmd, vma) == 1) {
398 if (pmd_trans_huge(*pmd)) { 452 smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
399 if (pmd_trans_splitting(*pmd)) {
400 spin_unlock(&walk->mm->page_table_lock);
401 wait_split_huge_page(vma->anon_vma, pmd);
402 } else {
403 smaps_pte_entry(*(pte_t *)pmd, addr,
404 HPAGE_PMD_SIZE, walk);
405 spin_unlock(&walk->mm->page_table_lock);
406 mss->anonymous_thp += HPAGE_PMD_SIZE;
407 return 0;
408 }
409 } else {
410 spin_unlock(&walk->mm->page_table_lock); 453 spin_unlock(&walk->mm->page_table_lock);
454 mss->anonymous_thp += HPAGE_PMD_SIZE;
455 return 0;
411 } 456 }
457
458 if (pmd_trans_unstable(pmd))
459 return 0;
412 /* 460 /*
413 * The mmap_sem held all the way back in m_start() is what 461 * The mmap_sem held all the way back in m_start() is what
414 * keeps khugepaged out of here and from collapsing things 462 * keeps khugepaged out of here and from collapsing things
@@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
422 return 0; 470 return 0;
423} 471}
424 472
425static int show_smap(struct seq_file *m, void *v) 473static int show_smap(struct seq_file *m, void *v, int is_pid)
426{ 474{
427 struct proc_maps_private *priv = m->private; 475 struct proc_maps_private *priv = m->private;
428 struct task_struct *task = priv->task; 476 struct task_struct *task = priv->task;
@@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v)
440 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 488 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
441 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); 489 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
442 490
443 show_map_vma(m, vma); 491 show_map_vma(m, vma, is_pid);
444 492
445 seq_printf(m, 493 seq_printf(m,
446 "Size: %8lu kB\n" 494 "Size: %8lu kB\n"
@@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v)
479 return 0; 527 return 0;
480} 528}
481 529
530static int show_pid_smap(struct seq_file *m, void *v)
531{
532 return show_smap(m, v, 1);
533}
534
535static int show_tid_smap(struct seq_file *m, void *v)
536{
537 return show_smap(m, v, 0);
538}
539
482static const struct seq_operations proc_pid_smaps_op = { 540static const struct seq_operations proc_pid_smaps_op = {
483 .start = m_start, 541 .start = m_start,
484 .next = m_next, 542 .next = m_next,
485 .stop = m_stop, 543 .stop = m_stop,
486 .show = show_smap 544 .show = show_pid_smap
545};
546
547static const struct seq_operations proc_tid_smaps_op = {
548 .start = m_start,
549 .next = m_next,
550 .stop = m_stop,
551 .show = show_tid_smap
487}; 552};
488 553
489static int smaps_open(struct inode *inode, struct file *file) 554static int pid_smaps_open(struct inode *inode, struct file *file)
490{ 555{
491 return do_maps_open(inode, file, &proc_pid_smaps_op); 556 return do_maps_open(inode, file, &proc_pid_smaps_op);
492} 557}
493 558
494const struct file_operations proc_smaps_operations = { 559static int tid_smaps_open(struct inode *inode, struct file *file)
495 .open = smaps_open, 560{
561 return do_maps_open(inode, file, &proc_tid_smaps_op);
562}
563
564const struct file_operations proc_pid_smaps_operations = {
565 .open = pid_smaps_open,
566 .read = seq_read,
567 .llseek = seq_lseek,
568 .release = seq_release_private,
569};
570
571const struct file_operations proc_tid_smaps_operations = {
572 .open = tid_smaps_open,
496 .read = seq_read, 573 .read = seq_read,
497 .llseek = seq_lseek, 574 .llseek = seq_lseek,
498 .release = seq_release_private, 575 .release = seq_release_private,
@@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
507 struct page *page; 584 struct page *page;
508 585
509 split_huge_page_pmd(walk->mm, pmd); 586 split_huge_page_pmd(walk->mm, pmd);
587 if (pmd_trans_unstable(pmd))
588 return 0;
510 589
511 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 590 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
512 for (; addr != end; pte++, addr += PAGE_SIZE) { 591 for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -598,11 +677,18 @@ const struct file_operations proc_clear_refs_operations = {
598 .llseek = noop_llseek, 677 .llseek = noop_llseek,
599}; 678};
600 679
680typedef struct {
681 u64 pme;
682} pagemap_entry_t;
683
601struct pagemapread { 684struct pagemapread {
602 int pos, len; 685 int pos, len;
603 u64 *buffer; 686 pagemap_entry_t *buffer;
604}; 687};
605 688
689#define PAGEMAP_WALK_SIZE (PMD_SIZE)
690#define PAGEMAP_WALK_MASK (PMD_MASK)
691
606#define PM_ENTRY_BYTES sizeof(u64) 692#define PM_ENTRY_BYTES sizeof(u64)
607#define PM_STATUS_BITS 3 693#define PM_STATUS_BITS 3
608#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 694#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
@@ -620,10 +706,15 @@ struct pagemapread {
620#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) 706#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
621#define PM_END_OF_BUFFER 1 707#define PM_END_OF_BUFFER 1
622 708
623static int add_to_pagemap(unsigned long addr, u64 pfn, 709static inline pagemap_entry_t make_pme(u64 val)
710{
711 return (pagemap_entry_t) { .pme = val };
712}
713
714static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
624 struct pagemapread *pm) 715 struct pagemapread *pm)
625{ 716{
626 pm->buffer[pm->pos++] = pfn; 717 pm->buffer[pm->pos++] = *pme;
627 if (pm->pos >= pm->len) 718 if (pm->pos >= pm->len)
628 return PM_END_OF_BUFFER; 719 return PM_END_OF_BUFFER;
629 return 0; 720 return 0;
@@ -635,8 +726,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
635 struct pagemapread *pm = walk->private; 726 struct pagemapread *pm = walk->private;
636 unsigned long addr; 727 unsigned long addr;
637 int err = 0; 728 int err = 0;
729 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
730
638 for (addr = start; addr < end; addr += PAGE_SIZE) { 731 for (addr = start; addr < end; addr += PAGE_SIZE) {
639 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); 732 err = add_to_pagemap(addr, &pme, pm);
640 if (err) 733 if (err)
641 break; 734 break;
642 } 735 }
@@ -649,17 +742,35 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
649 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 742 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
650} 743}
651 744
652static u64 pte_to_pagemap_entry(pte_t pte) 745static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
653{ 746{
654 u64 pme = 0;
655 if (is_swap_pte(pte)) 747 if (is_swap_pte(pte))
656 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 748 *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte))
657 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 749 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);
658 else if (pte_present(pte)) 750 else if (pte_present(pte))
659 pme = PM_PFRAME(pte_pfn(pte)) 751 *pme = make_pme(PM_PFRAME(pte_pfn(pte))
660 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 752 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
661 return pme; 753}
754
755#ifdef CONFIG_TRANSPARENT_HUGEPAGE
756static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
757 pmd_t pmd, int offset)
758{
759 /*
760 * Currently pmd for thp is always present because thp can not be
761 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
762 * This if-check is just to prepare for future implementation.
763 */
764 if (pmd_present(pmd))
765 *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
766 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
662} 767}
768#else
769static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
770 pmd_t pmd, int offset)
771{
772}
773#endif
663 774
664static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 775static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
665 struct mm_walk *walk) 776 struct mm_walk *walk)
@@ -668,13 +779,30 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
668 struct pagemapread *pm = walk->private; 779 struct pagemapread *pm = walk->private;
669 pte_t *pte; 780 pte_t *pte;
670 int err = 0; 781 int err = 0;
782 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
671 783
672 split_huge_page_pmd(walk->mm, pmd); 784 if (pmd_trans_unstable(pmd))
785 return 0;
673 786
674 /* find the first VMA at or above 'addr' */ 787 /* find the first VMA at or above 'addr' */
675 vma = find_vma(walk->mm, addr); 788 vma = find_vma(walk->mm, addr);
789 spin_lock(&walk->mm->page_table_lock);
790 if (pmd_trans_huge_lock(pmd, vma) == 1) {
791 for (; addr != end; addr += PAGE_SIZE) {
792 unsigned long offset;
793
794 offset = (addr & ~PAGEMAP_WALK_MASK) >>
795 PAGE_SHIFT;
796 thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
797 err = add_to_pagemap(addr, &pme, pm);
798 if (err)
799 break;
800 }
801 spin_unlock(&walk->mm->page_table_lock);
802 return err;
803 }
804
676 for (; addr != end; addr += PAGE_SIZE) { 805 for (; addr != end; addr += PAGE_SIZE) {
677 u64 pfn = PM_NOT_PRESENT;
678 806
679 /* check to see if we've left 'vma' behind 807 /* check to see if we've left 'vma' behind
680 * and need a new, higher one */ 808 * and need a new, higher one */
@@ -686,11 +814,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
686 if (vma && (vma->vm_start <= addr) && 814 if (vma && (vma->vm_start <= addr) &&
687 !is_vm_hugetlb_page(vma)) { 815 !is_vm_hugetlb_page(vma)) {
688 pte = pte_offset_map(pmd, addr); 816 pte = pte_offset_map(pmd, addr);
689 pfn = pte_to_pagemap_entry(*pte); 817 pte_to_pagemap_entry(&pme, *pte);
690 /* unmap before userspace copy */ 818 /* unmap before userspace copy */
691 pte_unmap(pte); 819 pte_unmap(pte);
692 } 820 }
693 err = add_to_pagemap(addr, pfn, pm); 821 err = add_to_pagemap(addr, &pme, pm);
694 if (err) 822 if (err)
695 return err; 823 return err;
696 } 824 }
@@ -701,13 +829,12 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
701} 829}
702 830
703#ifdef CONFIG_HUGETLB_PAGE 831#ifdef CONFIG_HUGETLB_PAGE
704static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) 832static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
833 pte_t pte, int offset)
705{ 834{
706 u64 pme = 0;
707 if (pte_present(pte)) 835 if (pte_present(pte))
708 pme = PM_PFRAME(pte_pfn(pte) + offset) 836 *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
709 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 837 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
710 return pme;
711} 838}
712 839
713/* This function walks within one hugetlb entry in the single call */ 840/* This function walks within one hugetlb entry in the single call */
@@ -717,12 +844,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
717{ 844{
718 struct pagemapread *pm = walk->private; 845 struct pagemapread *pm = walk->private;
719 int err = 0; 846 int err = 0;
720 u64 pfn; 847 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
721 848
722 for (; addr != end; addr += PAGE_SIZE) { 849 for (; addr != end; addr += PAGE_SIZE) {
723 int offset = (addr & ~hmask) >> PAGE_SHIFT; 850 int offset = (addr & ~hmask) >> PAGE_SHIFT;
724 pfn = huge_pte_to_pagemap_entry(*pte, offset); 851 huge_pte_to_pagemap_entry(&pme, *pte, offset);
725 err = add_to_pagemap(addr, pfn, pm); 852 err = add_to_pagemap(addr, &pme, pm);
726 if (err) 853 if (err)
727 return err; 854 return err;
728 } 855 }
@@ -757,8 +884,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
757 * determine which areas of memory are actually mapped and llseek to 884 * determine which areas of memory are actually mapped and llseek to
758 * skip over unmapped regions. 885 * skip over unmapped regions.
759 */ 886 */
760#define PAGEMAP_WALK_SIZE (PMD_SIZE)
761#define PAGEMAP_WALK_MASK (PMD_MASK)
762static ssize_t pagemap_read(struct file *file, char __user *buf, 887static ssize_t pagemap_read(struct file *file, char __user *buf,
763 size_t count, loff_t *ppos) 888 size_t count, loff_t *ppos)
764{ 889{
@@ -941,26 +1066,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
941 pte_t *pte; 1066 pte_t *pte;
942 1067
943 md = walk->private; 1068 md = walk->private;
944 spin_lock(&walk->mm->page_table_lock); 1069
945 if (pmd_trans_huge(*pmd)) { 1070 if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
946 if (pmd_trans_splitting(*pmd)) { 1071 pte_t huge_pte = *(pte_t *)pmd;
947 spin_unlock(&walk->mm->page_table_lock); 1072 struct page *page;
948 wait_split_huge_page(md->vma->anon_vma, pmd); 1073
949 } else { 1074 page = can_gather_numa_stats(huge_pte, md->vma, addr);
950 pte_t huge_pte = *(pte_t *)pmd; 1075 if (page)
951 struct page *page; 1076 gather_stats(page, md, pte_dirty(huge_pte),
952 1077 HPAGE_PMD_SIZE/PAGE_SIZE);
953 page = can_gather_numa_stats(huge_pte, md->vma, addr);
954 if (page)
955 gather_stats(page, md, pte_dirty(huge_pte),
956 HPAGE_PMD_SIZE/PAGE_SIZE);
957 spin_unlock(&walk->mm->page_table_lock);
958 return 0;
959 }
960 } else {
961 spin_unlock(&walk->mm->page_table_lock); 1078 spin_unlock(&walk->mm->page_table_lock);
1079 return 0;
962 } 1080 }
963 1081
1082 if (pmd_trans_unstable(pmd))
1083 return 0;
964 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 1084 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
965 do { 1085 do {
966 struct page *page = can_gather_numa_stats(*pte, md->vma, addr); 1086 struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
@@ -1002,7 +1122,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
1002/* 1122/*
1003 * Display pages allocated per node and memory policy via /proc. 1123 * Display pages allocated per node and memory policy via /proc.
1004 */ 1124 */
1005static int show_numa_map(struct seq_file *m, void *v) 1125static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1006{ 1126{
1007 struct numa_maps_private *numa_priv = m->private; 1127 struct numa_maps_private *numa_priv = m->private;
1008 struct proc_maps_private *proc_priv = &numa_priv->proc_maps; 1128 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
@@ -1039,9 +1159,19 @@ static int show_numa_map(struct seq_file *m, void *v)
1039 seq_path(m, &file->f_path, "\n\t= "); 1159 seq_path(m, &file->f_path, "\n\t= ");
1040 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1160 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1041 seq_printf(m, " heap"); 1161 seq_printf(m, " heap");
1042 } else if (vma->vm_start <= mm->start_stack && 1162 } else {
1043 vma->vm_end >= mm->start_stack) { 1163 pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
1044 seq_printf(m, " stack"); 1164 if (tid != 0) {
1165 /*
1166 * Thread stack in /proc/PID/task/TID/maps or
1167 * the main process stack.
1168 */
1169 if (!is_pid || (vma->vm_start <= mm->start_stack &&
1170 vma->vm_end >= mm->start_stack))
1171 seq_printf(m, " stack");
1172 else
1173 seq_printf(m, " stack:%d", tid);
1174 }
1045 } 1175 }
1046 1176
1047 if (is_vm_hugetlb_page(vma)) 1177 if (is_vm_hugetlb_page(vma))
@@ -1084,21 +1214,39 @@ out:
1084 return 0; 1214 return 0;
1085} 1215}
1086 1216
1217static int show_pid_numa_map(struct seq_file *m, void *v)
1218{
1219 return show_numa_map(m, v, 1);
1220}
1221
1222static int show_tid_numa_map(struct seq_file *m, void *v)
1223{
1224 return show_numa_map(m, v, 0);
1225}
1226
1087static const struct seq_operations proc_pid_numa_maps_op = { 1227static const struct seq_operations proc_pid_numa_maps_op = {
1088 .start = m_start, 1228 .start = m_start,
1089 .next = m_next, 1229 .next = m_next,
1090 .stop = m_stop, 1230 .stop = m_stop,
1091 .show = show_numa_map, 1231 .show = show_pid_numa_map,
1232};
1233
1234static const struct seq_operations proc_tid_numa_maps_op = {
1235 .start = m_start,
1236 .next = m_next,
1237 .stop = m_stop,
1238 .show = show_tid_numa_map,
1092}; 1239};
1093 1240
1094static int numa_maps_open(struct inode *inode, struct file *file) 1241static int numa_maps_open(struct inode *inode, struct file *file,
1242 const struct seq_operations *ops)
1095{ 1243{
1096 struct numa_maps_private *priv; 1244 struct numa_maps_private *priv;
1097 int ret = -ENOMEM; 1245 int ret = -ENOMEM;
1098 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 1246 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1099 if (priv) { 1247 if (priv) {
1100 priv->proc_maps.pid = proc_pid(inode); 1248 priv->proc_maps.pid = proc_pid(inode);
1101 ret = seq_open(file, &proc_pid_numa_maps_op); 1249 ret = seq_open(file, ops);
1102 if (!ret) { 1250 if (!ret) {
1103 struct seq_file *m = file->private_data; 1251 struct seq_file *m = file->private_data;
1104 m->private = priv; 1252 m->private = priv;
@@ -1109,8 +1257,25 @@ static int numa_maps_open(struct inode *inode, struct file *file)
1109 return ret; 1257 return ret;
1110} 1258}
1111 1259
1112const struct file_operations proc_numa_maps_operations = { 1260static int pid_numa_maps_open(struct inode *inode, struct file *file)
1113 .open = numa_maps_open, 1261{
1262 return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
1263}
1264
1265static int tid_numa_maps_open(struct inode *inode, struct file *file)
1266{
1267 return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
1268}
1269
1270const struct file_operations proc_pid_numa_maps_operations = {
1271 .open = pid_numa_maps_open,
1272 .read = seq_read,
1273 .llseek = seq_lseek,
1274 .release = seq_release_private,
1275};
1276
1277const struct file_operations proc_tid_numa_maps_operations = {
1278 .open = tid_numa_maps_open,
1114 .read = seq_read, 1279 .read = seq_read,
1115 .llseek = seq_lseek, 1280 .llseek = seq_lseek,
1116 .release = seq_release_private, 1281 .release = seq_release_private,
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 980de547c070..74fe164d1b23 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -134,9 +134,11 @@ static void pad_len_spaces(struct seq_file *m, int len)
134/* 134/*
135 * display a single VMA to a sequenced file 135 * display a single VMA to a sequenced file
136 */ 136 */
137static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 137static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
138 int is_pid)
138{ 139{
139 struct mm_struct *mm = vma->vm_mm; 140 struct mm_struct *mm = vma->vm_mm;
141 struct proc_maps_private *priv = m->private;
140 unsigned long ino = 0; 142 unsigned long ino = 0;
141 struct file *file; 143 struct file *file;
142 dev_t dev = 0; 144 dev_t dev = 0;
@@ -168,10 +170,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
168 pad_len_spaces(m, len); 170 pad_len_spaces(m, len);
169 seq_path(m, &file->f_path, ""); 171 seq_path(m, &file->f_path, "");
170 } else if (mm) { 172 } else if (mm) {
171 if (vma->vm_start <= mm->start_stack && 173 pid_t tid = vm_is_stack(priv->task, vma, is_pid);
172 vma->vm_end >= mm->start_stack) { 174
175 if (tid != 0) {
173 pad_len_spaces(m, len); 176 pad_len_spaces(m, len);
174 seq_puts(m, "[stack]"); 177 /*
178 * Thread stack in /proc/PID/task/TID/maps or
179 * the main process stack.
180 */
181 if (!is_pid || (vma->vm_start <= mm->start_stack &&
182 vma->vm_end >= mm->start_stack))
183 seq_printf(m, "[stack]");
184 else
185 seq_printf(m, "[stack:%d]", tid);
175 } 186 }
176 } 187 }
177 188
@@ -182,11 +193,22 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
182/* 193/*
183 * display mapping lines for a particular process's /proc/pid/maps 194 * display mapping lines for a particular process's /proc/pid/maps
184 */ 195 */
185static int show_map(struct seq_file *m, void *_p) 196static int show_map(struct seq_file *m, void *_p, int is_pid)
186{ 197{
187 struct rb_node *p = _p; 198 struct rb_node *p = _p;
188 199
189 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); 200 return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
201 is_pid);
202}
203
204static int show_pid_map(struct seq_file *m, void *_p)
205{
206 return show_map(m, _p, 1);
207}
208
209static int show_tid_map(struct seq_file *m, void *_p)
210{
211 return show_map(m, _p, 0);
190} 212}
191 213
192static void *m_start(struct seq_file *m, loff_t *pos) 214static void *m_start(struct seq_file *m, loff_t *pos)
@@ -240,10 +262,18 @@ static const struct seq_operations proc_pid_maps_ops = {
240 .start = m_start, 262 .start = m_start,
241 .next = m_next, 263 .next = m_next,
242 .stop = m_stop, 264 .stop = m_stop,
243 .show = show_map 265 .show = show_pid_map
266};
267
268static const struct seq_operations proc_tid_maps_ops = {
269 .start = m_start,
270 .next = m_next,
271 .stop = m_stop,
272 .show = show_tid_map
244}; 273};
245 274
246static int maps_open(struct inode *inode, struct file *file) 275static int maps_open(struct inode *inode, struct file *file,
276 const struct seq_operations *ops)
247{ 277{
248 struct proc_maps_private *priv; 278 struct proc_maps_private *priv;
249 int ret = -ENOMEM; 279 int ret = -ENOMEM;
@@ -251,7 +281,7 @@ static int maps_open(struct inode *inode, struct file *file)
251 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 281 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
252 if (priv) { 282 if (priv) {
253 priv->pid = proc_pid(inode); 283 priv->pid = proc_pid(inode);
254 ret = seq_open(file, &proc_pid_maps_ops); 284 ret = seq_open(file, ops);
255 if (!ret) { 285 if (!ret) {
256 struct seq_file *m = file->private_data; 286 struct seq_file *m = file->private_data;
257 m->private = priv; 287 m->private = priv;
@@ -262,8 +292,25 @@ static int maps_open(struct inode *inode, struct file *file)
262 return ret; 292 return ret;
263} 293}
264 294
265const struct file_operations proc_maps_operations = { 295static int pid_maps_open(struct inode *inode, struct file *file)
266 .open = maps_open, 296{
297 return maps_open(inode, file, &proc_pid_maps_ops);
298}
299
300static int tid_maps_open(struct inode *inode, struct file *file)
301{
302 return maps_open(inode, file, &proc_tid_maps_ops);
303}
304
305const struct file_operations proc_pid_maps_operations = {
306 .open = pid_maps_open,
307 .read = seq_read,
308 .llseek = seq_lseek,
309 .release = seq_release_private,
310};
311
312const struct file_operations proc_tid_maps_operations = {
313 .open = tid_maps_open,
267 .read = seq_read, 314 .read = seq_read,
268 .llseek = seq_lseek, 315 .llseek = seq_lseek,
269 .release = seq_release_private, 316 .release = seq_release_private,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b0f450a2bb7c..0d5071d29985 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -700,3 +700,26 @@ static int __init vmcore_init(void)
700 return 0; 700 return 0;
701} 701}
702module_init(vmcore_init) 702module_init(vmcore_init)
703
704/* Cleanup function for vmcore module. */
705void vmcore_cleanup(void)
706{
707 struct list_head *pos, *next;
708
709 if (proc_vmcore) {
710 remove_proc_entry(proc_vmcore->name, proc_vmcore->parent);
711 proc_vmcore = NULL;
712 }
713
714 /* clear the vmcore list. */
715 list_for_each_safe(pos, next, &vmcore_list) {
716 struct vmcore *m;
717
718 m = list_entry(pos, struct vmcore, list);
719 list_del(&m->list);
720 kfree(m);
721 }
722 kfree(elfcorebuf);
723 elfcorebuf = NULL;
724}
725EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b3b426edb2fd..f37c32b94525 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -278,9 +278,7 @@ fail:
278 278
279int pstore_fill_super(struct super_block *sb, void *data, int silent) 279int pstore_fill_super(struct super_block *sb, void *data, int silent)
280{ 280{
281 struct inode *inode = NULL; 281 struct inode *inode;
282 struct dentry *root;
283 int err;
284 282
285 save_mount_options(sb, data); 283 save_mount_options(sb, data);
286 284
@@ -296,26 +294,17 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
296 parse_options(data); 294 parse_options(data);
297 295
298 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); 296 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
299 if (!inode) { 297 if (inode) {
300 err = -ENOMEM; 298 /* override ramfs "dir" options so we catch unlink(2) */
301 goto fail; 299 inode->i_op = &pstore_dir_inode_operations;
302 }
303 /* override ramfs "dir" options so we catch unlink(2) */
304 inode->i_op = &pstore_dir_inode_operations;
305
306 root = d_alloc_root(inode);
307 sb->s_root = root;
308 if (!root) {
309 err = -ENOMEM;
310 goto fail;
311 } 300 }
301 sb->s_root = d_make_root(inode);
302 if (!sb->s_root)
303 return -ENOMEM;
312 304
313 pstore_get_records(0); 305 pstore_get_records(0);
314 306
315 return 0; 307 return 0;
316fail:
317 iput(inode);
318 return err;
319} 308}
320 309
321static struct dentry *pstore_mount(struct file_system_type *fs_type, 310static struct dentry *pstore_mount(struct file_system_type *fs_type,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 6b009548d2e0..552e994e3aa1 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -52,38 +52,6 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
52 return 0; 52 return 0;
53} 53}
54 54
55static struct buffer_head *qnx4_getblk(struct inode *inode, int nr,
56 int create)
57{
58 struct buffer_head *result = NULL;
59
60 if ( nr >= 0 )
61 nr = qnx4_block_map( inode, nr );
62 if (nr) {
63 result = sb_getblk(inode->i_sb, nr);
64 return result;
65 }
66 return NULL;
67}
68
69struct buffer_head *qnx4_bread(struct inode *inode, int block, int create)
70{
71 struct buffer_head *bh;
72
73 bh = qnx4_getblk(inode, block, create);
74 if (!bh || buffer_uptodate(bh)) {
75 return bh;
76 }
77 ll_rw_block(READ, 1, &bh);
78 wait_on_buffer(bh);
79 if (buffer_uptodate(bh)) {
80 return bh;
81 }
82 brelse(bh);
83
84 return NULL;
85}
86
87static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create ) 55static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create )
88{ 56{
89 unsigned long phys; 57 unsigned long phys;
@@ -98,23 +66,31 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
98 return 0; 66 return 0;
99} 67}
100 68
69static inline u32 try_extent(qnx4_xtnt_t *extent, u32 *offset)
70{
71 u32 size = le32_to_cpu(extent->xtnt_size);
72 if (*offset < size)
73 return le32_to_cpu(extent->xtnt_blk) + *offset - 1;
74 *offset -= size;
75 return 0;
76}
77
101unsigned long qnx4_block_map( struct inode *inode, long iblock ) 78unsigned long qnx4_block_map( struct inode *inode, long iblock )
102{ 79{
103 int ix; 80 int ix;
104 long offset, i_xblk; 81 long i_xblk;
105 unsigned long block = 0;
106 struct buffer_head *bh = NULL; 82 struct buffer_head *bh = NULL;
107 struct qnx4_xblk *xblk = NULL; 83 struct qnx4_xblk *xblk = NULL;
108 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); 84 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
109 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); 85 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
86 u32 offset = iblock;
87 u32 block = try_extent(&qnx4_inode->di_first_xtnt, &offset);
110 88
111 if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) { 89 if (block) {
112 // iblock is in the first extent. This is easy. 90 // iblock is in the first extent. This is easy.
113 block = le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_blk) + iblock - 1;
114 } else { 91 } else {
115 // iblock is beyond first extent. We have to follow the extent chain. 92 // iblock is beyond first extent. We have to follow the extent chain.
116 i_xblk = le32_to_cpu(qnx4_inode->di_xblk); 93 i_xblk = le32_to_cpu(qnx4_inode->di_xblk);
117 offset = iblock - le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size);
118 ix = 0; 94 ix = 0;
119 while ( --nxtnt > 0 ) { 95 while ( --nxtnt > 0 ) {
120 if ( ix == 0 ) { 96 if ( ix == 0 ) {
@@ -130,12 +106,11 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
130 return -EIO; 106 return -EIO;
131 } 107 }
132 } 108 }
133 if ( offset < le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size) ) { 109 block = try_extent(&xblk->xblk_xtnts[ix], &offset);
110 if (block) {
134 // got it! 111 // got it!
135 block = le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_blk) + offset - 1;
136 break; 112 break;
137 } 113 }
138 offset -= le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size);
139 if ( ++ix >= xblk->xblk_num_xtnts ) { 114 if ( ++ix >= xblk->xblk_num_xtnts ) {
140 i_xblk = le32_to_cpu(xblk->xblk_next_xblk); 115 i_xblk = le32_to_cpu(xblk->xblk_next_xblk);
141 ix = 0; 116 ix = 0;
@@ -260,15 +235,13 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
260 } 235 }
261 236
262 ret = -ENOMEM; 237 ret = -ENOMEM;
263 s->s_root = d_alloc_root(root); 238 s->s_root = d_make_root(root);
264 if (s->s_root == NULL) 239 if (s->s_root == NULL)
265 goto outi; 240 goto outb;
266 241
267 brelse(bh); 242 brelse(bh);
268 return 0; 243 return 0;
269 244
270 outi:
271 iput(root);
272 outb: 245 outb:
273 kfree(qs->BitMap); 246 kfree(qs->BitMap);
274 out: 247 out:
@@ -288,44 +261,17 @@ static void qnx4_put_super(struct super_block *sb)
288 return; 261 return;
289} 262}
290 263
291static int qnx4_writepage(struct page *page, struct writeback_control *wbc)
292{
293 return block_write_full_page(page,qnx4_get_block, wbc);
294}
295
296static int qnx4_readpage(struct file *file, struct page *page) 264static int qnx4_readpage(struct file *file, struct page *page)
297{ 265{
298 return block_read_full_page(page,qnx4_get_block); 266 return block_read_full_page(page,qnx4_get_block);
299} 267}
300 268
301static int qnx4_write_begin(struct file *file, struct address_space *mapping,
302 loff_t pos, unsigned len, unsigned flags,
303 struct page **pagep, void **fsdata)
304{
305 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
306 int ret;
307
308 *pagep = NULL;
309 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
310 qnx4_get_block,
311 &qnx4_inode->mmu_private);
312 if (unlikely(ret)) {
313 loff_t isize = mapping->host->i_size;
314 if (pos + len > isize)
315 vmtruncate(mapping->host, isize);
316 }
317
318 return ret;
319}
320static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) 269static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
321{ 270{
322 return generic_block_bmap(mapping,block,qnx4_get_block); 271 return generic_block_bmap(mapping,block,qnx4_get_block);
323} 272}
324static const struct address_space_operations qnx4_aops = { 273static const struct address_space_operations qnx4_aops = {
325 .readpage = qnx4_readpage, 274 .readpage = qnx4_readpage,
326 .writepage = qnx4_writepage,
327 .write_begin = qnx4_write_begin,
328 .write_end = generic_write_end,
329 .bmap = qnx4_bmap 275 .bmap = qnx4_bmap
330}; 276};
331 277
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 275327b5615e..a512c0b30e8e 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -39,10 +39,6 @@ static int qnx4_match(int len, const char *name,
39 } else { 39 } else {
40 namelen = QNX4_SHORT_NAME_MAX; 40 namelen = QNX4_SHORT_NAME_MAX;
41 } 41 }
42 /* "" means "." ---> so paths like "/usr/lib//libc.a" work */
43 if (!len && (de->di_fname[0] == '.') && (de->di_fname[1] == '\0')) {
44 return 1;
45 }
46 thislen = strlen( de->di_fname ); 42 thislen = strlen( de->di_fname );
47 if ( thislen > namelen ) 43 if ( thislen > namelen )
48 thislen = namelen; 44 thislen = namelen;
@@ -72,7 +68,9 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
72 block = offset = blkofs = 0; 68 block = offset = blkofs = 0;
73 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { 69 while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {
74 if (!bh) { 70 if (!bh) {
75 bh = qnx4_bread(dir, blkofs, 0); 71 block = qnx4_block_map(dir, blkofs);
72 if (block)
73 bh = sb_bread(dir->i_sb, block);
76 if (!bh) { 74 if (!bh) {
77 blkofs++; 75 blkofs++;
78 continue; 76 continue;
@@ -80,7 +78,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
80 } 78 }
81 *res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset); 79 *res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset);
82 if (qnx4_match(len, name, bh, &offset)) { 80 if (qnx4_match(len, name, bh, &offset)) {
83 block = qnx4_block_map( dir, blkofs );
84 *ino = block * QNX4_INODES_PER_BLOCK + 81 *ino = block * QNX4_INODES_PER_BLOCK +
85 (offset / QNX4_DIR_ENTRY_SIZE) - 1; 82 (offset / QNX4_DIR_ENTRY_SIZE) - 1;
86 return bh; 83 return bh;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 33a60858203b..244d4620189b 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -27,8 +27,6 @@ extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, stru
27extern unsigned long qnx4_count_free_blocks(struct super_block *sb); 27extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
28extern unsigned long qnx4_block_map(struct inode *inode, long iblock); 28extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
29 29
30extern struct buffer_head *qnx4_bread(struct inode *, int, int);
31
32extern const struct inode_operations qnx4_dir_inode_operations; 30extern const struct inode_operations qnx4_dir_inode_operations;
33extern const struct file_operations qnx4_dir_operations; 31extern const struct file_operations qnx4_dir_operations;
34extern int qnx4_is_free(struct super_block *sb, long block); 32extern int qnx4_is_free(struct super_block *sb, long block);
diff --git a/fs/qnx6/Kconfig b/fs/qnx6/Kconfig
new file mode 100644
index 000000000000..edbba5c17cc8
--- /dev/null
+++ b/fs/qnx6/Kconfig
@@ -0,0 +1,26 @@
1config QNX6FS_FS
2 tristate "QNX6 file system support (read only)"
3 depends on BLOCK && CRC32
4 help
5 This is the file system used by the real-time operating systems
6 QNX 6 (also called QNX RTP).
7 Further information is available at <http://www.qnx.com/>.
8 Say Y if you intend to mount QNX hard disks or floppies formatted
9 with mkqnx6fs.
10 However, keep in mind that this is currently a read-only driver!
11
12 To compile this file system support as a module, choose M here: the
13 module will be called qnx6.
14
15 If you don't know whether you need it, then you don't need it:
16 answer N.
17
18config QNX6FS_DEBUG
19 bool "QNX6 debugging information"
20 depends on QNX6FS_FS
21 help
22 Turns on extended debugging output.
23
24 If you are not a developer working on the QNX6FS, you probably don't
25 want this:
26 answer N.
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
new file mode 100644
index 000000000000..9dd06199afc9
--- /dev/null
+++ b/fs/qnx6/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the linux qnx6-filesystem routines.
3#
4
5obj-$(CONFIG_QNX6FS_FS) += qnx6.o
6
7qnx6-objs := inode.o dir.o namei.o super_mmi.o
diff --git a/fs/qnx6/README b/fs/qnx6/README
new file mode 100644
index 000000000000..116d622026cc
--- /dev/null
+++ b/fs/qnx6/README
@@ -0,0 +1,8 @@
1
2 This is a snapshot of the QNX6 filesystem for Linux.
3 Please send diffs and remarks to <chaosman@ontika.net> .
4
5Credits :
6
7Al Viro <viro@ZenIV.linux.org.uk> (endless patience with me & support ;))
8Kai Bankett <chaosman@ontika.net> (Maintainer)
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
new file mode 100644
index 000000000000..dc597353db3b
--- /dev/null
+++ b/fs/qnx6/dir.c
@@ -0,0 +1,291 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include "qnx6.h"
14
/*
 * Checksum over the first 'size' bytes of a long filename.
 *
 * For every byte the running value is shifted right by one, the byte is
 * added, and bit 31 is toggled when the previous value had bit 0 set.
 * Returns 0 for an empty name.
 */
static unsigned qnx6_lfile_checksum(char *name, unsigned size)
{
	unsigned sum = 0;
	unsigned idx;

	for (idx = 0; idx < size; idx++) {
		/* decided by the low bit of the value *before* this step */
		unsigned carry = (sum & 0x00000001) ? 0x80000000 : 0;

		sum = ((sum >> 1) + name[idx]) ^ carry;
	}
	return sum;
}
25
26static struct page *qnx6_get_page(struct inode *dir, unsigned long n)
27{
28 struct address_space *mapping = dir->i_mapping;
29 struct page *page = read_mapping_page(mapping, n, NULL);
30 if (!IS_ERR(page))
31 kmap(page);
32 return page;
33}
34
35static inline unsigned long dir_pages(struct inode *inode)
36{
37 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
38}
39
40static unsigned last_entry(struct inode *inode, unsigned long page_nr)
41{
42 unsigned long last_byte = inode->i_size;
43 last_byte -= page_nr << PAGE_CACHE_SHIFT;
44 if (last_byte > PAGE_CACHE_SIZE)
45 last_byte = PAGE_CACHE_SIZE;
46 return last_byte / QNX6_DIR_ENTRY_SIZE;
47}
48
49static struct qnx6_long_filename *qnx6_longname(struct super_block *sb,
50 struct qnx6_long_dir_entry *de,
51 struct page **p)
52{
53 struct qnx6_sb_info *sbi = QNX6_SB(sb);
54 u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */
55 u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */
56 /* within page */
57 u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK;
58 struct address_space *mapping = sbi->longfile->i_mapping;
59 struct page *page = read_mapping_page(mapping, n, NULL);
60 if (IS_ERR(page))
61 return ERR_CAST(page);
62 kmap(*p = page);
63 return (struct qnx6_long_filename *)(page_address(page) + offs);
64}
65
66static int qnx6_dir_longfilename(struct inode *inode,
67 struct qnx6_long_dir_entry *de,
68 void *dirent, loff_t pos,
69 unsigned de_inode, filldir_t filldir)
70{
71 struct qnx6_long_filename *lf;
72 struct super_block *s = inode->i_sb;
73 struct qnx6_sb_info *sbi = QNX6_SB(s);
74 struct page *page;
75 int lf_size;
76
77 if (de->de_size != 0xff) {
78 /* error - long filename entries always have size 0xff
79 in direntry */
80 printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
81 de->de_size);
82 return 0;
83 }
84 lf = qnx6_longname(s, de, &page);
85 if (IS_ERR(lf)) {
86 printk(KERN_ERR "qnx6:Error reading longname\n");
87 return 0;
88 }
89
90 lf_size = fs16_to_cpu(sbi, lf->lf_size);
91
92 if (lf_size > QNX6_LONG_NAME_MAX) {
93 QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
94 printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
95 qnx6_put_page(page);
96 return 0;
97 }
98
99 /* calc & validate longfilename checksum
100 mmi 3g filesystem does not have that checksum */
101 if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
102 qnx6_lfile_checksum(lf->lf_fname, lf_size))
103 printk(KERN_INFO "qnx6: long filename checksum error.\n");
104
105 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
106 lf_size, lf->lf_fname, de_inode));
107 if (filldir(dirent, lf->lf_fname, lf_size, pos, de_inode,
108 DT_UNKNOWN) < 0) {
109 qnx6_put_page(page);
110 return 0;
111 }
112
113 qnx6_put_page(page);
114 /* success */
115 return 1;
116}
117
118static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
119{
120 struct inode *inode = filp->f_path.dentry->d_inode;
121 struct super_block *s = inode->i_sb;
122 struct qnx6_sb_info *sbi = QNX6_SB(s);
123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
124 unsigned long npages = dir_pages(inode);
125 unsigned long n = pos >> PAGE_CACHE_SHIFT;
126 unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
127 bool done = false;
128
129 if (filp->f_pos >= inode->i_size)
130 return 0;
131
132 for ( ; !done && n < npages; n++, start = 0) {
133 struct page *page = qnx6_get_page(inode, n);
134 int limit = last_entry(inode, n);
135 struct qnx6_dir_entry *de;
136 int i = start;
137
138 if (IS_ERR(page)) {
139 printk(KERN_ERR "qnx6_readdir: read failed\n");
140 filp->f_pos = (n + 1) << PAGE_CACHE_SHIFT;
141 return PTR_ERR(page);
142 }
143 de = ((struct qnx6_dir_entry *)page_address(page)) + start;
144 for (; i < limit; i++, de++, pos += QNX6_DIR_ENTRY_SIZE) {
145 int size = de->de_size;
146 u32 no_inode = fs32_to_cpu(sbi, de->de_inode);
147
148 if (!no_inode || !size)
149 continue;
150
151 if (size > QNX6_SHORT_NAME_MAX) {
152 /* long filename detected
153 get the filename from long filename
154 structure / block */
155 if (!qnx6_dir_longfilename(inode,
156 (struct qnx6_long_dir_entry *)de,
157 dirent, pos, no_inode,
158 filldir)) {
159 done = true;
160 break;
161 }
162 } else {
163 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
164 " inode:%u\n", size, de->de_fname,
165 no_inode));
166 if (filldir(dirent, de->de_fname, size,
167 pos, no_inode, DT_UNKNOWN)
168 < 0) {
169 done = true;
170 break;
171 }
172 }
173 }
174 qnx6_put_page(page);
175 }
176 filp->f_pos = pos;
177 return 0;
178}
179
180/*
181 * check if the long filename is correct.
182 */
183static unsigned qnx6_long_match(int len, const char *name,
184 struct qnx6_long_dir_entry *de, struct inode *dir)
185{
186 struct super_block *s = dir->i_sb;
187 struct qnx6_sb_info *sbi = QNX6_SB(s);
188 struct page *page;
189 int thislen;
190 struct qnx6_long_filename *lf = qnx6_longname(s, de, &page);
191
192 if (IS_ERR(lf))
193 return 0;
194
195 thislen = fs16_to_cpu(sbi, lf->lf_size);
196 if (len != thislen) {
197 qnx6_put_page(page);
198 return 0;
199 }
200 if (memcmp(name, lf->lf_fname, len) == 0) {
201 qnx6_put_page(page);
202 return fs32_to_cpu(sbi, de->de_inode);
203 }
204 qnx6_put_page(page);
205 return 0;
206}
207
208/*
209 * check if the filename is correct.
210 */
211static unsigned qnx6_match(struct super_block *s, int len, const char *name,
212 struct qnx6_dir_entry *de)
213{
214 struct qnx6_sb_info *sbi = QNX6_SB(s);
215 if (memcmp(name, de->de_fname, len) == 0)
216 return fs32_to_cpu(sbi, de->de_inode);
217 return 0;
218}
219
220
221unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
222 struct page **res_page)
223{
224 struct super_block *s = dir->i_sb;
225 struct qnx6_inode_info *ei = QNX6_I(dir);
226 struct page *page = NULL;
227 unsigned long start, n;
228 unsigned long npages = dir_pages(dir);
229 unsigned ino;
230 struct qnx6_dir_entry *de;
231 struct qnx6_long_dir_entry *lde;
232
233 *res_page = NULL;
234
235 if (npages == 0)
236 return 0;
237 start = ei->i_dir_start_lookup;
238 if (start >= npages)
239 start = 0;
240 n = start;
241
242 do {
243 page = qnx6_get_page(dir, n);
244 if (!IS_ERR(page)) {
245 int limit = last_entry(dir, n);
246 int i;
247
248 de = (struct qnx6_dir_entry *)page_address(page);
249 for (i = 0; i < limit; i++, de++) {
250 if (len <= QNX6_SHORT_NAME_MAX) {
251 /* short filename */
252 if (len != de->de_size)
253 continue;
254 ino = qnx6_match(s, len, name, de);
255 if (ino)
256 goto found;
257 } else if (de->de_size == 0xff) {
258 /* deal with long filename */
259 lde = (struct qnx6_long_dir_entry *)de;
260 ino = qnx6_long_match(len,
261 name, lde, dir);
262 if (ino)
263 goto found;
264 } else
265 printk(KERN_ERR "qnx6: undefined "
266 "filename size in inode.\n");
267 }
268 qnx6_put_page(page);
269 }
270
271 if (++n >= npages)
272 n = 0;
273 } while (n != start);
274 return 0;
275
276found:
277 *res_page = page;
278 ei->i_dir_start_lookup = n;
279 return ino;
280}
281
282const struct file_operations qnx6_dir_operations = {
283 .llseek = generic_file_llseek,
284 .read = generic_read_dir,
285 .readdir = qnx6_readdir,
286 .fsync = generic_file_fsync,
287};
288
289const struct inode_operations qnx6_dir_inode_operations = {
290 .lookup = qnx6_lookup,
291};
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
new file mode 100644
index 000000000000..e44012dc5645
--- /dev/null
+++ b/fs/qnx6/inode.c
@@ -0,0 +1,698 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/highuid.h>
17#include <linux/pagemap.h>
18#include <linux/buffer_head.h>
19#include <linux/writeback.h>
20#include <linux/statfs.h>
21#include <linux/parser.h>
22#include <linux/seq_file.h>
23#include <linux/mount.h>
24#include <linux/crc32.h>
25#include <linux/mpage.h>
26#include "qnx6.h"
27
28static const struct super_operations qnx6_sops;
29
30static void qnx6_put_super(struct super_block *sb);
31static struct inode *qnx6_alloc_inode(struct super_block *sb);
32static void qnx6_destroy_inode(struct inode *inode);
33static int qnx6_remount(struct super_block *sb, int *flags, char *data);
34static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf);
35static int qnx6_show_options(struct seq_file *seq, struct dentry *root);
36
37static const struct super_operations qnx6_sops = {
38 .alloc_inode = qnx6_alloc_inode,
39 .destroy_inode = qnx6_destroy_inode,
40 .put_super = qnx6_put_super,
41 .statfs = qnx6_statfs,
42 .remount_fs = qnx6_remount,
43 .show_options = qnx6_show_options,
44};
45
46static int qnx6_show_options(struct seq_file *seq, struct dentry *root)
47{
48 struct super_block *sb = root->d_sb;
49 struct qnx6_sb_info *sbi = QNX6_SB(sb);
50
51 if (sbi->s_mount_opt & QNX6_MOUNT_MMI_FS)
52 seq_puts(seq, ",mmi_fs");
53 return 0;
54}
55
56static int qnx6_remount(struct super_block *sb, int *flags, char *data)
57{
58 *flags |= MS_RDONLY;
59 return 0;
60}
61
62static unsigned qnx6_get_devblock(struct super_block *sb, __fs32 block)
63{
64 struct qnx6_sb_info *sbi = QNX6_SB(sb);
65 return fs32_to_cpu(sbi, block) + sbi->s_blks_off;
66}
67
68static unsigned qnx6_block_map(struct inode *inode, unsigned iblock);
69
70static int qnx6_get_block(struct inode *inode, sector_t iblock,
71 struct buffer_head *bh, int create)
72{
73 unsigned phys;
74
75 QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
76 inode->i_ino, (unsigned long)iblock));
77
78 phys = qnx6_block_map(inode, iblock);
79 if (phys) {
80 /* logical block is before EOF */
81 map_bh(bh, inode->i_sb, phys);
82 }
83 return 0;
84}
85
86static int qnx6_check_blockptr(__fs32 ptr)
87{
88 if (ptr == ~(__fs32)0) {
89 printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
90 return 0;
91 }
92 return 1;
93}
94
95static int qnx6_readpage(struct file *file, struct page *page)
96{
97 return mpage_readpage(page, qnx6_get_block);
98}
99
100static int qnx6_readpages(struct file *file, struct address_space *mapping,
101 struct list_head *pages, unsigned nr_pages)
102{
103 return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block);
104}
105
106/*
107 * returns the block number for the no-th element in the tree
108 * inodebits requred as there are multiple inodes in one inode block
109 */
110static unsigned qnx6_block_map(struct inode *inode, unsigned no)
111{
112 struct super_block *s = inode->i_sb;
113 struct qnx6_sb_info *sbi = QNX6_SB(s);
114 struct qnx6_inode_info *ei = QNX6_I(inode);
115 unsigned block = 0;
116 struct buffer_head *bh;
117 __fs32 ptr;
118 int levelptr;
119 int ptrbits = sbi->s_ptrbits;
120 int bitdelta;
121 u32 mask = (1 << ptrbits) - 1;
122 int depth = ei->di_filelevels;
123 int i;
124
125 bitdelta = ptrbits * depth;
126 levelptr = no >> bitdelta;
127
128 if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
129 printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
130 no);
131 return 0;
132 }
133
134 block = qnx6_get_devblock(s, ei->di_block_ptr[levelptr]);
135
136 for (i = 0; i < depth; i++) {
137 bh = sb_bread(s, block);
138 if (!bh) {
139 printk(KERN_ERR "qnx6:Error reading block (%u)\n",
140 block);
141 return 0;
142 }
143 bitdelta -= ptrbits;
144 levelptr = (no >> bitdelta) & mask;
145 ptr = ((__fs32 *)bh->b_data)[levelptr];
146
147 if (!qnx6_check_blockptr(ptr))
148 return 0;
149
150 block = qnx6_get_devblock(s, ptr);
151 brelse(bh);
152 }
153 return block;
154}
155
156static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf)
157{
158 struct super_block *sb = dentry->d_sb;
159 struct qnx6_sb_info *sbi = QNX6_SB(sb);
160 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
161
162 buf->f_type = sb->s_magic;
163 buf->f_bsize = sb->s_blocksize;
164 buf->f_blocks = fs32_to_cpu(sbi, sbi->sb->sb_num_blocks);
165 buf->f_bfree = fs32_to_cpu(sbi, sbi->sb->sb_free_blocks);
166 buf->f_files = fs32_to_cpu(sbi, sbi->sb->sb_num_inodes);
167 buf->f_ffree = fs32_to_cpu(sbi, sbi->sb->sb_free_inodes);
168 buf->f_bavail = buf->f_bfree;
169 buf->f_namelen = QNX6_LONG_NAME_MAX;
170 buf->f_fsid.val[0] = (u32)id;
171 buf->f_fsid.val[1] = (u32)(id >> 32);
172
173 return 0;
174}
175
176/*
177 * Check the root directory of the filesystem to make sure
178 * it really _is_ a qnx6 filesystem, and to check the size
179 * of the directory entry.
180 */
181static const char *qnx6_checkroot(struct super_block *s)
182{
183 static char match_root[2][3] = {".\0\0", "..\0"};
184 int i, error = 0;
185 struct qnx6_dir_entry *dir_entry;
186 struct inode *root = s->s_root->d_inode;
187 struct address_space *mapping = root->i_mapping;
188 struct page *page = read_mapping_page(mapping, 0, NULL);
189 if (IS_ERR(page))
190 return "error reading root directory";
191 kmap(page);
192 dir_entry = page_address(page);
193 for (i = 0; i < 2; i++) {
194 /* maximum 3 bytes - due to match_root limitation */
195 if (strncmp(dir_entry[i].de_fname, match_root[i], 3))
196 error = 1;
197 }
198 qnx6_put_page(page);
199 if (error)
200 return "error reading root directory.";
201 return NULL;
202}
203
#ifdef CONFIG_QNX6FS_DEBUG
/*
 * Dump the decoded fields of an on-disk super block through QNX6DEBUG.
 * Only built when CONFIG_QNX6FS_DEBUG is set.
 */
void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
{
	struct qnx6_sb_info *sbi = QNX6_SB(s);

	/* identification */
	QNX6DEBUG((KERN_INFO "magic: %08x\n",
				fs32_to_cpu(sbi, sb->sb_magic)));
	QNX6DEBUG((KERN_INFO "checksum: %08x\n",
				fs32_to_cpu(sbi, sb->sb_checksum)));
	QNX6DEBUG((KERN_INFO "serial: %llx\n",
				fs64_to_cpu(sbi, sb->sb_serial)));
	QNX6DEBUG((KERN_INFO "flags: %08x\n",
				fs32_to_cpu(sbi, sb->sb_flags)));

	/* geometry and usage counters */
	QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
				fs32_to_cpu(sbi, sb->sb_blocksize)));
	QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
				fs32_to_cpu(sbi, sb->sb_num_inodes)));
	QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
				fs32_to_cpu(sbi, sb->sb_free_inodes)));
	QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
				fs32_to_cpu(sbi, sb->sb_num_blocks)));
	QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
				fs32_to_cpu(sbi, sb->sb_free_blocks)));

	/* depth of the inode tree */
	QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
				sb->Inode.levels));
}
#endif
231
232enum {
233 Opt_mmifs,
234 Opt_err
235};
236
237static const match_table_t tokens = {
238 {Opt_mmifs, "mmi_fs"},
239 {Opt_err, NULL}
240};
241
242static int qnx6_parse_options(char *options, struct super_block *sb)
243{
244 char *p;
245 struct qnx6_sb_info *sbi = QNX6_SB(sb);
246 substring_t args[MAX_OPT_ARGS];
247
248 if (!options)
249 return 1;
250
251 while ((p = strsep(&options, ",")) != NULL) {
252 int token;
253 if (!*p)
254 continue;
255
256 token = match_token(p, tokens, args);
257 switch (token) {
258 case Opt_mmifs:
259 set_opt(sbi->s_mount_opt, MMI_FS);
260 break;
261 default:
262 return 0;
263 }
264 }
265 return 1;
266}
267
268static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
269 int offset, int silent)
270{
271 struct qnx6_sb_info *sbi = QNX6_SB(s);
272 struct buffer_head *bh;
273 struct qnx6_super_block *sb;
274
275 /* Check the superblock signatures
276 start with the first superblock */
277 bh = sb_bread(s, offset);
278 if (!bh) {
279 printk(KERN_ERR "qnx6: unable to read the first superblock\n");
280 return NULL;
281 }
282 sb = (struct qnx6_super_block *)bh->b_data;
283 if (fs32_to_cpu(sbi, sb->sb_magic) != QNX6_SUPER_MAGIC) {
284 sbi->s_bytesex = BYTESEX_BE;
285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
286 /* we got a big endian fs */
287 QNX6DEBUG((KERN_INFO "qnx6: fs got different"
288 " endianess.\n"));
289 return bh;
290 } else
291 sbi->s_bytesex = BYTESEX_LE;
292 if (!silent) {
293 if (offset == 0) {
294 printk(KERN_ERR "qnx6: wrong signature (magic)"
295 " in superblock #1.\n");
296 } else {
297 printk(KERN_INFO "qnx6: wrong signature (magic)"
298 " at position (0x%lx) - will try"
299 " alternative position (0x0000).\n",
300 offset * s->s_blocksize);
301 }
302 }
303 brelse(bh);
304 return NULL;
305 }
306 return bh;
307}
308
309static struct inode *qnx6_private_inode(struct super_block *s,
310 struct qnx6_root_node *p);
311
312static int qnx6_fill_super(struct super_block *s, void *data, int silent)
313{
314 struct buffer_head *bh1 = NULL, *bh2 = NULL;
315 struct qnx6_super_block *sb1 = NULL, *sb2 = NULL;
316 struct qnx6_sb_info *sbi;
317 struct inode *root;
318 const char *errmsg;
319 struct qnx6_sb_info *qs;
320 int ret = -EINVAL;
321 u64 offset;
322 int bootblock_offset = QNX6_BOOTBLOCK_SIZE;
323
324 qs = kzalloc(sizeof(struct qnx6_sb_info), GFP_KERNEL);
325 if (!qs)
326 return -ENOMEM;
327 s->s_fs_info = qs;
328
329 /* Superblock always is 512 Byte long */
330 if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
331 printk(KERN_ERR "qnx6: unable to set blocksize\n");
332 goto outnobh;
333 }
334
335 /* parse the mount-options */
336 if (!qnx6_parse_options((char *) data, s)) {
337 printk(KERN_ERR "qnx6: invalid mount options.\n");
338 goto outnobh;
339 }
340 if (test_opt(s, MMI_FS)) {
341 sb1 = qnx6_mmi_fill_super(s, silent);
342 if (sb1)
343 goto mmi_success;
344 else
345 goto outnobh;
346 }
347 sbi = QNX6_SB(s);
348 sbi->s_bytesex = BYTESEX_LE;
349 /* Check the superblock signatures
350 start with the first superblock */
351 bh1 = qnx6_check_first_superblock(s,
352 bootblock_offset / QNX6_SUPERBLOCK_SIZE, silent);
353 if (!bh1) {
354 /* try again without bootblock offset */
355 bh1 = qnx6_check_first_superblock(s, 0, silent);
356 if (!bh1) {
357 printk(KERN_ERR "qnx6: unable to read the first superblock\n");
358 goto outnobh;
359 }
360 /* seems that no bootblock at partition start */
361 bootblock_offset = 0;
362 }
363 sb1 = (struct qnx6_super_block *)bh1->b_data;
364
365#ifdef CONFIG_QNX6FS_DEBUG
366 qnx6_superblock_debug(sb1, s);
367#endif
368
369 /* checksum check - start at byte 8 and end at byte 512 */
370 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
371 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
372 printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
373 goto out;
374 }
375
376 /* set new blocksize */
377 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
378 printk(KERN_ERR "qnx6: unable to set blocksize\n");
379 goto out;
380 }
381 /* blocksize invalidates bh - pull it back in */
382 brelse(bh1);
383 bh1 = sb_bread(s, bootblock_offset >> s->s_blocksize_bits);
384 if (!bh1)
385 goto outnobh;
386 sb1 = (struct qnx6_super_block *)bh1->b_data;
387
388 /* calculate second superblock blocknumber */
389 offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) +
390 (bootblock_offset >> s->s_blocksize_bits) +
391 (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);
392
393 /* set bootblock offset */
394 sbi->s_blks_off = (bootblock_offset >> s->s_blocksize_bits) +
395 (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);
396
397 /* next the second superblock */
398 bh2 = sb_bread(s, offset);
399 if (!bh2) {
400 printk(KERN_ERR "qnx6: unable to read the second superblock\n");
401 goto out;
402 }
403 sb2 = (struct qnx6_super_block *)bh2->b_data;
404 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
405 if (!silent)
406 printk(KERN_ERR "qnx6: wrong signature (magic)"
407 " in superblock #2.\n");
408 goto out;
409 }
410
411 /* checksum check - start at byte 8 and end at byte 512 */
412 if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
413 crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
414 printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
415 goto out;
416 }
417
418 if (fs64_to_cpu(sbi, sb1->sb_serial) >=
419 fs64_to_cpu(sbi, sb2->sb_serial)) {
420 /* superblock #1 active */
421 sbi->sb_buf = bh1;
422 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
423 brelse(bh2);
424 printk(KERN_INFO "qnx6: superblock #1 active\n");
425 } else {
426 /* superblock #2 active */
427 sbi->sb_buf = bh2;
428 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
429 brelse(bh1);
430 printk(KERN_INFO "qnx6: superblock #2 active\n");
431 }
432mmi_success:
433 /* sanity check - limit maximum indirect pointer levels */
434 if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
435 printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
436 QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
437 goto out;
438 }
439 if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
440 printk(KERN_ERR "qnx6: too many longfilename levels"
441 " (max %i, sb %i)\n",
442 QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
443 goto out;
444 }
445 s->s_op = &qnx6_sops;
446 s->s_magic = QNX6_SUPER_MAGIC;
447 s->s_flags |= MS_RDONLY; /* Yup, read-only yet */
448
449 /* ease the later tree level calculations */
450 sbi = QNX6_SB(s);
451 sbi->s_ptrbits = ilog2(s->s_blocksize / 4);
452 sbi->inodes = qnx6_private_inode(s, &sb1->Inode);
453 if (!sbi->inodes)
454 goto out;
455 sbi->longfile = qnx6_private_inode(s, &sb1->Longfile);
456 if (!sbi->longfile)
457 goto out1;
458
459 /* prefetch root inode */
460 root = qnx6_iget(s, QNX6_ROOT_INO);
461 if (IS_ERR(root)) {
462 printk(KERN_ERR "qnx6: get inode failed\n");
463 ret = PTR_ERR(root);
464 goto out2;
465 }
466
467 ret = -ENOMEM;
468 s->s_root = d_make_root(root);
469 if (!s->s_root)
470 goto out2;
471
472 ret = -EINVAL;
473 errmsg = qnx6_checkroot(s);
474 if (errmsg != NULL) {
475 if (!silent)
476 printk(KERN_ERR "qnx6: %s\n", errmsg);
477 goto out3;
478 }
479 return 0;
480
481out3:
482 dput(s->s_root);
483 s->s_root = NULL;
484out2:
485 iput(sbi->longfile);
486out1:
487 iput(sbi->inodes);
488out:
489 if (bh1)
490 brelse(bh1);
491 if (bh2)
492 brelse(bh2);
493outnobh:
494 kfree(qs);
495 s->s_fs_info = NULL;
496 return ret;
497}
498
499static void qnx6_put_super(struct super_block *sb)
500{
501 struct qnx6_sb_info *qs = QNX6_SB(sb);
502 brelse(qs->sb_buf);
503 iput(qs->longfile);
504 iput(qs->inodes);
505 kfree(qs);
506 sb->s_fs_info = NULL;
507 return;
508}
509
510static sector_t qnx6_bmap(struct address_space *mapping, sector_t block)
511{
512 return generic_block_bmap(mapping, block, qnx6_get_block);
513}
514static const struct address_space_operations qnx6_aops = {
515 .readpage = qnx6_readpage,
516 .readpages = qnx6_readpages,
517 .bmap = qnx6_bmap
518};
519
520static struct inode *qnx6_private_inode(struct super_block *s,
521 struct qnx6_root_node *p)
522{
523 struct inode *inode = new_inode(s);
524 if (inode) {
525 struct qnx6_inode_info *ei = QNX6_I(inode);
526 struct qnx6_sb_info *sbi = QNX6_SB(s);
527 inode->i_size = fs64_to_cpu(sbi, p->size);
528 memcpy(ei->di_block_ptr, p->ptr, sizeof(p->ptr));
529 ei->di_filelevels = p->levels;
530 inode->i_mode = S_IFREG | S_IRUSR; /* probably wrong */
531 inode->i_mapping->a_ops = &qnx6_aops;
532 }
533 return inode;
534}
535
536struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
537{
538 struct qnx6_sb_info *sbi = QNX6_SB(sb);
539 struct qnx6_inode_entry *raw_inode;
540 struct inode *inode;
541 struct qnx6_inode_info *ei;
542 struct address_space *mapping;
543 struct page *page;
544 u32 n, offs;
545
546 inode = iget_locked(sb, ino);
547 if (!inode)
548 return ERR_PTR(-ENOMEM);
549 if (!(inode->i_state & I_NEW))
550 return inode;
551
552 ei = QNX6_I(inode);
553
554 inode->i_mode = 0;
555
556 if (ino == 0) {
557 printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
558 "out of range\n",
559 sb->s_id, ino);
560 iget_failed(inode);
561 return ERR_PTR(-EIO);
562 }
563 n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS);
564 offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS);
565 mapping = sbi->inodes->i_mapping;
566 page = read_mapping_page(mapping, n, NULL);
567 if (IS_ERR(page)) {
568 printk(KERN_ERR "qnx6: major problem: unable to read inode from "
569 "dev %s\n", sb->s_id);
570 iget_failed(inode);
571 return ERR_CAST(page);
572 }
573 kmap(page);
574 raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs;
575
576 inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode);
577 inode->i_uid = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid);
578 inode->i_gid = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid);
579 inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size);
580 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime);
581 inode->i_mtime.tv_nsec = 0;
582 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_atime);
583 inode->i_atime.tv_nsec = 0;
584 inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_ctime);
585 inode->i_ctime.tv_nsec = 0;
586
587 /* calc blocks based on 512 byte blocksize */
588 inode->i_blocks = (inode->i_size + 511) >> 9;
589
590 memcpy(&ei->di_block_ptr, &raw_inode->di_block_ptr,
591 sizeof(raw_inode->di_block_ptr));
592 ei->di_filelevels = raw_inode->di_filelevels;
593
594 if (S_ISREG(inode->i_mode)) {
595 inode->i_fop = &generic_ro_fops;
596 inode->i_mapping->a_ops = &qnx6_aops;
597 } else if (S_ISDIR(inode->i_mode)) {
598 inode->i_op = &qnx6_dir_inode_operations;
599 inode->i_fop = &qnx6_dir_operations;
600 inode->i_mapping->a_ops = &qnx6_aops;
601 } else if (S_ISLNK(inode->i_mode)) {
602 inode->i_op = &page_symlink_inode_operations;
603 inode->i_mapping->a_ops = &qnx6_aops;
604 } else
605 init_special_inode(inode, inode->i_mode, 0);
606 qnx6_put_page(page);
607 unlock_new_inode(inode);
608 return inode;
609}
610
611static struct kmem_cache *qnx6_inode_cachep;
612
613static struct inode *qnx6_alloc_inode(struct super_block *sb)
614{
615 struct qnx6_inode_info *ei;
616 ei = kmem_cache_alloc(qnx6_inode_cachep, GFP_KERNEL);
617 if (!ei)
618 return NULL;
619 return &ei->vfs_inode;
620}
621
622static void qnx6_i_callback(struct rcu_head *head)
623{
624 struct inode *inode = container_of(head, struct inode, i_rcu);
625 INIT_LIST_HEAD(&inode->i_dentry);
626 kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
627}
628
629static void qnx6_destroy_inode(struct inode *inode)
630{
631 call_rcu(&inode->i_rcu, qnx6_i_callback);
632}
633
634static void init_once(void *foo)
635{
636 struct qnx6_inode_info *ei = (struct qnx6_inode_info *) foo;
637
638 inode_init_once(&ei->vfs_inode);
639}
640
641static int init_inodecache(void)
642{
643 qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache",
644 sizeof(struct qnx6_inode_info),
645 0, (SLAB_RECLAIM_ACCOUNT|
646 SLAB_MEM_SPREAD),
647 init_once);
648 if (!qnx6_inode_cachep)
649 return -ENOMEM;
650 return 0;
651}
652
/* Destroy the slab cache created by init_inodecache(). */
653static void destroy_inodecache(void)
654{
655 kmem_cache_destroy(qnx6_inode_cachep);
656}
657
658static struct dentry *qnx6_mount(struct file_system_type *fs_type,
659 int flags, const char *dev_name, void *data)
660{
661 return mount_bdev(fs_type, flags, dev_name, data, qnx6_fill_super);
662}
663
/*
 * Filesystem type registered by init_qnx6_fs() under the name "qnx6".
 * Mounting goes through qnx6_mount(); FS_REQUIRES_DEV marks it as needing a
 * backing device.
 */
664static struct file_system_type qnx6_fs_type = {
665 .owner = THIS_MODULE,
666 .name = "qnx6",
667 .mount = qnx6_mount,
668 .kill_sb = kill_block_super,
669 .fs_flags = FS_REQUIRES_DEV,
670};
671
672static int __init init_qnx6_fs(void)
673{
674 int err;
675
676 err = init_inodecache();
677 if (err)
678 return err;
679
680 err = register_filesystem(&qnx6_fs_type);
681 if (err) {
682 destroy_inodecache();
683 return err;
684 }
685
686 printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
687 return 0;
688}
689
/*
 * Module exit: undo init_qnx6_fs() in reverse order - unregister the
 * filesystem first, then destroy the inode cache.
 */
690static void __exit exit_qnx6_fs(void)
691{
692 unregister_filesystem(&qnx6_fs_type);
693 destroy_inodecache();
694}
695
696module_init(init_qnx6_fs)
697module_exit(exit_qnx6_fs)
698MODULE_LICENSE("GPL");
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
new file mode 100644
index 000000000000..8a97289e04ad
--- /dev/null
+++ b/fs/qnx6/namei.c
@@ -0,0 +1,42 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 pagemap extension by Al Viro
10 *
11 */
12
13#include "qnx6.h"
14
15struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
16 struct nameidata *nd)
17{
18 unsigned ino;
19 struct page *page;
20 struct inode *foundinode = NULL;
21 const char *name = dentry->d_name.name;
22 int len = dentry->d_name.len;
23
24 if (len > QNX6_LONG_NAME_MAX)
25 return ERR_PTR(-ENAMETOOLONG);
26
27 ino = qnx6_find_entry(len, dir, name, &page);
28 if (ino) {
29 foundinode = qnx6_iget(dir->i_sb, ino);
30 qnx6_put_page(page);
31 if (IS_ERR(foundinode)) {
32 QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
33 " error %ld\n", PTR_ERR(foundinode)));
34 return ERR_CAST(foundinode);
35 }
36 } else {
37 QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
38 return NULL;
39 }
40 d_add(dentry, foundinode);
41 return NULL;
42}
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
new file mode 100644
index 000000000000..6c5e02a0b6a8
--- /dev/null
+++ b/fs/qnx6/qnx6.h
@@ -0,0 +1,135 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 * 16-02-2012 page map extension by Al Viro
10 *
11 */
12
13#include <linux/fs.h>
14#include <linux/pagemap.h>
15
16typedef __u16 __bitwise __fs16;
17typedef __u32 __bitwise __fs32;
18typedef __u64 __bitwise __fs64;
19
20#include <linux/qnx6_fs.h>
21
/*
 * QNX6DEBUG(X) expands to "printk X" when CONFIG_QNX6FS_DEBUG is defined and
 * to a no-op expression otherwise.  The macro takes one argument, so callers
 * pass the whole printk argument list in its own parentheses:
 * QNX6DEBUG((KERN_INFO "...", args)).
 */
22#ifdef CONFIG_QNX6FS_DEBUG
23#define QNX6DEBUG(X) printk X
24#else
25#define QNX6DEBUG(X) (void) 0
26#endif
27
/* Per-mount private data; QNX6_SB() returns it from sb->s_fs_info. */
28struct qnx6_sb_info {
29 struct buffer_head *sb_buf; /* superblock buffer */
30 struct qnx6_super_block *sb; /* our superblock */
31 int s_blks_off; /* blkoffset fs-startpoint */
32 int s_ptrbits; /* indirect pointer bitfield */
33 unsigned long s_mount_opt; /* all mount options */
34 int s_bytesex; /* holds endianness info: BYTESEX_LE or BYTESEX_BE */
35 struct inode * inodes; /* private inode built from the superblock's Inode root node */
36 struct inode * longfile; /* private inode built from the superblock's Longfile root node */
37};
38
/*
 * In-memory qnx6 inode.  The VFS inode is embedded; QNX6_I() maps a
 * struct inode pointer back to this container.
 */
39struct qnx6_inode_info {
40 __fs32 di_block_ptr[QNX6_NO_DIRECT_POINTERS]; /* block pointers copied from the on-disk inode or root node, still in filesystem byte order */
41 __u8 di_filelevels; /* tree levels copied from the on-disk inode or root node */
42 __u32 i_dir_start_lookup;
43 struct inode vfs_inode; /* embedded VFS inode */
44};
45
46extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino);
47extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
48 struct nameidata *nd);
49
50#ifdef CONFIG_QNX6FS_DEBUG
51extern void qnx6_superblock_debug(struct qnx6_super_block *,
52 struct super_block *);
53#endif
54
55extern const struct inode_operations qnx6_dir_inode_operations;
56extern const struct file_operations qnx6_dir_operations;
57
58static inline struct qnx6_sb_info *QNX6_SB(struct super_block *sb)
59{
60 return sb->s_fs_info;
61}
62
63static inline struct qnx6_inode_info *QNX6_I(struct inode *inode)
64{
65 return container_of(inode, struct qnx6_inode_info, vfs_inode);
66}
67
/*
 * Mount-option helpers.  "opt" is the option name without its QNX6_MOUNT_
 * prefix.  clear_opt()/set_opt() modify the flag word "o" in place;
 * test_opt() checks the flag in the s_mount_opt of the given super_block.
 */
68#define clear_opt(o, opt) (o &= ~(QNX6_MOUNT_##opt))
69#define set_opt(o, opt) (o |= (QNX6_MOUNT_##opt))
70#define test_opt(sb, opt) (QNX6_SB(sb)->s_mount_opt & \
71 QNX6_MOUNT_##opt)
/* Values of qnx6_sb_info.s_bytesex, tested by the fsXX_to_cpu/cpu_to_fsXX helpers. */
72enum {
73 BYTESEX_LE,
74 BYTESEX_BE,
75};
76
77static inline __u64 fs64_to_cpu(struct qnx6_sb_info *sbi, __fs64 n)
78{
79 if (sbi->s_bytesex == BYTESEX_LE)
80 return le64_to_cpu((__force __le64)n);
81 else
82 return be64_to_cpu((__force __be64)n);
83}
84
85static inline __fs64 cpu_to_fs64(struct qnx6_sb_info *sbi, __u64 n)
86{
87 if (sbi->s_bytesex == BYTESEX_LE)
88 return (__force __fs64)cpu_to_le64(n);
89 else
90 return (__force __fs64)cpu_to_be64(n);
91}
92
93static inline __u32 fs32_to_cpu(struct qnx6_sb_info *sbi, __fs32 n)
94{
95 if (sbi->s_bytesex == BYTESEX_LE)
96 return le32_to_cpu((__force __le32)n);
97 else
98 return be32_to_cpu((__force __be32)n);
99}
100
101static inline __fs32 cpu_to_fs32(struct qnx6_sb_info *sbi, __u32 n)
102{
103 if (sbi->s_bytesex == BYTESEX_LE)
104 return (__force __fs32)cpu_to_le32(n);
105 else
106 return (__force __fs32)cpu_to_be32(n);
107}
108
109static inline __u16 fs16_to_cpu(struct qnx6_sb_info *sbi, __fs16 n)
110{
111 if (sbi->s_bytesex == BYTESEX_LE)
112 return le16_to_cpu((__force __le16)n);
113 else
114 return be16_to_cpu((__force __be16)n);
115}
116
117static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n)
118{
119 if (sbi->s_bytesex == BYTESEX_LE)
120 return (__force __fs16)cpu_to_le16(n);
121 else
122 return (__force __fs16)cpu_to_be16(n);
123}
124
125extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s,
126 int silent);
127
/*
 * Release a page obtained for reading qnx6 metadata: kunmap() it and drop
 * the page-cache reference.  qnx6_iget() pairs this with its own kmap().
 */
128static inline void qnx6_put_page(struct page *page)
129{
130 kunmap(page);
131 page_cache_release(page);
132}
133
134extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
135 struct page **res_page);
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
new file mode 100644
index 000000000000..29c32cba62d6
--- /dev/null
+++ b/fs/qnx6/super_mmi.c
@@ -0,0 +1,150 @@
1/*
2 * QNX6 file system, Linux implementation.
3 *
4 * Version : 1.0.0
5 *
6 * History :
7 *
8 * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
9 *
10 */
11
12#include <linux/buffer_head.h>
13#include <linux/slab.h>
14#include <linux/crc32.h>
15#include "qnx6.h"
16
17static void qnx6_mmi_copy_sb(struct qnx6_super_block *qsb,
18 struct qnx6_mmi_super_block *sb)
19{
20 qsb->sb_magic = sb->sb_magic;
21 qsb->sb_checksum = sb->sb_checksum;
22 qsb->sb_serial = sb->sb_serial;
23 qsb->sb_blocksize = sb->sb_blocksize;
24 qsb->sb_num_inodes = sb->sb_num_inodes;
25 qsb->sb_free_inodes = sb->sb_free_inodes;
26 qsb->sb_num_blocks = sb->sb_num_blocks;
27 qsb->sb_free_blocks = sb->sb_free_blocks;
28
29 /* the rest of the superblock is the same */
30 memcpy(&qsb->Inode, &sb->Inode, sizeof(sb->Inode));
31 memcpy(&qsb->Bitmap, &sb->Bitmap, sizeof(sb->Bitmap));
32 memcpy(&qsb->Longfile, &sb->Longfile, sizeof(sb->Longfile));
33}
34
35struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
36{
37 struct buffer_head *bh1, *bh2 = NULL;
38 struct qnx6_mmi_super_block *sb1, *sb2;
39 struct qnx6_super_block *qsb = NULL;
40 struct qnx6_sb_info *sbi;
41 __u64 offset;
42
43 /* Check the superblock signatures
44 start with the first superblock */
45 bh1 = sb_bread(s, 0);
46 if (!bh1) {
47 printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
48 return NULL;
49 }
50 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
51 sbi = QNX6_SB(s);
52 if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
53 if (!silent) {
54 printk(KERN_ERR "qnx6: wrong signature (magic) in"
55 " superblock #1.\n");
56 goto out;
57 }
58 }
59
60 /* checksum check - start at byte 8 and end at byte 512 */
61 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
62 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
63 printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
64 goto out;
65 }
66
67 /* calculate second superblock blocknumber */
68 offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) + QNX6_SUPERBLOCK_AREA /
69 fs32_to_cpu(sbi, sb1->sb_blocksize);
70
71 /* set new blocksize */
72 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
73 printk(KERN_ERR "qnx6: unable to set blocksize\n");
74 goto out;
75 }
76 /* blocksize invalidates bh - pull it back in */
77 brelse(bh1);
78 bh1 = sb_bread(s, 0);
79 if (!bh1)
80 goto out;
81 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
82
83 /* read second superblock */
84 bh2 = sb_bread(s, offset);
85 if (!bh2) {
86 printk(KERN_ERR "qnx6: unable to read the second superblock\n");
87 goto out;
88 }
89 sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
90 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
91 if (!silent)
92 printk(KERN_ERR "qnx6: wrong signature (magic) in"
93 " superblock #2.\n");
94 goto out;
95 }
96
97 /* checksum check - start at byte 8 and end at byte 512 */
98 if (fs32_to_cpu(sbi, sb2->sb_checksum)
99 != crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
100 printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
101 goto out;
102 }
103
104 qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
105 if (!qsb) {
106 printk(KERN_ERR "qnx6: unable to allocate memory.\n");
107 goto out;
108 }
109
110 if (fs64_to_cpu(sbi, sb1->sb_serial) >
111 fs64_to_cpu(sbi, sb2->sb_serial)) {
112 /* superblock #1 active */
113 qnx6_mmi_copy_sb(qsb, sb1);
114#ifdef CONFIG_QNX6FS_DEBUG
115 qnx6_superblock_debug(qsb, s);
116#endif
117 memcpy(bh1->b_data, qsb, sizeof(struct qnx6_super_block));
118
119 sbi->sb_buf = bh1;
120 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
121 brelse(bh2);
122 printk(KERN_INFO "qnx6: superblock #1 active\n");
123 } else {
124 /* superblock #2 active */
125 qnx6_mmi_copy_sb(qsb, sb2);
126#ifdef CONFIG_QNX6FS_DEBUG
127 qnx6_superblock_debug(qsb, s);
128#endif
129 memcpy(bh2->b_data, qsb, sizeof(struct qnx6_super_block));
130
131 sbi->sb_buf = bh2;
132 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
133 brelse(bh1);
134 printk(KERN_INFO "qnx6: superblock #2 active\n");
135 }
136 kfree(qsb);
137
138 /* offset for mmi_fs is just SUPERBLOCK_AREA bytes */
139 sbi->s_blks_off = QNX6_SUPERBLOCK_AREA / s->s_blocksize;
140
141 /* success */
142 return sbi->sb;
143
144out:
145 if (bh1 != NULL)
146 brelse(bh1);
147 if (bh2 != NULL)
148 brelse(bh2);
149 return NULL;
150}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 46741970371b..8b4f12b33f57 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -71,6 +71,7 @@
71#include <linux/module.h> 71#include <linux/module.h>
72#include <linux/proc_fs.h> 72#include <linux/proc_fs.h>
73#include <linux/security.h> 73#include <linux/security.h>
74#include <linux/sched.h>
74#include <linux/kmod.h> 75#include <linux/kmod.h>
75#include <linux/namei.h> 76#include <linux/namei.h>
76#include <linux/capability.h> 77#include <linux/capability.h>
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 7898cd688a00..fc2c4388d126 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -292,11 +292,26 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
292 } 292 }
293} 293}
294 294
295/* Return 1 if 'cmd' will block on frozen filesystem */
296static int quotactl_cmd_write(int cmd)
297{
298 switch (cmd) {
299 case Q_GETFMT:
300 case Q_GETINFO:
301 case Q_SYNC:
302 case Q_XGETQSTAT:
303 case Q_XGETQUOTA:
304 case Q_XQUOTASYNC:
305 return 0;
306 }
307 return 1;
308}
309
295/* 310/*
296 * look up a superblock on which quota ops will be performed 311 * look up a superblock on which quota ops will be performed
297 * - use the name of a block device to find the superblock thereon 312 * - use the name of a block device to find the superblock thereon
298 */ 313 */
299static struct super_block *quotactl_block(const char __user *special) 314static struct super_block *quotactl_block(const char __user *special, int cmd)
300{ 315{
301#ifdef CONFIG_BLOCK 316#ifdef CONFIG_BLOCK
302 struct block_device *bdev; 317 struct block_device *bdev;
@@ -309,7 +324,10 @@ static struct super_block *quotactl_block(const char __user *special)
309 putname(tmp); 324 putname(tmp);
310 if (IS_ERR(bdev)) 325 if (IS_ERR(bdev))
311 return ERR_CAST(bdev); 326 return ERR_CAST(bdev);
312 sb = get_super(bdev); 327 if (quotactl_cmd_write(cmd))
328 sb = get_super_thawed(bdev);
329 else
330 sb = get_super(bdev);
313 bdput(bdev); 331 bdput(bdev);
314 if (!sb) 332 if (!sb)
315 return ERR_PTR(-ENODEV); 333 return ERR_PTR(-ENODEV);
@@ -361,7 +379,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
361 pathp = &path; 379 pathp = &path;
362 } 380 }
363 381
364 sb = quotactl_block(special); 382 sb = quotactl_block(special, cmds);
365 if (IS_ERR(sb)) { 383 if (IS_ERR(sb)) {
366 ret = PTR_ERR(sb); 384 ret = PTR_ERR(sb);
367 goto out; 385 goto out;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index aec766abe3af..a1fdabe21dec 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -209,22 +209,19 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
209int ramfs_fill_super(struct super_block *sb, void *data, int silent) 209int ramfs_fill_super(struct super_block *sb, void *data, int silent)
210{ 210{
211 struct ramfs_fs_info *fsi; 211 struct ramfs_fs_info *fsi;
212 struct inode *inode = NULL; 212 struct inode *inode;
213 struct dentry *root;
214 int err; 213 int err;
215 214
216 save_mount_options(sb, data); 215 save_mount_options(sb, data);
217 216
218 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); 217 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
219 sb->s_fs_info = fsi; 218 sb->s_fs_info = fsi;
220 if (!fsi) { 219 if (!fsi)
221 err = -ENOMEM; 220 return -ENOMEM;
222 goto fail;
223 }
224 221
225 err = ramfs_parse_options(data, &fsi->mount_opts); 222 err = ramfs_parse_options(data, &fsi->mount_opts);
226 if (err) 223 if (err)
227 goto fail; 224 return err;
228 225
229 sb->s_maxbytes = MAX_LFS_FILESIZE; 226 sb->s_maxbytes = MAX_LFS_FILESIZE;
230 sb->s_blocksize = PAGE_CACHE_SIZE; 227 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -234,24 +231,11 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent)
234 sb->s_time_gran = 1; 231 sb->s_time_gran = 1;
235 232
236 inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); 233 inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
237 if (!inode) { 234 sb->s_root = d_make_root(inode);
238 err = -ENOMEM; 235 if (!sb->s_root)
239 goto fail; 236 return -ENOMEM;
240 }
241
242 root = d_alloc_root(inode);
243 sb->s_root = root;
244 if (!root) {
245 err = -ENOMEM;
246 goto fail;
247 }
248 237
249 return 0; 238 return 0;
250fail:
251 kfree(fsi);
252 sb->s_fs_info = NULL;
253 iput(inode);
254 return err;
255} 239}
256 240
257struct dentry *ramfs_mount(struct file_system_type *fs_type, 241struct dentry *ramfs_mount(struct file_system_type *fs_type,
diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h
new file mode 100644
index 000000000000..f096b80e73d8
--- /dev/null
+++ b/fs/reiserfs/acl.h
@@ -0,0 +1,76 @@
1#include <linux/init.h>
2#include <linux/posix_acl.h>
3
4#define REISERFS_ACL_VERSION 0x0001
5
/* Full ACL entry: tag, permissions and id, all little-endian. */
6typedef struct {
7 __le16 e_tag;
8 __le16 e_perm;
9 __le32 e_id;
10} reiserfs_acl_entry;
11
/*
 * Short ACL entry without the e_id field.  reiserfs_acl_size() and
 * reiserfs_acl_count() account for the first four entries at this size.
 */
12typedef struct {
13 __le16 e_tag;
14 __le16 e_perm;
15} reiserfs_acl_entry_short;
16
/* Header preceding the entries; holds the little-endian version field. */
17typedef struct {
18 __le32 a_version;
19} reiserfs_acl_header;
20
21static inline size_t reiserfs_acl_size(int count)
22{
23 if (count <= 4) {
24 return sizeof(reiserfs_acl_header) +
25 count * sizeof(reiserfs_acl_entry_short);
26 } else {
27 return sizeof(reiserfs_acl_header) +
28 4 * sizeof(reiserfs_acl_entry_short) +
29 (count - 4) * sizeof(reiserfs_acl_entry);
30 }
31}
32
33static inline int reiserfs_acl_count(size_t size)
34{
35 ssize_t s;
36 size -= sizeof(reiserfs_acl_header);
37 s = size - 4 * sizeof(reiserfs_acl_entry_short);
38 if (s < 0) {
39 if (size % sizeof(reiserfs_acl_entry_short))
40 return -1;
41 return size / sizeof(reiserfs_acl_entry_short);
42 } else {
43 if (s % sizeof(reiserfs_acl_entry))
44 return -1;
45 return s / sizeof(reiserfs_acl_entry) + 4;
46 }
47}
48
49#ifdef CONFIG_REISERFS_FS_POSIX_ACL
50struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
51int reiserfs_acl_chmod(struct inode *inode);
52int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
53 struct inode *dir, struct dentry *dentry,
54 struct inode *inode);
55int reiserfs_cache_default_acl(struct inode *dir);
56extern const struct xattr_handler reiserfs_posix_acl_default_handler;
57extern const struct xattr_handler reiserfs_posix_acl_access_handler;
58
59#else
60
61#define reiserfs_cache_default_acl(inode) 0
62#define reiserfs_get_acl NULL
63
/* Stub used when CONFIG_REISERFS_FS_POSIX_ACL is not set: always returns 0. */
64static inline int reiserfs_acl_chmod(struct inode *inode)
65{
66 return 0;
67}
68
/* Stub used when CONFIG_REISERFS_FS_POSIX_ACL is not set: always returns 0. */
69static inline int
70reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
71 const struct inode *dir, struct dentry *dentry,
72 struct inode *inode)
73{
74 return 0;
75}
76#endif
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 70de42f09f1d..4c0c7d163d15 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -4,14 +4,12 @@
4/* Reiserfs block (de)allocator, bitmap-based. */ 4/* Reiserfs block (de)allocator, bitmap-based. */
5 5
6#include <linux/time.h> 6#include <linux/time.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/vmalloc.h> 12#include <linux/vmalloc.h>
13#include <linux/reiserfs_fs_sb.h>
14#include <linux/reiserfs_fs_i.h>
15#include <linux/quotaops.h> 13#include <linux/quotaops.h>
16#include <linux/seq_file.h> 14#include <linux/seq_file.h>
17 15
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 133e9355dc6f..66c53b642a88 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -5,7 +5,7 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/errno.h> 6#include <linux/errno.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/stat.h> 9#include <linux/stat.h>
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 60c080440661..2b7882b508db 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -17,7 +17,7 @@
17 17
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/reiserfs_fs.h> 20#include "reiserfs.h"
21#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23 23
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ace635053a36..8375c922c0d5 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -3,9 +3,9 @@
3 */ 3 */
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/reiserfs_fs.h> 6#include "reiserfs.h"
7#include <linux/reiserfs_acl.h> 7#include "acl.h"
8#include <linux/reiserfs_xattr.h> 8#include "xattr.h"
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/swap.h> 11#include <linux/swap.h>
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 1e4250bc3a6f..430e0658704c 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -37,7 +37,7 @@
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/reiserfs_fs.h> 40#include "reiserfs.h"
41#include <linux/buffer_head.h> 41#include <linux/buffer_head.h>
42 42
43/* To make any changes in the tree we find a node, that contains item 43/* To make any changes in the tree we find a node, that contains item
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 6471c670743e..91b0cc1242a2 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -19,7 +19,7 @@
19// 19//
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/reiserfs_fs.h> 22#include "reiserfs.h"
23#include <asm/types.h> 23#include <asm/types.h>
24 24
25#define DELTA 0x9E3779B9 25#define DELTA 0x9E3779B9
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 2074fd95046b..e1978fd895f5 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -5,7 +5,7 @@
5#include <asm/uaccess.h> 5#include <asm/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* this is one and only function that is used outside (do_balance.c) */ 11/* this is one and only function that is used outside (do_balance.c) */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9e8cd5acd79c..494c315c7417 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -4,9 +4,9 @@
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/reiserfs_acl.h> 8#include "acl.h"
9#include <linux/reiserfs_xattr.h> 9#include "xattr.h"
10#include <linux/exportfs.h> 10#include <linux/exportfs.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/highmem.h> 12#include <linux/highmem.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 950e3d1b5c9e..0c2185042d5f 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -5,7 +5,7 @@
5#include <linux/capability.h> 5#include <linux/capability.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/time.h> 9#include <linux/time.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 72cb1cc51b87..ee382ef3d300 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -3,7 +3,7 @@
3 */ 3 */
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/reiserfs_fs.h> 6#include "reiserfs.h"
7 7
8// this contains item handlers for old item types: sd, direct, 8// this contains item handlers for old item types: sd, direct,
9// indirect, directory 9// indirect, directory
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c3cf54fd4de3..cf9f4de00a95 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -37,7 +37,7 @@
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/semaphore.h> 38#include <linux/semaphore.h>
39#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
40#include <linux/reiserfs_fs.h> 40#include "reiserfs.h"
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/errno.h> 42#include <linux/errno.h>
43#include <linux/fcntl.h> 43#include <linux/fcntl.h>
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 03d85cbf90bf..79e5a8b4c226 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -5,7 +5,7 @@
5#include <asm/uaccess.h> 5#include <asm/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* these are used in do_balance.c */ 11/* these are used in do_balance.c */
@@ -975,7 +975,7 @@ static int leaf_cut_entries(struct buffer_head *bh,
975 remove */ 975 remove */
976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); 976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
977 RFALSE(I_ENTRY_COUNT(ih) < from + del_count, 977 RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
978 "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", 978 "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
979 I_ENTRY_COUNT(ih), from, del_count); 979 I_ENTRY_COUNT(ih), from, del_count);
980 980
981 if (del_count == 0) 981 if (del_count == 0)
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index 7df1ce48203a..d735bc8470e3 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -1,4 +1,4 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/mutex.h> 2#include <linux/mutex.h>
3 3
4/* 4/*
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 146378865239..84e8a69cee9d 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -14,9 +14,9 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/reiserfs_fs.h> 17#include "reiserfs.h"
18#include <linux/reiserfs_acl.h> 18#include "acl.h"
19#include <linux/reiserfs_xattr.h> 19#include "xattr.h"
20#include <linux/quotaops.h> 20#include <linux/quotaops.h>
21 21
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 3a6de810bd61..f732d6a5251d 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -5,8 +5,7 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/random.h> 6#include <linux/random.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9#include <linux/reiserfs_fs_sb.h>
10 9
11// find where objectid map starts 10// find where objectid map starts
12#define objectid_map(s,rs) (old_format_only (s) ? \ 11#define objectid_map(s,rs) (old_format_only (s) ? \
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 45de98b59466..c0b1112ab7e3 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -4,7 +4,7 @@
4 4
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/reiserfs_fs.h> 7#include "reiserfs.h"
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
@@ -329,7 +329,7 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it 329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it
330 pointless complexity): 330 pointless complexity):
331 331
332 panics in reiserfs_fs.h have numbers from 1000 to 1999 332 panics in reiserfs.h have numbers from 1000 to 1999
333 super.c 2000 to 2999 333 super.c 2000 to 2999
334 preserve.c (unused) 3000 to 3999 334 preserve.c (unused) 3000 to 3999
335 bitmap.c 4000 to 4999 335 bitmap.c 4000 to 4999
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 7a9981196c1c..2c1ade692cc8 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -12,8 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <linux/reiserfs_fs.h> 15#include "reiserfs.h"
16#include <linux/reiserfs_fs_sb.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
19 18
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
new file mode 100644
index 000000000000..445d768eea44
--- /dev/null
+++ b/fs/reiserfs/reiserfs.h
@@ -0,0 +1,2922 @@
1/*
2 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
3 */
4
5#include <linux/reiserfs_fs.h>
6
7#include <linux/slab.h>
8#include <linux/interrupt.h>
9#include <linux/sched.h>
10#include <linux/workqueue.h>
11#include <asm/unaligned.h>
12#include <linux/bitops.h>
13#include <linux/proc_fs.h>
14#include <linux/buffer_head.h>
15
16/* 32-bit compat ioctl numbers: UNPACK is encoded with an int argument, the others alias the generic FS_IOC32_* values */
17#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
18#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
19#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
20#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
21#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
22
23struct reiserfs_journal_list;
24
25/** bitmasks for the i_flags field in the reiserfs-specific part of the inode */
26typedef enum {
27 /** key format used by all items of an object except stat data:
28 if this is set the format is 3.6, otherwise
29 it is 3.5 */
30 i_item_key_version_mask = 0x0001,
31 /** If this is unset, the object has 3.5 stat data; otherwise it has
32 3.6 stat data with 64bit size, 32bit nlink etc. */
33 i_stat_data_version_mask = 0x0002,
34 /** file might need tail packing on close */
35 i_pack_on_close_mask = 0x0004,
36 /** don't pack tail of file */
37 i_nopack_mask = 0x0008,
38 /** If either of the next two is set, a "safe link" was created for this
39 file during truncate or unlink. A safe link is used to avoid leaking disk
40 space on a crash while some files are open but unlinked. */
41 i_link_saved_unlink_mask = 0x0010,
42 i_link_saved_truncate_mask = 0x0020,
43 i_has_xattr_dir = 0x0040,
44 i_data_log = 0x0080,
45} reiserfs_inode_flags;
46
47struct reiserfs_inode_info { /* reiserfs-private inode data; the VFS inode is embedded as the last member */
48 __u32 i_key[4]; /* key is still 4 32 bit integers */
49 /** transient inode flags that are never stored on disk. The bitmasks
50 for this field are the reiserfs_inode_flags values. */
51 __u32 i_flags;
52
53 __u32 i_first_direct_byte; // offset of first byte stored in direct item.
54
55 /* copy of persistent inode flags read from sd_attrs. */
56 __u32 i_attrs;
57
58 int i_prealloc_block; /* first unused block of a sequence of unused blocks */
59 int i_prealloc_count; /* length of that sequence */
60 struct list_head i_prealloc_list; /* per-transaction list of inodes which
61 * have preallocated blocks */
62
63 unsigned new_packing_locality:1; /* new_packing_locality is created; new blocks
64 * for the contents of this directory should be
65 * displaced */
66
67 /* we use these for fsync or O_SYNC to decide which transaction
68 ** needs to be committed in order for this inode to be properly
69 ** flushed */
70 unsigned int i_trans_id;
71 struct reiserfs_journal_list *i_jl;
72 atomic_t openers;
73 struct mutex tailpack;
74#ifdef CONFIG_REISERFS_FS_XATTR
75 struct rw_semaphore i_xattr_sem; /* only present when xattr support is configured */
76#endif
77 struct inode vfs_inode;
78};
79
80typedef enum { /* flag bits; only one is defined in this header */
81 reiserfs_attrs_cleared = 0x00000001,
82} reiserfs_super_block_flags;
83
84/* struct reiserfs_super_block accessors/mutators:
85 * since this is a disk structure, it will always be in little endian format,
86 * so every getter uses le16/le32_to_cpu and every setter uses cpu_to_le16/le32. NOTE(review): the getter sb_jp_jourmal_max_commit_age is misspelled ("jourmal"); the name is left unchanged because callers may depend on it. */
87#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count))
88#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
89#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks))
90#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v))
91#define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block))
92#define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v))
93
94#define sb_jp_journal_1st_block(sbp) \
95 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block))
96#define set_sb_jp_journal_1st_block(sbp,v) \
97 ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v))
98#define sb_jp_journal_dev(sbp) \
99 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev))
100#define set_sb_jp_journal_dev(sbp,v) \
101 ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v))
102#define sb_jp_journal_size(sbp) \
103 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size))
104#define set_sb_jp_journal_size(sbp,v) \
105 ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v))
106#define sb_jp_journal_trans_max(sbp) \
107 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max))
108#define set_sb_jp_journal_trans_max(sbp,v) \
109 ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v))
110#define sb_jp_journal_magic(sbp) \
111 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic))
112#define set_sb_jp_journal_magic(sbp,v) \
113 ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v))
114#define sb_jp_journal_max_batch(sbp) \
115 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch))
116#define set_sb_jp_journal_max_batch(sbp,v) \
117 ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v))
118#define sb_jp_jourmal_max_commit_age(sbp) \
119 (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age))
120#define set_sb_jp_journal_max_commit_age(sbp,v) \
121 ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v))
122
123#define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize))
124#define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v))
125#define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize))
126#define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v))
127#define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize))
128#define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v))
129#define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state))
130#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v))
131#define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state))
132#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v))
133#define sb_hash_function_code(sbp) \
134 (le32_to_cpu((sbp)->s_v1.s_hash_function_code))
135#define set_sb_hash_function_code(sbp,v) \
136 ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v))
137#define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height))
138#define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v))
139#define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr))
140#define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v))
141#define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version))
142#define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v))
143
144#define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count))
145#define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v))
146
147#define sb_reserved_for_journal(sbp) \
148 (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal))
149#define set_sb_reserved_for_journal(sbp,v) \
150 ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v))
151
152/* LOGGING -- */
153
154/* These all interrelate for performance.
155**
156** If the journal block count is smaller than n transactions, you lose speed.
157** I don't know what n is yet, I'm guessing 8-16.
158**
159** typical transaction size depends on the application, how often fsync is
160** called, and how many metadata blocks you dirty in a 30 second period.
161** The more small files (<16k) you use, the larger your transactions will
162** be.
163**
164** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
165** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough
166** to prevent wrapping before dirty meta blocks get to disk.
167**
168** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
169** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
170**
171** The larger the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
172**
173*/
174
175/* journal sizing constants -- don't mess with these for a while */
176 /* we have a node size define somewhere in reiserfs_fs.h. -Hans */
177#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */
178#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */
179#define JOURNAL_HASH_SIZE 8192 /* number of slots in each of j_hash_table and j_list_hash_table */
180#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating (sizes j_list_bitmap[]). Must be >= 2 */
181
182/* One of these for every block in every transaction.
183** Each one is in two hash tables: first a hash of the current transaction, and after journal_end a
184** hash of all the in-memory transactions.
185** next and prev are used by the current transaction (journal_hash).
186** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash
187** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging
188** to a given transaction.
189*/
190struct reiserfs_journal_cnode {
191 struct buffer_head *bh; /* real buffer head */
192 struct super_block *sb; /* dev of real buffer head */
193 __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */
194 unsigned long state;
195 struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */
196 struct reiserfs_journal_cnode *next; /* next in transaction list */
197 struct reiserfs_journal_cnode *prev; /* prev in transaction list */
198 struct reiserfs_journal_cnode *hprev; /* prev in hash list */
199 struct reiserfs_journal_cnode *hnext; /* next in hash list */
200};
201
202struct reiserfs_bitmap_node { /* one list-linked bitmap buffer; presumably kept on the journal's j_bitmap_nodes list -- TODO confirm */
203 int id;
204 char *data;
205 struct list_head list;
206};
207
208struct reiserfs_list_bitmap { /* pairs a journal list with an array of bitmap-node pointers; struct reiserfs_journal holds JOURNAL_NUM_BITMAPS of these */
209 struct reiserfs_journal_list *journal_list;
210 struct reiserfs_bitmap_node **bitmaps;
211};
212
213/*
214** One of these for each transaction. The most important part here is j_realblock:
215** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
216** real buffer heads dirty once all the commits hit the disk,
217** and to make sure every real block in a transaction is on disk before allowing the log area
218** to be overwritten */
219struct reiserfs_journal_list {
220 unsigned long j_start;
221 unsigned long j_state;
222 unsigned long j_len;
223 atomic_t j_nonzerolen;
224 atomic_t j_commit_left;
225 atomic_t j_older_commits_done; /* all commits older than this on disk */
226 struct mutex j_commit_mutex;
227 unsigned int j_trans_id;
228 time_t j_timestamp;
229 struct reiserfs_list_bitmap *j_list_bitmap;
230 struct buffer_head *j_commit_bh; /* commit buffer head */
231 struct reiserfs_journal_cnode *j_realblock;
232 struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. free each of these on flush */
233 /* time ordered list of all active transactions */
234 struct list_head j_list;
235
236 /* time ordered list of all transactions we haven't tried to flush yet */
237 struct list_head j_working_list;
238
239 /* list of tail conversion targets in need of flush before commit */
240 struct list_head j_tail_bh_list;
241 /* list of data=ordered buffers in need of flush before commit */
242 struct list_head j_bh_list;
243 int j_refcount;
244};
245
246struct reiserfs_journal { /* in-memory journal state; reiserfs_sb_info points at one through s_journal */
247 struct buffer_head **j_ap_blocks; /* journal blocks on disk */
248 struct reiserfs_journal_cnode *j_last; /* newest journal block */
249 struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */
250
251 struct block_device *j_dev_bd;
252 fmode_t j_dev_mode;
253 int j_1st_reserved_block; /* first block on s_dev of reserved area journal */
254
255 unsigned long j_state; /* bit numbers come from enum journal_state_bits */
256 unsigned int j_trans_id;
257 unsigned long j_mount_id;
258 unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */
259 unsigned long j_len; /* length of current waiting commit */
260 unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */
261 atomic_t j_wcount; /* count of writers for current commit */
262 unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */
263 unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */
264 unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp -- NOTE(review): the field name says transaction id, the comment says timestamp; confirm */
265 struct buffer_head *j_header_bh;
266
267 time_t j_trans_start_time; /* time this transaction started */
268 struct mutex j_mutex;
269 struct mutex j_flush_mutex;
270 wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */
271 atomic_t j_jlock; /* lock for j_join_wait */
272 int j_list_bitmap_index; /* number of next list bitmap to use */
273 int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */
274 int j_next_full_flush; /* next journal_end will flush all journal list */
275 int j_next_async_flush; /* next journal_end will flush all async commits */
276
277 int j_cnode_used; /* number of cnodes on the used list */
278 int j_cnode_free; /* number of cnodes on the free list */
279
280 unsigned int j_trans_max; /* max number of blocks in a transaction. */
281 unsigned int j_max_batch; /* max number of blocks to batch into a trans */
282 unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */
283 unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */
284 unsigned int j_default_max_commit_age; /* the default for the max commit age */
285
286 struct reiserfs_journal_cnode *j_cnode_free_list;
287 struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */
288
289 struct reiserfs_journal_list *j_current_jl;
290 int j_free_bitmap_nodes;
291 int j_used_bitmap_nodes;
292
293 int j_num_lists; /* total number of active transactions */
294 int j_num_work_lists; /* number that need attention from kreiserfsd */
295
296 /* debugging to make sure things are flushed in order */
297 unsigned int j_last_flush_id;
298
299 /* debugging to make sure things are committed in order */
300 unsigned int j_last_commit_id;
301
302 struct list_head j_bitmap_nodes;
303 struct list_head j_dirty_buffers;
304 spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */
305
306 /* list of all active transactions */
307 struct list_head j_journal_list;
308 /* lists that haven't been touched by writeback attempts */
309 struct list_head j_working_list;
310
311 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */
312 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */
313 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all
314 the transactions */
315 struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */
316 int j_persistent_trans;
317 unsigned long j_max_trans_size;
318 unsigned long j_max_batch_size;
319
320 int j_errno;
321
322 /* when flushing ordered buffers, throttle new ordered writers */
323 struct delayed_work j_work;
324 struct super_block *j_work_sb;
325 atomic_t j_async_throttle;
326};
327
328enum journal_state_bits { /* bit numbers (not masks) for a journal's j_state, e.g. test_bit(J_ABORTED, &journal->j_state); numbering starts at 1 */
329 J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */
330 J_WRITERS_QUEUED, /* set when log is full due to too many writers */
331 J_ABORTED, /* set when log is aborted */
332};
333
334#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */
335/* hashf_t: pointer to a hash function taking a signed-char buffer and an int, returning a 32-bit value; stored in reiserfs_sb_info.s_hash_function */
336typedef __u32(*hashf_t) (const signed char *, int);
337
338struct reiserfs_bitmap_info { /* element type of reiserfs_sb_info.s_ap_bitmap */
339 __u32 free_count; /* presumably the number of free blocks tracked by one bitmap -- TODO confirm */
340};
341
342struct proc_dir_entry;
343
344#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
345typedef unsigned long int stat_cnt_t; /* type of the statistics counters below */
346typedef struct reiserfs_proc_info_data {
347 spinlock_t lock;
348 int exiting;
349 int max_hash_collisions;
350
351 stat_cnt_t breads;
352 stat_cnt_t bread_miss;
353 stat_cnt_t search_by_key;
354 stat_cnt_t search_by_key_fs_changed;
355 stat_cnt_t search_by_key_restarted;
356
357 stat_cnt_t insert_item_restarted;
358 stat_cnt_t paste_into_item_restarted;
359 stat_cnt_t cut_from_item_restarted;
360 stat_cnt_t delete_solid_item_restarted;
361 stat_cnt_t delete_item_restarted;
362
363 stat_cnt_t leaked_oid;
364 stat_cnt_t leaves_removable;
365
366 /* balances per level. An explicit 5 is used because MAX_HEIGHT is not visible yet. */
367 stat_cnt_t balance_at[5]; /* XXX */
368 /* sbk == search_by_key */
369 stat_cnt_t sbk_read_at[5]; /* XXX */
370 stat_cnt_t sbk_fs_changed[5];
371 stat_cnt_t sbk_restarted[5];
372 stat_cnt_t items_at[5]; /* XXX */
373 stat_cnt_t free_at[5]; /* XXX */
374 stat_cnt_t can_node_be_removed[5]; /* XXX */
375 long int lnum[5]; /* XXX */
376 long int rnum[5]; /* XXX */
377 long int lbytes[5]; /* XXX */
378 long int rbytes[5]; /* XXX */
379 stat_cnt_t get_neighbors[5];
380 stat_cnt_t get_neighbors_restart[5];
381 stat_cnt_t need_l_neighbor[5];
382 stat_cnt_t need_r_neighbor[5];
383
384 stat_cnt_t free_block;
385 struct __scan_bitmap_stats {
386 stat_cnt_t call;
387 stat_cnt_t wait;
388 stat_cnt_t bmap;
389 stat_cnt_t retry;
390 stat_cnt_t in_journal_hint;
391 stat_cnt_t in_journal_nohint;
392 stat_cnt_t stolen;
393 } scan_bitmap;
394 struct __journal_stats {
395 stat_cnt_t in_journal;
396 stat_cnt_t in_journal_bitmap;
397 stat_cnt_t in_journal_reusable;
398 stat_cnt_t lock_journal;
399 stat_cnt_t lock_journal_wait;
400 stat_cnt_t journal_being; /* NOTE(review): probably meant "journal_begin"; field name kept as-is */
401 stat_cnt_t journal_relock_writers;
402 stat_cnt_t journal_relock_wcount;
403 stat_cnt_t mark_dirty;
404 stat_cnt_t mark_dirty_already;
405 stat_cnt_t mark_dirty_notjournal;
406 stat_cnt_t restore_prepared;
407 stat_cnt_t prepare;
408 stat_cnt_t prepare_retry;
409 } journal;
410} reiserfs_proc_info_data_t;
411#else
412typedef struct reiserfs_proc_info_data { /* statistics not configured: empty placeholder so reiserfs_proc_info_data_t still names a type */
413} reiserfs_proc_info_data_t;
414#endif
415
416/* reiserfs in-core super block data (a struct, despite the historical "union" wording) */
417struct reiserfs_sb_info {
418 struct buffer_head *s_sbh; /* Buffer containing the super block */
419 /* both the comment and the choice of
420 name are unclear for s_rs -Hans */
421 struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */
422 struct reiserfs_bitmap_info *s_ap_bitmap;
423 struct reiserfs_journal *s_journal; /* pointer to journal information */
424 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
425
426 /* Serialize writers access, replace the old bkl */
427 struct mutex lock;
428 /* Owner of the lock (can be recursive) */
429 struct task_struct *lock_owner;
430 /* Depth of the lock, start from -1 like the bkl */
431 int lock_depth;
432
433 /* Comment? -Hans */
434 void (*end_io_handler) (struct buffer_head *, int);
435 hashf_t s_hash_function; /* pointer to function which is used
436 to sort names in directory. Set on
437 mount */
438 unsigned long s_mount_opt; /* reiserfs's mount options are set
439 here, one bit per enum reiserfs_mount_options value (e.g. NOTAIL, NOLOG,
440 REPLAYONLY) */
441
442 struct { /* This is a structure that describes block allocator options */
443 unsigned long bits; /* Bitfield for enable/disable kind of options */
444 unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */
445 int border; /* percentage of disk, border takes */
446 int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */
447 int preallocsize; /* Number of blocks we try to prealloc when file
448 reaches preallocmin size (in blocks) or
449 prealloc_list is empty. */
450 } s_alloc_options;
451
452 /* Comment? -Hans */
453 wait_queue_head_t s_wait;
454 /* To be obsoleted soon by per buffer seals.. -Hans */
455 atomic_t s_generation_counter; // increased by one every time the
456 // tree gets re-balanced
457 unsigned long s_properties; /* File system properties. Currently holds
458 on-disk FS format */
459
460 /* session statistics */
461 int s_disk_reads;
462 int s_disk_writes;
463 int s_fix_nodes;
464 int s_do_balance;
465 int s_unneeded_left_neighbor;
466 int s_good_search_by_key_reada;
467 int s_bmaps;
468 int s_bmaps_without_search;
469 int s_direct2indirect;
470 int s_indirect2direct;
471 /* set up when it's ok for reiserfs_read_inode2() to read from
472 disk inode with nlink==0. Currently this is only used during
473 finish_unfinished() processing at mount time */
474 int s_is_unlinked_ok;
475 reiserfs_proc_info_data_t s_proc_info_data;
476 struct proc_dir_entry *procdir;
477 int reserved_blocks; /* amount of blocks reserved for further allocations */
478 spinlock_t bitmap_lock; /* this lock is now only used to protect the reserved_blocks variable */
479 struct dentry *priv_root; /* root of /.reiserfs_priv */
480 struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */
481 int j_errno;
482#ifdef CONFIG_QUOTA
483 char *s_qf_names[MAXQUOTAS];
484 int s_jquota_fmt;
485#endif
486 char *s_jdev; /* Stored jdev for mount option showing */
487#ifdef CONFIG_REISERFS_CHECK
488
489 struct tree_balance *cur_tb; /*
490 * Detects whether more than one
491 * copy of tb exists per superblock
492 * as a means of checking whether
493 * do_balance is executing concurrently
494 * against another tree reader/writer
495 * on a same mount point.
496 */
497#endif
498};
499
500/* Definitions of reiserfs on-disk properties: bit positions within s_properties (tested as 1 << REISERFS_3_5 by old_format_only()) */
501#define REISERFS_3_5 0
502#define REISERFS_3_6 1
503#define REISERFS_OLD_FORMAT 2
504
505enum reiserfs_mount_options {
506/* Mount options: each enumerator is a bit position in s_mount_opt, tested as (1 << OPTION) */
507 REISERFS_LARGETAIL, /* large tails will be created in a session */
508 REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */
509 REPLAYONLY, /* replay journal and return 0. Used by fsck */
510 REISERFS_CONVERT, /* -o conv: causes conversion of old
511 format super block to the new
512 format. If not specified - old
513 partition will be dealt with in a
514 manner of 3.5.x */
515
516/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
517** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option
518** is not required. If the normal autodetection code can't determine which
519** hash to use (because both hashes had the same value for a file)
520** use this option to force a specific hash. It won't allow you to override
521** the existing hash on the FS, so if you have a tea hash disk, and mount
522** with -o hash=rupasov, the mount will fail.
523*/
524 FORCE_TEA_HASH, /* try to force tea hash on mount */
525 FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */
526 FORCE_R5_HASH, /* try to force r5 hash on mount */
527 FORCE_HASH_DETECT, /* try to detect hash function on mount */
528
529 REISERFS_DATA_LOG,
530 REISERFS_DATA_ORDERED,
531 REISERFS_DATA_WRITEBACK,
532
533/* used for testing experimental features, makes benchmarking new
534 features with and without more convenient, should never be used by
535 users in any code shipped to users (ideally) */
536
537 REISERFS_NO_BORDER,
538 REISERFS_NO_UNHASHED_RELOCATION,
539 REISERFS_HASHED_RELOCATION,
540 REISERFS_ATTRS,
541 REISERFS_XATTRS_USER,
542 REISERFS_POSIXACL,
543 REISERFS_EXPOSE_PRIVROOT,
544 REISERFS_BARRIER_NONE,
545 REISERFS_BARRIER_FLUSH,
546
547 /* Actions on error */
548 REISERFS_ERROR_PANIC,
549 REISERFS_ERROR_RO,
550 REISERFS_ERROR_CONTINUE,
551
552 REISERFS_USRQUOTA, /* User quota option specified */
553 REISERFS_GRPQUOTA, /* Group quota option specified */
554
555 REISERFS_TEST1,
556 REISERFS_TEST2,
557 REISERFS_TEST3,
558 REISERFS_TEST4,
559 REISERFS_UNSUPPORTED_OPT,
560};
561
562#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH)) /* non-zero when that option bit is set for super block s; the macros in this group follow the same single-bit pattern */
563#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH))
564#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH))
565#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT))
566#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER))
567#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
568#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
569#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))
570
571#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
572#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
573#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
574#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS))
575#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5)) /* note: tests s_properties, not s_mount_opt */
576#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT))
577#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG))
578#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED))
579#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
580#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
581#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
582#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
583#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) /* true when either user xattrs or POSIX ACLs were requested */
584#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
585#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
586
587#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC))
588#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO))
589
590void reiserfs_file_buffer(struct buffer_head *bh, int list);
591extern struct file_system_type reiserfs_fs_type;
592int reiserfs_resize(struct super_block *, unsigned long);
593
594#define CARRY_ON 0
595#define SCHEDULE_OCCURRED 1
596/* shorthand accessors for fields reached through REISERFS_SB(s) and its journal */
597#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh)
598#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal)
599#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
600#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) /* NOTE(review): struct reiserfs_journal in this header has no j_journal_len_free member, so any use of this macro would fail to compile */
601#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap)
602
603#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->) /* NOTE(review): expansion ends in a dangling "->" and is not a complete expression; the intended member is unknown */
604
605/* NULL-tolerant replacement for "bdevname": yields the "s_id" field of
606 * the super block, or the fixed string "Null superblock" when s is NULL.
607 */
608static inline char *reiserfs_bdevname(struct super_block *s)
609{
610 return s ? s->s_id : "Null superblock";
611}
612
613#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal)))
614static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal *journal)
615{
616 unsigned long *state = &journal->j_state; /* J_ABORTED is a bit number within j_state */
617 return test_bit(J_ABORTED, state);
618}
619
620/*
621 * Locking primitives. The write lock is a per superblock
622 * special mutex that has properties close to the Big Kernel Lock
623 * which was used in the previous locking scheme.
624 */
625void reiserfs_write_lock(struct super_block *s);
626void reiserfs_write_unlock(struct super_block *s);
627int reiserfs_write_lock_once(struct super_block *s);
628void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
629/* reiserfs_lock_check_recursive() is a real function only with CONFIG_REISERFS_CHECK; otherwise it is an empty inline stub */
630#ifdef CONFIG_REISERFS_CHECK
631void reiserfs_lock_check_recursive(struct super_block *s);
632#else
633static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
634#endif
635
636/*
637 * Several mutexes depend on the write lock.
638 * However sometimes we want to relax the write lock while we hold
639 * these mutexes, according to the release/reacquire on schedule()
640 * properties of the Bkl that were used.
641 * Reiserfs performance and locking were based on this scheme.
642 * Now that the write lock is a mutex and not the bkl anymore, doing so
643 * may result in a deadlock:
644 *
645 * A acquire write_lock
646 * A acquire j_commit_mutex
647 * A release write_lock and wait for something
648 * B acquire write_lock
649 * B can't acquire j_commit_mutex and sleep
650 * A can't acquire write lock anymore
651 * deadlock
652 *
653 * What we do here is avoid such a deadlock by playing the same game
654 * as the Bkl: reiserfs_mutex_lock_safe() drops the write lock before
655 * blocking on the mutex and retakes the write lock once the mutex is held.
656 *
657 * The mutexes concerned by this hack are:
658 * - The commit mutex of a journal list
659 * - The flush mutex
660 * - The journal lock
661 * - The inode mutex
662 */
663static inline void reiserfs_mutex_lock_safe(struct mutex *m,
664 struct super_block *s)
665{
666 reiserfs_lock_check_recursive(s);
667 reiserfs_write_unlock(s);
668 mutex_lock(m);
669 reiserfs_write_lock(s);
670}
671
672static inline void /* mutex_lock_nested() on m with the write lock of s released while blocking, then retaken */
673reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
674 struct super_block *s)
675{
676 reiserfs_lock_check_recursive(s);
677 reiserfs_write_unlock(s);
678 mutex_lock_nested(m, subclass);
679 reiserfs_write_lock(s);
680}
681
682static inline void /* down_read() on sem with the write lock of s released while blocking, then retaken */
683reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
684{
685 reiserfs_lock_check_recursive(s);
686 reiserfs_write_unlock(s);
687 down_read(sem);
688 reiserfs_write_lock(s);
689}
690
691/*
692 * Yield the CPU when a reschedule is pending, releasing the write lock
693 * around schedule() as the previous bkl based locking scheme of reiserfs did.
694 */
695static inline void reiserfs_cond_resched(struct super_block *s)
696{
697 if (!need_resched())
698 return;
699 reiserfs_write_unlock(s);
700 schedule();
701 reiserfs_write_lock(s);
702}
703
704struct fid;
705
706/* in reading the #defines, it may help to understand that they employ
707 the following abbreviations:
708
709 B = Buffer
710 I = Item header
711 H = Height within the tree (should be changed to LEV)
712 N = Number of the item in the node
713 STAT = stat data
714 DEH = Directory Entry Header
715 EC = Entry Count
716 E = Entry number
717 UL = Unsigned Long
718 BLKH = BLocK Header
719 UNFM = UNForMatted node
720 DC = Disk Child
721 P = Path
722
723 These #defines are named by concatenating these abbreviations,
724 where the arguments come first and the return value comes last,
725 of the macro.
726
727*/
728
729#define USE_INODE_GENERATION_COUNTER
730/* the switches here are defined without a value, i.e. meant for #ifdef tests */
731#define REISERFS_PREALLOCATE
732#define DISPLACE_NEW_PACKING_LOCALITIES
733#define PREALLOCATION_SIZE 9 /* presumably a block count -- TODO confirm against the allocator */
734
735/* round x up to a multiple of n; n must be power of 2 */
736#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
737
738// to be ok for alpha and others we have to align structures to 8 byte
739// boundary.
740// FIXME: do not change 4 by anything else: there is code which relies on that -- NOTE(review): this FIXME mentions 4 while the macro rounds to 8; confirm which value the dependent code expects
741#define ROUND_UP(x) _ROUND_UP(x,8LL)
742
743/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug
744** messages.
745*/
746#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */
747/* reiserfs_warning() forwards to __reiserfs_warning(), supplying the calling function's name via __func__ */
748void __reiserfs_warning(struct super_block *s, const char *id,
749 const char *func, const char *fmt, ...);
750#define reiserfs_warning(s, id, fmt, args...) \
751 __reiserfs_warning(s, id, __func__, fmt, ##args)
752/* assertions handling */
753
754/** always check a condition and call reiserfs_panic() if it is false. The message is "(<cond>) at <file>:<line>:<function>: " followed by the caller's format and args. The scond parameter is accepted for the callers' sake but is not used; the condition text comes from #cond. */
755#define __RASSERT(cond, scond, format, args...) \
756do { \
757 if (!(cond)) \
758 reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
759 __FILE__ ":%i:%s: " format "\n", \
760 /* one argument per conversion: %i takes __LINE__, %s takes __func__ (a stray pid argument used to shift them) */ \
761 __LINE__, __func__ , ##args); \
762} while (0)
763
764#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
765/* RFALSE asserts that cond is FALSE, and only in CONFIG_REISERFS_CHECK builds; otherwise it expands to an empty statement and cond is not evaluated */
766#if defined( CONFIG_REISERFS_CHECK )
767#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args)
768#else
769#define RFALSE( cond, format, args... ) do {;} while( 0 )
770#endif
771/* CONSTF: shorthand for the __attribute_const__ function attribute */
772#define CONSTF __attribute_const__
773/*
774 * Disk Data Structures
775 */
776
777/***************************************************************************/
778/* SUPER BLOCK */
779/***************************************************************************/
780
781/*
782 * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs
783 * the version in RAM is part of a larger structure containing fields never written to disk.
784 */
785#define UNSET_HASH 0 // read_super will guess about, what hash names
786 // in directories were sorted with
787#define TEA_HASH 1
788#define YURA_HASH 2
789#define R5_HASH 3
790#define DEFAULT_HASH R5_HASH
791
792struct journal_params {
793 __le32 jp_journal_1st_block; /* where does journal start from on its
794 * device */
795 __le32 jp_journal_dev; /* journal device st_rdev */
796 __le32 jp_journal_size; /* size of the journal */
797 __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */
798 __le32 jp_journal_magic; /* random value made on fs creation (this
799 * was sb_journal_block_count) */
800 __le32 jp_journal_max_batch; /* max number of blocks to batch into a
801 * trans */
802 __le32 jp_journal_max_commit_age; /* in seconds, how old can an async
803 * commit be */
804 __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction
805 * be */
806};
807
808/* this is the super from 3.5.X, where X >= 10 */
809struct reiserfs_super_block_v1 {
810 __le32 s_block_count; /* blocks count */
811 __le32 s_free_blocks; /* free blocks count */
812 __le32 s_root_block; /* root block number */
813 struct journal_params s_journal;
814 __le16 s_blocksize; /* block size */
815 __le16 s_oid_maxsize; /* max size of object id array, see
816 * get_objectid() commentary */
817 __le16 s_oid_cursize; /* current size of object id array */
818 __le16 s_umount_state; /* this is set to 1 when filesystem was
819 * umounted, to 2 - when not */
820 char s_magic[10]; /* reiserfs magic string indicates that
821 * file system is reiserfs:
822 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
823 __le16 s_fs_state; /* it is set to used by fsck to mark which
824 * phase of rebuilding is done */
825 __le32 s_hash_function_code; /* indicate, what hash function is being use
826 * to sort names in a directory*/
827 __le16 s_tree_height; /* height of disk tree */
828 __le16 s_bmap_nr; /* amount of bitmap blocks needed to address
829 * each block of file system */
830 __le16 s_version; /* this field is only reliable on filesystem
831 * with non-standard journal */
832 __le16 s_reserved_for_journal; /* size in blocks of journal area on main
833 * device, we need to keep after
834 * making fs with non-standard journal */
835} __attribute__ ((__packed__));
836
837#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
838
839/* this is the on disk super block */
840struct reiserfs_super_block {
841 struct reiserfs_super_block_v1 s_v1;
842 __le32 s_inode_generation;
843 __le32 s_flags; /* Right now used only by inode-attributes, if enabled */
844 unsigned char s_uuid[16]; /* filesystem unique identifier */
845 unsigned char s_label[16]; /* filesystem volume label */
846 __le16 s_mnt_count; /* Count of mounts since last fsck */
847 __le16 s_max_mnt_count; /* Maximum mounts before check */
848 __le32 s_lastcheck; /* Timestamp of last fsck */
849 __le32 s_check_interval; /* Interval between checks */
850 char s_unused[76]; /* zero filled by mkreiserfs and
851 * reiserfs_convert_objectid_map_v1()
852 * so any additions must be updated
853 * there as well. */
854} __attribute__ ((__packed__));
855
856#define SB_SIZE (sizeof(struct reiserfs_super_block))
857
858#define REISERFS_VERSION_1 0
859#define REISERFS_VERSION_2 2
860
861// on-disk super block fields converted to cpu form
862#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
863#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
/*
 * Block size from the on-disk super block, in cpu byte order.
 * s_blocksize is declared __le16, so it is converted with the 16-bit
 * helper; a 32-bit swap of the promoted value gives a wrong result on
 * big-endian hosts.
 */
#define SB_BLOCKSIZE(s) \
	le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize))
866#define SB_BLOCK_COUNT(s) \
867 le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count))
868#define SB_FREE_BLOCKS(s) \
869 le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks))
870#define SB_REISERFS_MAGIC(s) \
871 (SB_V1_DISK_SUPER_BLOCK(s)->s_magic)
872#define SB_ROOT_BLOCK(s) \
873 le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block))
874#define SB_TREE_HEIGHT(s) \
875 le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height))
876#define SB_REISERFS_STATE(s) \
877 le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state))
878#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version))
879#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr))
880
881#define PUT_SB_BLOCK_COUNT(s, val) \
882 do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
883#define PUT_SB_FREE_BLOCKS(s, val) \
884 do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
885#define PUT_SB_ROOT_BLOCK(s, val) \
886 do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
887#define PUT_SB_TREE_HEIGHT(s, val) \
888 do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
889#define PUT_SB_REISERFS_STATE(s, val) \
890 do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0)
891#define PUT_SB_VERSION(s, val) \
892 do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
893#define PUT_SB_BMAP_NR(s, val) \
894 do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)
895
896#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal)
897#define SB_ONDISK_JOURNAL_SIZE(s) \
898 le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size))
899#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \
900 le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block))
901#define SB_ONDISK_JOURNAL_DEVICE(s) \
902 le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev))
903#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \
904 le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
905
/*
 * True when @block lies in [first reserved journal block, first + length).
 * The length is the on-disk journal size plus one unless is_reiserfs_jr()
 * reports true for the super block, in which case the
 * "reserved for journal" count is used instead.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be negated or combined with other operators, and so @block may be an
 * arbitrary expression.  @block is evaluated more than once.
 */
#define is_block_in_log_or_reserved_area(s, block) \
	((block) >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \
	 && (block) < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \
	 ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \
	 SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))))
911
912int is_reiserfs_3_5(struct reiserfs_super_block *rs);
913int is_reiserfs_3_6(struct reiserfs_super_block *rs);
914int is_reiserfs_jr(struct reiserfs_super_block *rs);
915
916/* ReiserFS leaves the first 64k unused, so that partition labels have
917 enough space. If someone wants to write a fancy bootloader that
918 needs more than 64k, let us know, and this will be increased in size.
919 This number must be larger than the largest block size on any
920 platform, or code will break. -Hans */
921#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
922#define REISERFS_FIRST_BLOCK unused_define
923#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
924
925/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */
926#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
927
928/* reiserfs internal error codes (used by search_by_key and fix_nodes) */
929#define CARRY_ON 0
930#define REPEAT_SEARCH -1
931#define IO_ERROR -2
932#define NO_DISK_SPACE -3
933#define NO_BALANCING_NEEDED (-4)
934#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5)
935#define QUOTA_EXCEEDED -6
936
937typedef __u32 b_blocknr_t;
938typedef __le32 unp_t;
939
940struct unfm_nodeinfo {
941 unp_t unfm_nodenum;
942 unsigned short unfm_freespace;
943};
944
945/* there are two formats of keys: 3.5 and 3.6
946 */
947#define KEY_FORMAT_3_5 0
948#define KEY_FORMAT_3_6 1
949
950/* there are two stat datas */
951#define STAT_DATA_V1 0
952#define STAT_DATA_V2 1
953
954static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode)
955{
956 return container_of(inode, struct reiserfs_inode_info, vfs_inode);
957}
958
959static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
960{
961 return sb->s_fs_info;
962}
963
964/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
965 * which overflows on large file systems. */
966static inline __u32 reiserfs_bmap_count(struct super_block *sb)
967{
968 return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
969}
970
/* Non-zero when @bmap_nr is too large to be represented in 16 bits. */
static inline int bmap_would_wrap(unsigned bmap_nr)
{
	const long long max_16bit = (1LL << 16) - 1;

	return bmap_nr > max_16bit;
}
975
976/** this says about version of key of all items (but stat data) the
977 object consists of */
978#define get_inode_item_key_version( inode ) \
979 ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)
980
981#define set_inode_item_key_version( inode, version ) \
982 ({ if((version)==KEY_FORMAT_3_6) \
983 REISERFS_I(inode)->i_flags |= i_item_key_version_mask; \
984 else \
985 REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; })
986
987#define get_inode_sd_version(inode) \
988 ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1)
989
990#define set_inode_sd_version(inode, version) \
991 ({ if((version)==STAT_DATA_V2) \
992 REISERFS_I(inode)->i_flags |= i_stat_data_version_mask; \
993 else \
994 REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
995
996/* This is an aggressive tail suppression policy, I am hoping it
997 improves our benchmarks. The principle behind it is that percentage
998 space saving is what matters, not absolute space saving. This is
999 non-intuitive, but it helps to understand it if you consider that the
1000 cost to access 4 blocks is not much more than the cost to access 1
1001 block, if you have to do a seek and rotate. A tail risks a
1002 non-linear disk access that is significant as a percentage of total
1003 time cost for a 4 block file and saves an amount of space that is
1004 less significant as a percentage of space, or so goes the hypothesis.
1005 -Hans */
1006#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
1007(\
1008 (!(n_tail_size)) || \
1009 (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \
1010 ( (n_file_size) >= (n_block_size) * 4 ) || \
1011 ( ( (n_file_size) >= (n_block_size) * 3 ) && \
1012 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \
1013 ( ( (n_file_size) >= (n_block_size) * 2 ) && \
1014 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \
1015 ( ( (n_file_size) >= (n_block_size) ) && \
1016 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
1017)
1018
1019/* Another strategy for tails, this one means only create a tail if all the
1020 file would fit into one DIRECT item.
1021 Primary intention for this one is to increase performance by decreasing
1022 seeking.
1023*/
1024#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
1025(\
1026 (!(n_tail_size)) || \
1027 (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \
1028)
1029
1030/*
1031 * values for s_umount_state field
1032 */
1033#define REISERFS_VALID_FS 1
1034#define REISERFS_ERROR_FS 2
1035
1036//
1037// there are 5 item types currently
1038//
1039#define TYPE_STAT_DATA 0
1040#define TYPE_INDIRECT 1
1041#define TYPE_DIRECT 2
1042#define TYPE_DIRENTRY 3
1043#define TYPE_MAXTYPE 3
1044#define TYPE_ANY 15 // FIXME: comment is required
1045
1046/***************************************************************************/
1047/* KEY & ITEM HEAD */
1048/***************************************************************************/
1049
1050//
1051// directories use this key as well as old files
1052//
1053struct offset_v1 {
1054 __le32 k_offset;
1055 __le32 k_uniqueness;
1056} __attribute__ ((__packed__));
1057
1058struct offset_v2 {
1059 __le64 v;
1060} __attribute__ ((__packed__));
1061
1062static inline __u16 offset_v2_k_type(const struct offset_v2 *v2)
1063{
1064 __u8 type = le64_to_cpu(v2->v) >> 60;
1065 return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY;
1066}
1067
1068static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type)
1069{
1070 v2->v =
1071 (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60);
1072}
1073
1074static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2)
1075{
1076 return le64_to_cpu(v2->v) & (~0ULL >> 4);
1077}
1078
1079static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset)
1080{
1081 offset &= (~0ULL >> 4);
1082 v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
1083}
1084
1085/* Key of an item determines its location in the S+tree, and
1086 is composed of 4 components */
1087struct reiserfs_key {
1088 __le32 k_dir_id; /* packing locality: by default parent
1089 directory object id */
1090 __le32 k_objectid; /* object identifier */
1091 union {
1092 struct offset_v1 k_offset_v1;
1093 struct offset_v2 k_offset_v2;
1094 } __attribute__ ((__packed__)) u;
1095} __attribute__ ((__packed__));
1096
1097struct in_core_key {
1098 __u32 k_dir_id; /* packing locality: by default parent
1099 directory object id */
1100 __u32 k_objectid; /* object identifier */
1101 __u64 k_offset;
1102 __u8 k_type;
1103};
1104
1105struct cpu_key {
1106 struct in_core_key on_disk_key;
1107 int version;
1108 int key_length; /* 3 in all cases but direct2indirect and
1109 indirect2direct conversion */
1110};
1111
1112/* Our function for comparing keys can compare keys of different
1113 lengths. It takes as a parameter the length of the keys it is to
1114 compare. These defines are used in determining what is to be passed
1115 to it as that parameter. */
1116#define REISERFS_FULL_KEY_LEN 4
1117#define REISERFS_SHORT_KEY_LEN 2
1118
1119/* The result of the key compare */
1120#define FIRST_GREATER 1
1121#define SECOND_GREATER -1
1122#define KEYS_IDENTICAL 0
1123#define KEY_FOUND 1
1124#define KEY_NOT_FOUND 0
1125
1126#define KEY_SIZE (sizeof(struct reiserfs_key))
1127#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32))
1128
1129/* return values for search_by_key and clones */
1130#define ITEM_FOUND 1
1131#define ITEM_NOT_FOUND 0
1132#define ENTRY_FOUND 1
1133#define ENTRY_NOT_FOUND 0
1134#define DIRECTORY_NOT_FOUND -1
1135#define REGULAR_FILE_FOUND -2
1136#define DIRECTORY_FOUND -3
1137#define BYTE_FOUND 1
1138#define BYTE_NOT_FOUND 0
1139#define FILE_NOT_FOUND -1
1140
1141#define POSITION_FOUND 1
1142#define POSITION_NOT_FOUND 0
1143
1144// return values for reiserfs_find_entry and search_by_entry_key
1145#define NAME_FOUND 1
1146#define NAME_NOT_FOUND 0
1147#define GOTO_PREVIOUS_ITEM 2
1148#define NAME_FOUND_INVISIBLE 3
1149
1150/* Everything in the filesystem is stored as a set of items. The
1151 item head contains the key of the item, its free space (for
1152 indirect items) and specifies the location of the item itself
1153 within the block. */
1154
1155struct item_head {
1156 /* Everything in the tree is found by searching for it based on
1157 * its key.*/
1158 struct reiserfs_key ih_key;
1159 union {
1160 /* The free space in the last unformatted node of an
1161 indirect item if this is an indirect item. This
1162 equals 0xFFFF iff this is a direct item or stat data
1163 item. Note that the key, not this field, is used to
1164 determine the item type, and thus which field this
1165 union contains. */
1166 __le16 ih_free_space_reserved;
1167 /* Iff this is a directory item, this field equals the
1168 number of directory entries in the directory item. */
1169 __le16 ih_entry_count;
1170 } __attribute__ ((__packed__)) u;
1171 __le16 ih_item_len; /* total size of the item body */
1172 __le16 ih_item_location; /* an offset to the item body
1173 * within the block */
1174 __le16 ih_version; /* 0 for all old items, 2 for new
1175 ones. Highest bit is set by fsck
1176 temporary, cleaned after all
1177 done */
1178} __attribute__ ((__packed__));
1179/* size of item header */
1180#define IH_SIZE (sizeof(struct item_head))
1181
1182#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved)
1183#define ih_version(ih) le16_to_cpu((ih)->ih_version)
1184#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count)
1185#define ih_location(ih) le16_to_cpu((ih)->ih_item_location)
1186#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len)
1187
1188#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0)
1189#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0)
1190#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0)
1191#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0)
1192#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0)
1193
1194#define unreachable_item(ih) (ih_version(ih) & (1 << 15))
1195
1196#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
1197#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))
1198
1199/* these operate on indirect items, where you've got an array of ints
1200** at a possibly unaligned location. These are a noop on ia32
1201**
1202** p is the array of __u32, i is the index into the array, v is the value
1203** to store there.
1204*/
1205#define get_block_num(p, i) get_unaligned_le32((p) + (i))
1206#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
1207
1208//
1209// in old version uniqueness field shows key type
1210//
1211#define V1_SD_UNIQUENESS 0
1212#define V1_INDIRECT_UNIQUENESS 0xfffffffe
1213#define V1_DIRECT_UNIQUENESS 0xffffffff
1214#define V1_DIRENTRY_UNIQUENESS 500
1215#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required
1216
1217//
1218// here are conversion routines
1219//
1220static inline int uniqueness2type(__u32 uniqueness) CONSTF;
1221static inline int uniqueness2type(__u32 uniqueness)
1222{
1223 switch ((int)uniqueness) {
1224 case V1_SD_UNIQUENESS:
1225 return TYPE_STAT_DATA;
1226 case V1_INDIRECT_UNIQUENESS:
1227 return TYPE_INDIRECT;
1228 case V1_DIRECT_UNIQUENESS:
1229 return TYPE_DIRECT;
1230 case V1_DIRENTRY_UNIQUENESS:
1231 return TYPE_DIRENTRY;
1232 case V1_ANY_UNIQUENESS:
1233 default:
1234 return TYPE_ANY;
1235 }
1236}
1237
1238static inline __u32 type2uniqueness(int type) CONSTF;
1239static inline __u32 type2uniqueness(int type)
1240{
1241 switch (type) {
1242 case TYPE_STAT_DATA:
1243 return V1_SD_UNIQUENESS;
1244 case TYPE_INDIRECT:
1245 return V1_INDIRECT_UNIQUENESS;
1246 case TYPE_DIRECT:
1247 return V1_DIRECT_UNIQUENESS;
1248 case TYPE_DIRENTRY:
1249 return V1_DIRENTRY_UNIQUENESS;
1250 case TYPE_ANY:
1251 default:
1252 return V1_ANY_UNIQUENESS;
1253 }
1254}
1255
1256//
1257// key is pointer to on disk key which is stored in le, result is cpu,
1258// there is no way to get version of object from key, so, provide
1259// version to these defines
1260//
1261static inline loff_t le_key_k_offset(int version,
1262 const struct reiserfs_key *key)
1263{
1264 return (version == KEY_FORMAT_3_5) ?
1265 le32_to_cpu(key->u.k_offset_v1.k_offset) :
1266 offset_v2_k_offset(&(key->u.k_offset_v2));
1267}
1268
1269static inline loff_t le_ih_k_offset(const struct item_head *ih)
1270{
1271 return le_key_k_offset(ih_version(ih), &(ih->ih_key));
1272}
1273
1274static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key)
1275{
1276 return (version == KEY_FORMAT_3_5) ?
1277 uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) :
1278 offset_v2_k_type(&(key->u.k_offset_v2));
1279}
1280
1281static inline loff_t le_ih_k_type(const struct item_head *ih)
1282{
1283 return le_key_k_type(ih_version(ih), &(ih->ih_key));
1284}
1285
1286static inline void set_le_key_k_offset(int version, struct reiserfs_key *key,
1287 loff_t offset)
1288{
1289 (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */
1290 (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset));
1291}
1292
1293static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
1294{
1295 set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset);
1296}
1297
1298static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
1299 int type)
1300{
1301 (version == KEY_FORMAT_3_5) ?
1302 (void)(key->u.k_offset_v1.k_uniqueness =
1303 cpu_to_le32(type2uniqueness(type)))
1304 : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type));
1305}
1306
1307static inline void set_le_ih_k_type(struct item_head *ih, int type)
1308{
1309 set_le_key_k_type(ih_version(ih), &(ih->ih_key), type);
1310}
1311
1312static inline int is_direntry_le_key(int version, struct reiserfs_key *key)
1313{
1314 return le_key_k_type(version, key) == TYPE_DIRENTRY;
1315}
1316
1317static inline int is_direct_le_key(int version, struct reiserfs_key *key)
1318{
1319 return le_key_k_type(version, key) == TYPE_DIRECT;
1320}
1321
1322static inline int is_indirect_le_key(int version, struct reiserfs_key *key)
1323{
1324 return le_key_k_type(version, key) == TYPE_INDIRECT;
1325}
1326
1327static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
1328{
1329 return le_key_k_type(version, key) == TYPE_STAT_DATA;
1330}
1331
1332//
1333// item header has version.
1334//
1335static inline int is_direntry_le_ih(struct item_head *ih)
1336{
1337 return is_direntry_le_key(ih_version(ih), &ih->ih_key);
1338}
1339
1340static inline int is_direct_le_ih(struct item_head *ih)
1341{
1342 return is_direct_le_key(ih_version(ih), &ih->ih_key);
1343}
1344
1345static inline int is_indirect_le_ih(struct item_head *ih)
1346{
1347 return is_indirect_le_key(ih_version(ih), &ih->ih_key);
1348}
1349
1350static inline int is_statdata_le_ih(struct item_head *ih)
1351{
1352 return is_statdata_le_key(ih_version(ih), &ih->ih_key);
1353}
1354
1355//
1356// key is pointer to cpu key, result is cpu
1357//
1358static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
1359{
1360 return key->on_disk_key.k_offset;
1361}
1362
1363static inline loff_t cpu_key_k_type(const struct cpu_key *key)
1364{
1365 return key->on_disk_key.k_type;
1366}
1367
1368static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset)
1369{
1370 key->on_disk_key.k_offset = offset;
1371}
1372
1373static inline void set_cpu_key_k_type(struct cpu_key *key, int type)
1374{
1375 key->on_disk_key.k_type = type;
1376}
1377
1378static inline void cpu_key_k_offset_dec(struct cpu_key *key)
1379{
1380 key->on_disk_key.k_offset--;
1381}
1382
1383#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY)
1384#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT)
1385#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT)
1386#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA)
1387
1388/* are these used ? */
1389#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key)))
1390#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key)))
1391#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key)))
1392#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key)))
1393
1394#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \
1395 (!COMP_SHORT_KEYS(ih, key) && \
1396 I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize))
1397
1398/* maximal length of item */
/*
 * Block size minus the block head and one item head.  The argument and the
 * whole expansion are parenthesized so that an expression argument such as
 * "a ? b : c" cannot be regrouped by the subtraction.
 */
#define MAX_ITEM_LEN(block_size) ((block_size) - BLKH_SIZE - IH_SIZE)
1400#define MIN_ITEM_LEN 1
1401
1402/* object identifier for root dir */
1403#define REISERFS_ROOT_OBJECTID 2
1404#define REISERFS_ROOT_PARENT_OBJECTID 1
1405
1406extern struct reiserfs_key root_key;
1407
1408/*
1409 * Picture represents a leaf of the S+tree
1410 * ______________________________________________________
1411 * | | Array of | | |
1412 * |Block | Object-Item | F r e e | Objects- |
1413 * | head | Headers | S p a c e | Items |
1414 * |______|_______________|___________________|___________|
1415 */
1416
1417/* Header of a disk block. More precisely, header of a formatted leaf
1418 or internal node, and not the header of an unformatted node. */
1419struct block_head {
1420 __le16 blk_level; /* Level of a block in the tree. */
1421 __le16 blk_nr_item; /* Number of keys/items in a block. */
1422 __le16 blk_free_space; /* Block free space in bytes. */
1423 __le16 blk_reserved;
1424 /* dump this in v4/planA */
1425 struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */
1426};
1427
1428#define BLKH_SIZE (sizeof(struct block_head))
1429#define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level))
1430#define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item))
1431#define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space))
1432#define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved))
1433#define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val))
1434#define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val))
1435#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val))
1436#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val))
1437#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key)
/*
 * Assign @val to the right delimiting key kept in the block head.
 * @val is parenthesized so an expression argument cannot be regrouped by
 * the assignment.
 */
#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = (val))
1439
1440/*
1441 * values for blk_level field of the struct block_head
1442 */
1443
1444#define FREE_LEVEL 0 /* when node gets removed from the tree its
1445 blk_level is set to FREE_LEVEL. It is then
1446 used to see whether the node is still in the
1447 tree */
1448
1449#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
1450
1451/* Given the buffer head of a formatted node, resolve to the block head of that node. */
1452#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data))
1453/* Number of items that are in buffer. */
1454#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh)))
1455#define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh)))
1456#define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh)))
1457
1458#define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0)
1459#define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0)
1460#define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0)
1461
/*
 * Get a pointer to the right delimiting key of a formatted node. -- little
 * endian.  Uses the blkh_right_delim_key() accessor; blk_right_delim_key is
 * the struct member name, not a macro, and cannot be called.
 */
#define B_PRIGHT_DELIM_KEY(bh) (&(blkh_right_delim_key(B_BLK_HEAD(bh))))
1464
1465/* Does the buffer contain a disk leaf. */
1466#define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL)
1467
1468/* Does the buffer contain a disk internal node */
1469#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
1470 && B_LEVEL(bh) <= MAX_HEIGHT)
1471
1472/***************************************************************************/
1473/* STAT DATA */
1474/***************************************************************************/
1475
1476//
1477// old stat data is 32 bytes long. We are going to distinguish new one by
1478// different size
1479//
1480struct stat_data_v1 {
1481 __le16 sd_mode; /* file type, permissions */
1482 __le16 sd_nlink; /* number of hard links */
1483 __le16 sd_uid; /* owner */
1484 __le16 sd_gid; /* group */
1485 __le32 sd_size; /* file size */
1486 __le32 sd_atime; /* time of last access */
1487 __le32 sd_mtime; /* time file was last modified */
1488 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
1489 union {
1490 __le32 sd_rdev;
1491 __le32 sd_blocks; /* number of blocks file uses */
1492 } __attribute__ ((__packed__)) u;
1493 __le32 sd_first_direct_byte; /* first byte of file which is stored
1494 in a direct item: except that if it
1495 equals 1 it is a symlink and if it
1496 equals ~(__u32)0 there is no
1497 direct item. The existence of this
1498 field really grates on me. Let's
1499 replace it with a macro based on
1500 sd_size and our tail suppression
1501 policy. Someday. -Hans */
1502} __attribute__ ((__packed__));
1503
1504#define SD_V1_SIZE (sizeof(struct stat_data_v1))
1505#define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5)
1506#define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode))
1507#define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v))
1508#define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink))
1509#define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v))
1510#define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid))
1511#define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v))
1512#define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid))
1513#define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v))
1514#define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size))
1515#define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v))
1516#define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime))
1517#define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v))
1518#define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime))
1519#define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v))
1520#define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime))
1521#define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v))
1522#define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev))
1523#define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v))
1524#define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks))
1525#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v))
1526#define sd_v1_first_direct_byte(sdp) \
1527 (le32_to_cpu((sdp)->sd_first_direct_byte))
1528#define set_sd_v1_first_direct_byte(sdp,v) \
1529 ((sdp)->sd_first_direct_byte = cpu_to_le32(v))
1530
1531/* inode flags stored in sd_attrs (nee sd_reserved) */
1532
1533/* we want common flags to have the same values as in ext2,
1534 so chattr(1) will work without problems */
1535#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
1536#define REISERFS_APPEND_FL FS_APPEND_FL
1537#define REISERFS_SYNC_FL FS_SYNC_FL
1538#define REISERFS_NOATIME_FL FS_NOATIME_FL
1539#define REISERFS_NODUMP_FL FS_NODUMP_FL
1540#define REISERFS_SECRM_FL FS_SECRM_FL
1541#define REISERFS_UNRM_FL FS_UNRM_FL
1542#define REISERFS_COMPR_FL FS_COMPR_FL
1543#define REISERFS_NOTAIL_FL FS_NOTAIL_FL
1544
1545/* persistent flags that file inherits from the parent directory */
1546#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \
1547 REISERFS_SYNC_FL | \
1548 REISERFS_NOATIME_FL | \
1549 REISERFS_NODUMP_FL | \
1550 REISERFS_SECRM_FL | \
1551 REISERFS_COMPR_FL | \
1552 REISERFS_NOTAIL_FL )
1553
1554/* Stat Data on disk (reiserfs version of UFS disk inode minus the
1555 address blocks) */
1556struct stat_data {
1557 __le16 sd_mode; /* file type, permissions */
1558 __le16 sd_attrs; /* persistent inode flags */
1559 __le32 sd_nlink; /* number of hard links */
1560 __le64 sd_size; /* file size */
1561 __le32 sd_uid; /* owner */
1562 __le32 sd_gid; /* group */
1563 __le32 sd_atime; /* time of last access */
1564 __le32 sd_mtime; /* time file was last modified */
1565 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
1566 __le32 sd_blocks;
1567 union {
1568 __le32 sd_rdev;
1569 __le32 sd_generation;
1570 //__le32 sd_first_direct_byte;
1571 /* first byte of file which is stored in a
1572 direct item: except that if it equals 1
1573 it is a symlink and if it equals
1574 ~(__u32)0 there is no direct item. The
1575 existence of this field really grates
1576 on me. Let's replace it with a macro
1577 based on sd_size and our tail
1578 suppression policy? */
1579 } __attribute__ ((__packed__)) u;
1580} __attribute__ ((__packed__));
1581//
1582// this is 44 bytes long
1583//
1584#define SD_SIZE (sizeof(struct stat_data))
1585#define SD_V2_SIZE SD_SIZE
1586#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6)
1587#define sd_v2_mode(sdp) (le16_to_cpu((sdp)->sd_mode))
1588#define set_sd_v2_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v))
1589/* sd_reserved */
1590/* set_sd_reserved */
1591#define sd_v2_nlink(sdp) (le32_to_cpu((sdp)->sd_nlink))
1592#define set_sd_v2_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le32(v))
1593#define sd_v2_size(sdp) (le64_to_cpu((sdp)->sd_size))
1594#define set_sd_v2_size(sdp,v) ((sdp)->sd_size = cpu_to_le64(v))
1595#define sd_v2_uid(sdp) (le32_to_cpu((sdp)->sd_uid))
1596#define set_sd_v2_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le32(v))
1597#define sd_v2_gid(sdp) (le32_to_cpu((sdp)->sd_gid))
1598#define set_sd_v2_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le32(v))
1599#define sd_v2_atime(sdp) (le32_to_cpu((sdp)->sd_atime))
1600#define set_sd_v2_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v))
1601#define sd_v2_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime))
1602#define set_sd_v2_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v))
1603#define sd_v2_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime))
1604#define set_sd_v2_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v))
1605#define sd_v2_blocks(sdp) (le32_to_cpu((sdp)->sd_blocks))
1606#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v))
1607#define sd_v2_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev))
1608#define set_sd_v2_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v))
1609#define sd_v2_generation(sdp) (le32_to_cpu((sdp)->u.sd_generation))
1610#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v))
1611#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs))
1612#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v))
1613
1614/***************************************************************************/
1615/* DIRECTORY STRUCTURE */
1616/***************************************************************************/
1617/*
1618 Picture represents the structure of directory items
1619 ________________________________________________
1620 | Array of | | | | | |
1621 | directory |N-1| N-2 | .... | 1st |0th|
1622 | entry headers | | | | | |
1623 |_______________|___|_____|________|_______|___|
1624 <---- directory entries ------>
1625
1626 First directory item has k_offset component 1. We store "." and ".."
1627 in one item, always, we never split "." and ".." into differing
1628 items. This makes, among other things, the code for removing
1629 directories simpler. */
1630#define SD_OFFSET 0
1631#define SD_UNIQUENESS 0
1632#define DOT_OFFSET 1
1633#define DOT_DOT_OFFSET 2
1634#define DIRENTRY_UNIQUENESS 500
1635
1636/* */
1637#define FIRST_ITEM_OFFSET 1
1638
1639/*
1640 Q: How to get key of object pointed to by entry from entry?
1641
1642 A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields; together they form the key
1643 of the object that the entry points to */
1644
1645/* NOT IMPLEMENTED:
1646 Directory will someday contain stat data of object */
1647
1648struct reiserfs_de_head {
1649 __le32 deh_offset; /* third component of the directory entry key */
1650 __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced
1651 by directory entry */
1652 __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */
1653 __le16 deh_location; /* offset of name in the whole item */
1654 __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether
1655 entry is hidden (unlinked) */
1656} __attribute__ ((__packed__));
1657#define DEH_SIZE sizeof(struct reiserfs_de_head)
1658#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset))
1659#define deh_dir_id(p_deh) (le32_to_cpu((p_deh)->deh_dir_id))
1660#define deh_objectid(p_deh) (le32_to_cpu((p_deh)->deh_objectid))
1661#define deh_location(p_deh) (le16_to_cpu((p_deh)->deh_location))
1662#define deh_state(p_deh) (le16_to_cpu((p_deh)->deh_state))
1663
1664#define put_deh_offset(p_deh,v) ((p_deh)->deh_offset = cpu_to_le32((v)))
1665#define put_deh_dir_id(p_deh,v) ((p_deh)->deh_dir_id = cpu_to_le32((v)))
1666#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v)))
1667#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v)))
1668#define put_deh_state(p_deh,v) ((p_deh)->deh_state = cpu_to_le16((v)))
1669
1670/* empty directory contains two entries "." and ".." and their headers */
1671#define EMPTY_DIR_SIZE \
1672(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen ("..")))
1673
1674/* old format directories have this size when empty */
1675#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3)
1676
1677#define DEH_Statdata 0 /* not used now */
1678#define DEH_Visible 2
1679
1680/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */
1681#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__)
1682# define ADDR_UNALIGNED_BITS (3)
1683#endif
1684
1685/* These are only used to manipulate deh_state.
1686 * Because of this, we'll use the ext2_ bit routines,
1687 * since they are little endian */
1688#ifdef ADDR_UNALIGNED_BITS
1689
1690# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
1691# define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3)
1692
1693# define set_bit_unaligned(nr, addr) \
1694 __test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
1695# define clear_bit_unaligned(nr, addr) \
1696 __test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
1697# define test_bit_unaligned(nr, addr) \
1698 test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
1699
1700#else
1701
1702# define set_bit_unaligned(nr, addr) __test_and_set_bit_le(nr, addr)
1703# define clear_bit_unaligned(nr, addr) __test_and_clear_bit_le(nr, addr)
1704# define test_bit_unaligned(nr, addr) test_bit_le(nr, addr)
1705
1706#endif
1707
1708#define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
1709#define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
1710#define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1711#define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1712
1713#define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
1714#define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1715#define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
1716
1717extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
1718 __le32 par_dirid, __le32 par_objid);
1719extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
1720 __le32 par_dirid, __le32 par_objid);
1721
1722/* array of the entry headers */
1723 /* get item body */
1724#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) )
1725#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih)))
1726
1727/* length of the directory entry in directory item. This define
1728 calculates length of i-th directory entry using directory entry
1729 locations from dir entry head. When it calculates length of 0-th
1730 directory entry, it uses length of whole item in place of entry
1731 location of the non-existent following entry in the calculation.
1732 See picture above.*/
1733/*
1734#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \
1735((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh))))
1736*/
1737static inline int entry_length(const struct buffer_head *bh,
1738 const struct item_head *ih, int pos_in_item)
1739{
1740 struct reiserfs_de_head *deh;
1741
1742 deh = B_I_DEH(bh, ih) + pos_in_item;
1743 if (pos_in_item)
1744 return deh_location(deh - 1) - deh_location(deh);
1745
1746 return ih_item_len(ih) - deh_location(deh);
1747}
1748
1749/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */
1750#define I_ENTRY_COUNT(ih) (ih_entry_count((ih)))
1751
/*
 * name by bh, ih and entry_num: pointer to the first byte of the name of
 * directory entry entry_num.  The name lives at the item body
 * (b_data + ih_location) plus the location stored in that entry's header.
 * bh is parenthesized so that any pointer expression (e.g. &buf) may be
 * passed.
 */
#define B_I_E_NAME(bh,ih,entry_num) ((char *)((bh)->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num))))
1754
1755// two entries per block (at least)
1756#define REISERFS_MAX_NAME(block_size) 255
1757
1758/* this structure is used for operations on directory entries. It is
1759 not a disk structure. */
1760/* When reiserfs_find_entry or search_by_entry_key find directory
1761 entry, they return filled reiserfs_dir_entry structure */
1762struct reiserfs_dir_entry {
1763 struct buffer_head *de_bh;
1764 int de_item_num;
1765 struct item_head *de_ih;
1766 int de_entry_num;
1767 struct reiserfs_de_head *de_deh;
1768 int de_entrylen;
1769 int de_namelen;
1770 char *de_name;
1771 unsigned long *de_gen_number_bit_string;
1772
1773 __u32 de_dir_id;
1774 __u32 de_objectid;
1775
1776 struct cpu_key de_entry_key;
1777};
1778
1779/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */
1780
1781/* pointer to file name, stored in entry */
1782#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh))
1783
/*
 * length of name: length of the entry_num-th directory entry minus the
 * stat data that accompanies it (SD_SIZE when de_with_sd(deh), else 0).
 *
 * deh must point at the header of entry entry_num.  The entry length is
 * computed inline -- distance to the previous header's location, or to the
 * end of the item for entry 0 -- because the I_DEH_N_ENTRY_LENGTH helper
 * exists only as commented-out text and cannot be expanded.
 */
#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \
	(((entry_num) ? (deh_location((deh) - 1) - deh_location(deh)) \
		      : (ih_item_len(ih) - deh_location(deh))) \
	 - (de_with_sd(deh) ? SD_SIZE : 0))
1787
1788/* hash value occupies bits from 7 up to 30 */
1789#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL)
1790/* generation number occupies 7 bits starting from 0 up to 6 */
1791#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL)
1792#define MAX_GENERATION_NUMBER 127
1793
1794#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number))
1795
1796/*
1797 * Picture represents an internal node of the reiserfs tree
1798 * ______________________________________________________
1799 * | | Array of | Array of | Free |
1800 * |block | keys | pointers | space |
1801 * | head | N | N+1 | |
1802 * |______|_______________|___________________|___________|
1803 */
1804
1805/***************************************************************************/
1806/* DISK CHILD */
1807/***************************************************************************/
1808/* Disk child pointer: The pointer from an internal node of the tree
1809 to a node that is on disk. */
1810struct disk_child {
1811 __le32 dc_block_number; /* Disk child's block number. */
1812 __le16 dc_size; /* Disk child's used space. */
1813 __le16 dc_reserved;
1814};
1815
1816#define DC_SIZE (sizeof(struct disk_child))
1817#define dc_block_number(dc_p) (le32_to_cpu((dc_p)->dc_block_number))
1818#define dc_size(dc_p) (le16_to_cpu((dc_p)->dc_size))
1819#define put_dc_block_number(dc_p, val) do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0)
1820#define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0)
1821
1822/* Get disk child by buffer header and position in the tree node. */
1823#define B_N_CHILD(bh, n_pos) ((struct disk_child *)\
1824((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos)))
1825
1826/* Get disk child number by buffer header and position in the tree node. */
1827#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos)))
1828#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \
1829 (put_dc_block_number(B_N_CHILD(bh, n_pos), val))
1830
1831 /* maximal value of field child_size in structure disk_child */
1832 /* child size is the combined size of all items and their headers */
1833#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE ))
1834
1835/* amount of used space in buffer (not including block head) */
1836#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur)))
1837
1838/* max and min number of keys in internal node */
1839#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
1840#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2)
1841
1842/***************************************************************************/
1843/* PATH STRUCTURES AND DEFINES */
1844/***************************************************************************/
1845
1846/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the
1847 key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it
1848 does not find them in the cache it reads them from disk. For each node search_by_key finds using
1849 reiserfs_bread it then uses bin_search to look through that node. bin_search will find the
1850 position of the block_number of the next node if it is looking through an internal node. If it
1851 is looking through a leaf node bin_search will find the position of the item which has key either
1852 equal to given key, or which is the maximal key less than the given key. */
1853
1854struct path_element {
1855 struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */
1856 int pe_position; /* Position in the tree node which is placed in the */
1857 /* buffer above. */
1858};
1859
1860#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */
1861#define EXTENDED_MAX_HEIGHT 7 /* Must be equal to MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
1862#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */
1863
1864#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
1865#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */
1866
1867/* We need to keep track of who the ancestors of nodes are. When we
1868 perform a search we record which nodes were visited while
1869 descending the tree looking for the node we searched for. This list
1870 of nodes is called the path. This information is used while
1871 performing balancing. Note that this path information may become
1872 invalid, and this means we must check it when using it to see if it
1873 is still valid. You'll need to read search_by_key and the comments
1874 in it, especially about decrement_counters_in_path(), to understand
1875 this structure.
1876
1877Paths make the code so much harder to work with and debug.... An
1878enormous number of bugs are due to them, and trying to write or modify
1879code that uses them just makes my head hurt. They are based on an
1880excessive effort to avoid disturbing the precious VFS code.:-( The
1881gods only know how we are going to SMP the code that uses them.
1882znodes are the way! */
1883
1884#define PATH_READA 0x1 /* do read ahead */
1885#define PATH_READA_BACK 0x2 /* read backwards */
1886
1887struct treepath {
1888 int path_length; /* Length of the path_elements array below. */
1889 int reada;
1890 struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */
1891 int pos_in_item;
1892};
1893
1894#define pos_in_item(path) ((path)->pos_in_item)
1895
1896#define INITIALIZE_PATH(var) \
1897struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
1898
1899/* Get path element by path and path position. */
1900#define PATH_OFFSET_PELEMENT(path, n_offset) ((path)->path_elements + (n_offset))
1901
1902/* Get buffer header at the path by path and path position. */
1903#define PATH_OFFSET_PBUFFER(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer)
1904
1905/* Get position in the element at the path by path and path position. */
1906#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)
1907
1908#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
1909 /* you know, to the person who didn't
1910 write this the macro name does not
1911 at first suggest what it does.
1912 Maybe POSITION_FROM_PATH_END? Or
1913 maybe we should just focus on
1914 dumping paths... -Hans */
1915#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))
1916
1917#define PATH_PITEM_HEAD(path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path))
1918
/*
 * in do_balance leaf has h == 0 in contrast with path structure,
 * where root has level == 0. That is why we need these defines: they
 * translate a height h into the path offset (path)->path_length - (h).
 *
 * Every use of path and h is parenthesized so that callers may pass
 * arbitrary expressions (e.g. &path_var, or a conditional for h).
 */
#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER((path), (path)->path_length - (h))	/* tb->S[h] */
#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER((path), (h) + 1)	/* tb->F[h] or tb->S[0]->b_parent */
#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION((path), (path)->path_length - (h))
#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION((path), (h) + 1)	/* tb->S[h]->b_item_order */

/* path offset that corresponds to height n_h */
#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))
1927
1928#define get_last_bh(path) PATH_PLAST_BUFFER(path)
1929#define get_ih(path) PATH_PITEM_HEAD(path)
1930#define get_item_pos(path) PATH_LAST_POSITION(path)
1931#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path)))
1932#define item_moved(ih,path) comp_items(ih, path)
1933#define path_changed(ih,path) comp_items (ih, path)
1934
1935/***************************************************************************/
1936/* MISC */
1937/***************************************************************************/
1938
1939/* Size of pointer to the unformatted node. */
1940#define UNFM_P_SIZE (sizeof(unp_t))
1941#define UNFM_P_SHIFT 2
1942
1943// in the in-core inode, the key is stored in le form
1944#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))
1945
/*
 * Largest values of a 32-bit unsigned, a 32-bit signed and a 16-bit
 * unsigned integer.  MAX_INT previously had only seven hex digits
 * (0x7ffffff), which is not the signed 32-bit maximum.
 */
#define MAX_UL_INT 0xffffffff
#define MAX_INT 0x7fffffff
#define MAX_US_INT 0xffff
1949
1950// reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset
1951#define U32_MAX (~(__u32)0)
1952
1953static inline loff_t max_reiserfs_offset(struct inode *inode)
1954{
1955 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
1956 return (loff_t) U32_MAX;
1957
1958 return (loff_t) ((~(__u64) 0) >> 4);
1959}
1960
1961/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/
1962#define MAX_KEY_OBJECTID MAX_UL_INT
1963
1964#define MAX_B_NUM MAX_UL_INT
1965#define MAX_FC_NUM MAX_US_INT
1966
1967/* the purpose is to detect overflow of an unsigned short */
1968#define REISERFS_LINK_MAX (MAX_US_INT - 1000)
1969
1970/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */
1971#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */
1972#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */
1973
/* the generation counter kept in the reiserfs part of super block s */
#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
/* current value of that counter */
#define get_generation(s) (atomic_read (&fs_generation(s)))
/* true when the counter no longer matches the value saved in tb->fs_gen */
#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen)
/* true when the counter differs from gen; gen is parenthesized so that any
   expression may be passed */
#define __fs_changed(gen,s) ((gen) != get_generation (s))
/* calls reiserfs_cond_resched(s) first, then evaluates to __fs_changed(gen, s) */
#define fs_changed(gen,s) \
({ \
	reiserfs_cond_resched(s); \
	__fs_changed(gen, s); \
})
1983
1984/***************************************************************************/
1985/* FIXATE NODES */
1986/***************************************************************************/
1987
1988#define VI_TYPE_LEFT_MERGEABLE 1
1989#define VI_TYPE_RIGHT_MERGEABLE 2
1990
1991/* To make any changes in the tree we always first find node, that
1992 contains item to be changed/deleted or place to insert a new
1993 item. We call this node S. To do balancing we need to decide what
1994 we will shift to left/right neighbor, or to a new node, where new
1995 item will be etc. To make this analysis simpler we build virtual
1996 node. Virtual node is an array of items, that will replace items of
1997 node S. (For instance if we are going to delete an item, virtual
1998 node does not contain it). Virtual node keeps information about
1999 item sizes and types, mergeability of first and last items, sizes
2000 of all entries in directory item. We use this array of items when
2001 calculating what we can shift to neighbors and how many nodes we
2002 have to have if we do not do any shifting, if we shift to the left/right
2003 neighbor or to both. */
2004struct virtual_item {
2005 int vi_index; // index in the array of item operations
2006 unsigned short vi_type; // left/right mergeability
2007 unsigned short vi_item_len; /* length of item that it will have after balancing */
2008 struct item_head *vi_ih;
2009 const char *vi_item; // body of item (old or new)
2010 const void *vi_new_data; // 0 always but paste mode
2011 void *vi_uarea; // item specific area
2012};
2013
2014struct virtual_node {
2015 char *vn_free_ptr; /* this is a pointer to the free space in the buffer */
2016 unsigned short vn_nr_item; /* number of items in virtual node */
2017 short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */
2018 short vn_mode; /* mode of balancing (paste, insert, delete, cut) */
2019 short vn_affected_item_num;
2020 short vn_pos_in_item;
2021 struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */
2022 const void *vn_data;
2023 struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */
2024};
2025
2026/* used by directory items when creating virtual nodes */
2027struct direntry_uarea {
2028 int flags;
2029 __u16 entry_count;
2030 __u16 entry_sizes[1];
2031} __attribute__ ((__packed__));
2032
2033/***************************************************************************/
2034/* TREE BALANCE */
2035/***************************************************************************/
2036
2037/* This temporary structure is used in tree balance algorithms, and
2038 constructed as we go to the extent that its various parts are
2039 needed. It contains arrays of nodes that can potentially be
2040 involved in the balancing of node S, and parameters that define how
2041 each of the nodes must be balanced. Note that in these algorithms
2042 for balancing the worst case is to need to balance the current node
2043 S and the left and right neighbors and all of their parents plus
2044 create a new node. We implement S1 balancing for the leaf nodes
2045 and S0 balancing for the internal nodes (S1 and S0 are defined in
2046 our papers.)*/
2047
2048#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */
2049
2050/* maximum number of FEB blocknrs on a single level */
2051#define MAX_AMOUNT_NEEDED 2
2052
2053/* someday somebody will prefix every field in this struct with tb_ */
2054struct tree_balance {
2055 int tb_mode;
2056 int need_balance_dirty;
2057 struct super_block *tb_sb;
2058 struct reiserfs_transaction_handle *transaction_handle;
2059 struct treepath *tb_path;
2060 struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */
2061 struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */
2062 struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */
2063 struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */
2064 struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */
2065 struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */
2066
2067 struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals
2068 cur_blknum. */
2069 struct buffer_head *used[MAX_FEB_SIZE];
2070 struct buffer_head *thrown[MAX_FEB_SIZE];
2071 int lnum[MAX_HEIGHT]; /* array of number of items which must be
2072 shifted to the left in order to balance the
2073 current node; for leaves includes item that
2074 will be partially shifted; for internal
2075 nodes, it is the number of child pointers
2076 rather than items. It includes the new item
2077 being created. The code sometimes subtracts
2078 one to get the number of wholly shifted
2079 items for other purposes. */
2080 int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */
2081 int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and
2082 S[h] to its item number within the node CFL[h] */
2083 int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */
2084 int insert_size[MAX_HEIGHT]; /* the number of bytes we are trying to add to or remove from
2085 S[h]. A negative value means removing. */
2086 int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after
2087 balancing on the level h of the tree. If 0 then S is
2088 being deleted, if 1 then S is remaining and no new nodes
2089 are being created, if 2 or 3 then 1 or 2 new nodes are
2090 being created */
2091
2092 /* fields that are used only for balancing leaves of the tree */
2093 int cur_blknum; /* number of empty blocks having been already allocated */
2094 int s0num; /* number of items that fall into left most node when S[0] splits */
2095 int s1num; /* number of items that fall into first new node when S[0] splits */
2096 int s2num; /* number of items that fall into second new node when S[0] splits */
2097 int lbytes; /* number of bytes which can flow to the left neighbor from the left */
2098 /* most liquid item that cannot be shifted from S[0] entirely */
2099 /* if -1 then nothing will be partially shifted */
2100 int rbytes; /* number of bytes which will flow to the right neighbor from the right */
2101 /* most liquid item that cannot be shifted from S[0] entirely */
2102 /* if -1 then nothing will be partially shifted */
2103 int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */
2104 /* note: if S[0] splits into 3 nodes, then items do not need to be cut */
2105 int s2bytes;
2106 struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */
2107 char *vn_buf; /* kmalloced memory. Used to create
2108 virtual node and keep map of
2109 dirtied bitmap blocks */
2110 int vn_buf_size; /* size of the vn_buf */
2111 struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */
2112
2113 int fs_gen; /* saved value of `reiserfs_generation' counter
2114 see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
2115#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2116 struct in_core_key key; /* key pointer, to pass to block allocator or
2117 another low-level subsystem */
2118#endif
2119};
2120
2121/* These are modes of balancing */
2122
2123/* When inserting an item. */
2124#define M_INSERT 'i'
2125/* When inserting into (directories only) or appending onto an already
2126 existent item. */
2127#define M_PASTE 'p'
2128/* When deleting an item. */
2129#define M_DELETE 'd'
2130/* When truncating an item or removing an entry from a (directory) item. */
2131#define M_CUT 'c'
2132
2133/* used when balancing on leaf level skipped (in reiserfsck) */
2134#define M_INTERNAL 'n'
2135
2136/* When further balancing is not needed, then do_balance does not need
2137 to be called. */
2138#define M_SKIP_BALANCING 's'
2139#define M_CONVERT 'v'
2140
2141/* modes of leaf_move_items */
2142#define LEAF_FROM_S_TO_L 0
2143#define LEAF_FROM_S_TO_R 1
2144#define LEAF_FROM_R_TO_L 2
2145#define LEAF_FROM_L_TO_R 3
2146#define LEAF_FROM_S_TO_SNEW 4
2147
2148#define FIRST_TO_LAST 0
2149#define LAST_TO_FIRST 1
2150
2151/* used in do_balance for passing parent of node information that has
2152 been gotten from tb struct */
2153struct buffer_info {
2154 struct tree_balance *tb;
2155 struct buffer_head *bi_bh;
2156 struct buffer_head *bi_parent;
2157 int bi_position;
2158};
2159
2160static inline struct super_block *sb_from_tb(struct tree_balance *tb)
2161{
2162 return tb ? tb->tb_sb : NULL;
2163}
2164
2165static inline struct super_block *sb_from_bi(struct buffer_info *bi)
2166{
2167 return bi ? sb_from_tb(bi->tb) : NULL;
2168}
2169
2170/* there are 4 types of items: stat data, directory item, indirect, direct.
2171+-------------------+------------+--------------+------------+
2172| | k_offset | k_uniqueness | mergeable? |
2173+-------------------+------------+--------------+------------+
2174| stat data | 0 | 0 | no |
2175+-------------------+------------+--------------+------------+
2176| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no |
2177| non 1st directory | hash value | | yes |
2178| item | | | |
2179+-------------------+------------+--------------+------------+
2180| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object
2181+-------------------+------------+--------------+------------+
2182| direct item | offset + 1 |TYPE_DIRECT | if this is not the first direct item of the object
2183+-------------------+------------+--------------+------------+
2184*/
2185
2186struct item_operations {
2187 int (*bytes_number) (struct item_head * ih, int block_size);
2188 void (*decrement_key) (struct cpu_key *);
2189 int (*is_left_mergeable) (struct reiserfs_key * ih,
2190 unsigned long bsize);
2191 void (*print_item) (struct item_head *, char *item);
2192 void (*check_item) (struct item_head *, char *item);
2193
2194 int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi,
2195 int is_affected, int insert_size);
2196 int (*check_left) (struct virtual_item * vi, int free,
2197 int start_skip, int end_skip);
2198 int (*check_right) (struct virtual_item * vi, int free);
2199 int (*part_size) (struct virtual_item * vi, int from, int to);
2200 int (*unit_num) (struct virtual_item * vi);
2201 void (*print_vi) (struct virtual_item * vi);
2202};
2203
2204extern struct item_operations *item_ops[TYPE_ANY + 1];
2205
2206#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
2207#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
2208#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item)
2209#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item)
2210#define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size)
2211#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip)
2212#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free)
2213#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to)
2214#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi)
2215#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi)
2216
2217#define COMP_SHORT_KEYS comp_short_keys
2218
2219/* number of blocks pointed to by the indirect item */
2220#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE)
2221
2222/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */
2223#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
2224
2225/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */
2226
2227/* get the item header */
2228#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) )
2229
2230/* get key */
2231#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) )
2232
2233/* get the key */
2234#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) )
2235
2236/* get item body */
2237#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num))))
2238
2239/* get the stat data by the buffer header and the item order */
2240#define B_N_STAT_DATA(bh,nr) \
2241( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) )
2242
2243 /* following defines use reiserfs buffer header and item header */
2244
2245/* get stat-data */
2246#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
2247
2248// this is 3976 for size==4096
2249#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
2250
2251/* indirect items consist of entries which contain blocknrs, pos
2252 indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
2253 blocknr contained by the entry pos points to */
2254#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)))
2255#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0)
2256
2257struct reiserfs_iget_args {
2258 __u32 objectid;
2259 __u32 dirid;
2260};
2261
2262/***************************************************************************/
2263/* FUNCTION DECLARATIONS */
2264/***************************************************************************/
2265
/* address of the last 12 bytes of the data of buffer bh (bh may be any
   pointer expression) */
#define get_journal_desc_magic(bh) ((bh)->b_data + (bh)->b_size - 12)

/*
 * Number of __u32 slots left in a block of the given size after taking away
 * struct reiserfs_journal_desc (less one __u32, added back by the
 * "+ sizeof (__u32)" term) and the 12 trailing bytes.  blocksize is
 * parenthesized so that any expression may be passed.
 */
#define journal_trans_half(blocksize) \
	(((blocksize) - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32))
2270
2271/* journal.c see journal.c for all the comments here */
2272
2273/* first block written in a commit. */
2274struct reiserfs_journal_desc {
2275 __le32 j_trans_id; /* id of commit */
2276 __le32 j_len; /* length of commit. len +1 is the commit block */
2277 __le32 j_mount_id; /* mount id of this trans */
2278 __le32 j_realblock[1]; /* real locations for each block */
2279};
2280
2281#define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id)
2282#define get_desc_trans_len(d) le32_to_cpu((d)->j_len)
2283#define get_desc_mount_id(d) le32_to_cpu((d)->j_mount_id)
2284
2285#define set_desc_trans_id(d,val) do { (d)->j_trans_id = cpu_to_le32 (val); } while (0)
2286#define set_desc_trans_len(d,val) do { (d)->j_len = cpu_to_le32 (val); } while (0)
2287#define set_desc_mount_id(d,val) do { (d)->j_mount_id = cpu_to_le32 (val); } while (0)
2288
2289/* last block written in a commit */
2290struct reiserfs_journal_commit {
2291 __le32 j_trans_id; /* must match j_trans_id from the desc block */
2292 __le32 j_len; /* ditto */
2293 __le32 j_realblock[1]; /* real locations for each block */
2294};
2295
2296#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
2297#define get_commit_trans_len(c) le32_to_cpu((c)->j_len)
2298#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id) /* NOTE(review): struct reiserfs_journal_commit declares no j_mount_id member, so this cannot compile if expanded on a commit block - confirm it is unused */
2299
2300#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
2301#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0)
2302
2303/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the
2304** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk,
2305** and this transaction does not need to be replayed.
2306*/
2307struct reiserfs_journal_header {
2308 __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */
2309 __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */
2310 __le32 j_mount_id;
2311 /* 12 */ struct journal_params jh_journal;
2312};
2313
2314/* biggest tunable defines are right here */
2315#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */
2316#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */
2317#define JOURNAL_TRANS_MIN_DEFAULT 256
2318#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */
2319#define JOURNAL_MIN_RATIO 2
2320#define JOURNAL_MAX_COMMIT_AGE 30
2321#define JOURNAL_MAX_TRANS_AGE 30
2322#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
2323#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \
2324 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \
2325 REISERFS_QUOTA_TRANS_BLOCKS(sb)))
2326
2327#ifdef CONFIG_QUOTA
2328#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA))
2329/* We need to update data and inode (atime) */
2330#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0)
2331/* 1 balancing, 1 bitmap, 1 data per write + stat data update */
2332#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
2333(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0)
2334/* same as with INIT */
2335#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
2336(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0)
2337#else
2338#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0
2339#define REISERFS_QUOTA_INIT_BLOCKS(s) 0
2340#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
2341#endif
2342
2343/* both of these can be as low as 1, or as high as you want. The min is the
2344** number of 4k bitmap nodes preallocated on mount. New nodes are allocated
2345** as needed, and released when transactions are committed. On release, if
2346** the current number of nodes is > max, the node is freed, otherwise,
2347** it is put on a free list for faster use later.
2348*/
2349#define REISERFS_MIN_BITMAP_NODES 10
2350#define REISERFS_MAX_BITMAP_NODES 100
2351
2352#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */
2353#define JBH_HASH_MASK 8191
2354
2355#define _jhashfn(sb,block) \
2356 (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \
2357 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
2358#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
2359
2360// We need these to make journal.c code more readable
2361#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2362#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2363#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2364
2365enum reiserfs_bh_state_bits {
2366 BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */
2367 BH_JDirty_wait,
2368 BH_JNew, /* disk block was taken off free list before
2369 * being in a finished transaction, or
2370 * written to disk. Can be reused immed. */
2371 BH_JPrepared,
2372 BH_JRestore_dirty,
2373 BH_JTest, // debugging only will go away
2374};
2375
2376BUFFER_FNS(JDirty, journaled);
2377TAS_BUFFER_FNS(JDirty, journaled);
2378BUFFER_FNS(JDirty_wait, journal_dirty);
2379TAS_BUFFER_FNS(JDirty_wait, journal_dirty);
2380BUFFER_FNS(JNew, journal_new);
2381TAS_BUFFER_FNS(JNew, journal_new);
2382BUFFER_FNS(JPrepared, journal_prepared);
2383TAS_BUFFER_FNS(JPrepared, journal_prepared);
2384BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2385TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2386BUFFER_FNS(JTest, journal_test);
2387TAS_BUFFER_FNS(JTest, journal_test);
2388
2389/*
2390** transaction handle which is passed around for all journal calls
2391*/
2392struct reiserfs_transaction_handle {
2393 struct super_block *t_super; /* super for this FS when journal_begin was
2394 called. Saves calls to reiserfs_get_super;
2395 also used by nested transactions to make
2396 sure they are nesting on the right FS.
2397 _must_ be first in the handle
2398 */
2399 int t_refcount;
2400 int t_blocks_logged; /* number of blocks this writer has logged */
2401 int t_blocks_allocated; /* number of blocks this writer allocated */
2402 unsigned int t_trans_id; /* sanity check, equals the current trans id */
2403 void *t_handle_save; /* save existing current->journal_info */
2404 unsigned displace_new_blocks:1; /* if new block allocation occurs, that block
2405 should be displaced from others */
2406 struct list_head t_list;
2407};
2408
2409/* used to keep track of ordered and tail writes, attached to the buffer
2410 * head through b_journal_head.
2411 */
2412struct reiserfs_jh {
2413 struct reiserfs_journal_list *jl;
2414 struct buffer_head *bh;
2415 struct list_head list;
2416};
2417
2418void reiserfs_free_jh(struct buffer_head *bh);
2419int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh);
2420int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh);
2421int journal_mark_dirty(struct reiserfs_transaction_handle *,
2422 struct super_block *, struct buffer_head *bh);
2423
2424static inline int reiserfs_file_data_log(struct inode *inode)
2425{
2426 if (reiserfs_data_log(inode->i_sb) ||
2427 (REISERFS_I(inode)->i_flags & i_data_log))
2428 return 1;
2429 return 0;
2430}
2431
2432static inline int reiserfs_transaction_running(struct super_block *s)
2433{
2434 struct reiserfs_transaction_handle *th = current->journal_info;
2435 if (th && th->t_super == s)
2436 return 1;
2437 if (th && th->t_super == NULL)
2438 BUG();
2439 return 0;
2440}
2441
2442static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th)
2443{
2444 return th->t_blocks_allocated - th->t_blocks_logged;
2445}
2446
2447struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
2448 super_block
2449 *,
2450 int count);
2451int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *);
2452int reiserfs_commit_page(struct inode *inode, struct page *page,
2453 unsigned from, unsigned to);
2454int reiserfs_flush_old_commits(struct super_block *);
2455int reiserfs_commit_for_inode(struct inode *);
2456int reiserfs_inode_needs_commit(struct inode *);
2457void reiserfs_update_inode_transaction(struct inode *);
2458void reiserfs_wait_on_write_block(struct super_block *s);
2459void reiserfs_block_writes(struct reiserfs_transaction_handle *th);
2460void reiserfs_allow_writes(struct super_block *s);
2461void reiserfs_check_lock_depth(struct super_block *s, char *caller);
2462int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh,
2463 int wait);
2464void reiserfs_restore_prepared_buffer(struct super_block *,
2465 struct buffer_head *bh);
2466int journal_init(struct super_block *, const char *j_dev_name, int old_format,
2467 unsigned int);
2468int journal_release(struct reiserfs_transaction_handle *, struct super_block *);
2469int journal_release_error(struct reiserfs_transaction_handle *,
2470 struct super_block *);
2471int journal_end(struct reiserfs_transaction_handle *, struct super_block *,
2472 unsigned long);
2473int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
2474 unsigned long);
2475int journal_mark_freed(struct reiserfs_transaction_handle *,
2476 struct super_block *, b_blocknr_t blocknr);
2477int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
2478int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr,
2479 int bit_nr, int searchall, b_blocknr_t *next);
2480int journal_begin(struct reiserfs_transaction_handle *,
2481 struct super_block *sb, unsigned long);
2482int journal_join_abort(struct reiserfs_transaction_handle *,
2483 struct super_block *sb, unsigned long);
2484void reiserfs_abort_journal(struct super_block *sb, int errno);
2485void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
2486int reiserfs_allocate_list_bitmaps(struct super_block *s,
2487 struct reiserfs_list_bitmap *, unsigned int);
2488
2489void add_save_link(struct reiserfs_transaction_handle *th,
2490 struct inode *inode, int truncate);
2491int remove_save_link(struct inode *inode, int truncate);
2492
2493/* objectid.c */
2494__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th);
2495void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
2496 __u32 objectid_to_release);
2497int reiserfs_convert_objectid_map_v1(struct super_block *);
2498
2499/* stree.c */
2500int B_IS_IN_TREE(const struct buffer_head *);
2501extern void copy_item_head(struct item_head *to,
2502 const struct item_head *from);
2503
2504// first key is in le form, second - cpu
2505extern int comp_short_keys(const struct reiserfs_key *le_key,
2506 const struct cpu_key *cpu_key);
2507extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
2508
2509// both are in le form
2510extern int comp_le_keys(const struct reiserfs_key *,
2511 const struct reiserfs_key *);
2512extern int comp_short_le_keys(const struct reiserfs_key *,
2513 const struct reiserfs_key *);
2514
2515//
2516// get key version from on disk key - kludge
2517//
2518static inline int le_key_version(const struct reiserfs_key *key)
2519{
2520 int type;
2521
2522 type = offset_v2_k_type(&(key->u.k_offset_v2));
2523 if (type != TYPE_DIRECT && type != TYPE_INDIRECT
2524 && type != TYPE_DIRENTRY)
2525 return KEY_FORMAT_3_5;
2526
2527 return KEY_FORMAT_3_6;
2528
2529}
2530
2531static inline void copy_key(struct reiserfs_key *to,
2532 const struct reiserfs_key *from)
2533{
2534 memcpy(to, from, KEY_SIZE);
2535}
2536
2537int comp_items(const struct item_head *stored_ih, const struct treepath *path);
2538const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
2539 const struct super_block *sb);
2540int search_by_key(struct super_block *, const struct cpu_key *,
2541 struct treepath *, int);
2542#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL)
2543int search_for_position_by_key(struct super_block *sb,
2544 const struct cpu_key *cpu_key,
2545 struct treepath *search_path);
2546extern void decrement_bcount(struct buffer_head *bh);
2547void decrement_counters_in_path(struct treepath *search_path);
2548void pathrelse(struct treepath *search_path);
2549int reiserfs_check_path(struct treepath *p);
2550void pathrelse_and_restore(struct super_block *s, struct treepath *search_path);
2551
2552int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2553 struct treepath *path,
2554 const struct cpu_key *key,
2555 struct item_head *ih,
2556 struct inode *inode, const char *body);
2557
2558int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2559 struct treepath *path,
2560 const struct cpu_key *key,
2561 struct inode *inode,
2562 const char *body, int paste_size);
2563
2564int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
2565 struct treepath *path,
2566 struct cpu_key *key,
2567 struct inode *inode,
2568 struct page *page, loff_t new_file_size);
2569
2570int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
2571 struct treepath *path,
2572 const struct cpu_key *key,
2573 struct inode *inode, struct buffer_head *un_bh);
2574
2575void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
2576 struct inode *inode, struct reiserfs_key *key);
2577int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
2578 struct inode *inode);
2579int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
2580 struct inode *inode, struct page *,
2581 int update_timestamps);
2582
2583#define i_block_size(inode) ((inode)->i_sb->s_blocksize)
2584#define file_size(inode) ((inode)->i_size)
2585#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1))
2586
2587#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\
2588!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 )
2589
2590void padd_item(char *item, int total_length, int length);
2591
2592/* inode.c */
2593/* args for the create parameter of reiserfs_get_block */
2594#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
2595#define GET_BLOCK_CREATE 1 /* add anything you need to find block */
2596#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */
2597#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */
2598#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */
2599#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */
2600
2601void reiserfs_read_locked_inode(struct inode *inode,
2602 struct reiserfs_iget_args *args);
2603int reiserfs_find_actor(struct inode *inode, void *p);
2604int reiserfs_init_locked_inode(struct inode *inode, void *p);
2605void reiserfs_evict_inode(struct inode *inode);
2606int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2607int reiserfs_get_block(struct inode *inode, sector_t block,
2608 struct buffer_head *bh_result, int create);
2609struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
2610 int fh_len, int fh_type);
2611struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
2612 int fh_len, int fh_type);
2613int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
2614 int connectable);
2615
2616int reiserfs_truncate_file(struct inode *, int update_timestamps);
2617void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset,
2618 int type, int key_length);
2619void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
2620 int version,
2621 loff_t offset, int type, int length, int entry_count);
2622struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key);
2623
2624struct reiserfs_security_handle;
2625int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
2626 struct inode *dir, umode_t mode,
2627 const char *symname, loff_t i_size,
2628 struct dentry *dentry, struct inode *inode,
2629 struct reiserfs_security_handle *security);
2630
2631void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
2632 struct inode *inode, loff_t size);
2633
2634static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
2635 struct inode *inode)
2636{
2637 reiserfs_update_sd_size(th, inode, inode->i_size);
2638}
2639
2640void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
2641void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
2642int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
2643
2644int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
2645
2646/* namei.c */
2647void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
2648int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
2649 struct treepath *path, struct reiserfs_dir_entry *de);
2650struct dentry *reiserfs_get_parent(struct dentry *);
2651
2652#ifdef CONFIG_REISERFS_PROC_INFO
2653int reiserfs_proc_info_init(struct super_block *sb);
2654int reiserfs_proc_info_done(struct super_block *sb);
2655int reiserfs_proc_info_global_init(void);
2656int reiserfs_proc_info_global_done(void);
2657
2658#define PROC_EXP( e ) e
2659
2660#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data
2661#define PROC_INFO_MAX( sb, field, value ) \
2662 __PINFO( sb ).field = \
2663 max( REISERFS_SB( sb ) -> s_proc_info_data.field, value )
2664#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) )
2665#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) )
2666#define PROC_INFO_BH_STAT( sb, bh, level ) /* NOTE(review): expands to three statements with no do { } while (0) wrapper; unsafe as the unbraced body of an if/else/loop */ \
2667 PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \
2668 PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \
2669 PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) )
2670#else
2671static inline int reiserfs_proc_info_init(struct super_block *sb)
2672{
2673 return 0;
2674}
2675
2676static inline int reiserfs_proc_info_done(struct super_block *sb)
2677{
2678 return 0;
2679}
2680
2681static inline int reiserfs_proc_info_global_init(void)
2682{
2683 return 0;
2684}
2685
2686static inline int reiserfs_proc_info_global_done(void)
2687{
2688 return 0;
2689}
2690
2691#define PROC_EXP( e )
2692#define VOID_V ( ( void ) 0 )
2693#define PROC_INFO_MAX( sb, field, value ) VOID_V
2694#define PROC_INFO_INC( sb, field ) VOID_V
2695#define PROC_INFO_ADD( sb, field, val ) VOID_V
2696#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V
2697#endif
2698
2699/* dir.c */
2700extern const struct inode_operations reiserfs_dir_inode_operations;
2701extern const struct inode_operations reiserfs_symlink_inode_operations;
2702extern const struct inode_operations reiserfs_special_inode_operations;
2703extern const struct file_operations reiserfs_dir_operations;
2704int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *);
2705
2706/* tail_conversion.c */
2707int direct2indirect(struct reiserfs_transaction_handle *, struct inode *,
2708 struct treepath *, struct buffer_head *, loff_t);
2709int indirect2direct(struct reiserfs_transaction_handle *, struct inode *,
2710 struct page *, struct treepath *, const struct cpu_key *,
2711 loff_t, char *);
2712void reiserfs_unmap_buffer(struct buffer_head *);
2713
2714/* file.c */
2715extern const struct inode_operations reiserfs_file_inode_operations;
2716extern const struct file_operations reiserfs_file_operations;
2717extern const struct address_space_operations reiserfs_address_space_operations;
2718
2719/* fix_nodes.c */
2720
2721int fix_nodes(int n_op_mode, struct tree_balance *tb,
2722 struct item_head *ins_ih, const void *);
2723void unfix_nodes(struct tree_balance *);
2724
2725/* prints.c */
2726void __reiserfs_panic(struct super_block *s, const char *id,
2727 const char *function, const char *fmt, ...)
2728 __attribute__ ((noreturn));
2729#define reiserfs_panic(s, id, fmt, args...) \
2730 __reiserfs_panic(s, id, __func__, fmt, ##args)
2731void __reiserfs_error(struct super_block *s, const char *id,
2732 const char *function, const char *fmt, ...);
2733#define reiserfs_error(s, id, fmt, args...) \
2734 __reiserfs_error(s, id, __func__, fmt, ##args)
2735void reiserfs_info(struct super_block *s, const char *fmt, ...);
2736void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...);
2737void print_indirect_item(struct buffer_head *bh, int item_num);
2738void store_print_tb(struct tree_balance *tb);
2739void print_cur_tb(char *mes);
2740void print_de(struct reiserfs_dir_entry *de);
2741void print_bi(struct buffer_info *bi, char *mes);
2742#define PRINT_LEAF_ITEMS 1 /* print all items */
2743#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */
2744#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */
2745void print_block(struct buffer_head *bh, ...);
2746void print_bmap(struct super_block *s, int silent);
2747void print_bmap_block(int i, char *data, int size, int silent);
2748/*void print_super_block (struct super_block * s, char * mes);*/
2749void print_objectid_map(struct super_block *s);
2750void print_block_head(struct buffer_head *bh, char *mes);
2751void check_leaf(struct buffer_head *bh);
2752void check_internal(struct buffer_head *bh);
2753void print_statistics(struct super_block *s);
2754char *reiserfs_hashname(int code);
2755
2756/* lbalance.c */
2757int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
2758 int mov_bytes, struct buffer_head *Snew);
2759int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes);
2760int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes);
2761void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first,
2762 int del_num, int del_bytes);
2763void leaf_insert_into_buf(struct buffer_info *bi, int before,
2764 struct item_head *inserted_item_ih,
2765 const char *inserted_item_body, int zeros_number);
2766void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
2767 int pos_in_item, int paste_size, const char *body,
2768 int zeros_number);
2769void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
2770 int pos_in_item, int cut_size);
2771void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
2772 int new_entry_count, struct reiserfs_de_head *new_dehs,
2773 const char *records, int paste_size);
2774/* ibalance.c */
2775int balance_internal(struct tree_balance *, int, int, struct item_head *,
2776 struct buffer_head **);
2777
2778/* do_balance.c */
2779void do_balance_mark_leaf_dirty(struct tree_balance *tb,
2780 struct buffer_head *bh, int flag);
2781#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
2782#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
2783
2784void do_balance(struct tree_balance *tb, struct item_head *ih,
2785 const char *body, int flag);
2786void reiserfs_invalidate_buffer(struct tree_balance *tb,
2787 struct buffer_head *bh);
2788
2789int get_left_neighbor_position(struct tree_balance *tb, int h);
2790int get_right_neighbor_position(struct tree_balance *tb, int h);
2791void replace_key(struct tree_balance *tb, struct buffer_head *, int,
2792 struct buffer_head *, int);
2793void make_empty_node(struct buffer_info *);
2794struct buffer_head *get_FEB(struct tree_balance *);
2795
2796/* bitmap.c */
2797
2798/* structure contains hints for block allocator, and it is a container for
2799 * arguments, such as node, search path, transaction_handle, etc. */
2800struct __reiserfs_blocknr_hint {
2801 struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */
2802 sector_t block; /* file offset, in blocks */
2803 struct in_core_key key;
2804 struct treepath *path; /* search path, used by allocator to determine search_start in
2805 * various ways */
2806 struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super block and
2807 * bitmap block changes */
2808 b_blocknr_t beg, end;
2809 b_blocknr_t search_start; /* a field used to transfer search start value (block number)
2810 * between different block allocator procedures
2811 * (determine_search_start() and others) */
2812 int prealloc_size; /* is set in determine_prealloc_size() function, used by the underlying
2813 * functions that do the actual allocation */
2814
2815 unsigned formatted_node:1; /* the allocator uses different policies for getting disk space for
2816 * formatted/unformatted blocks with/without preallocation */
2817 unsigned preallocate:1;
2818};
2819
2820typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t;
2821
2822int reiserfs_parse_alloc_options(struct super_block *, char *);
2823void reiserfs_init_alloc_options(struct super_block *s);
2824
2825/*
2826 * given a directory, this will tell you what packing locality
2827 * to use for a new object underneath it. The locality is returned
2828 * in disk byte order (le).
2829 */
2830__le32 reiserfs_choose_packing(struct inode *dir);
2831
2832int reiserfs_init_bitmap_cache(struct super_block *sb);
2833void reiserfs_free_bitmap_cache(struct super_block *sb);
2834void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info);
2835struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap);
2836int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
2837void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *,
2838 b_blocknr_t, int for_unformatted);
2839int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int,
2840 int);
2841static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
2842 b_blocknr_t * new_blocknrs,
2843 int amount_needed)
2844{
2845 reiserfs_blocknr_hint_t hint = {
2846 .th = tb->transaction_handle,
2847 .path = tb->tb_path,
2848 .inode = NULL,
2849 .key = tb->key,
2850 .block = 0,
2851 .formatted_node = 1
2852 };
2853 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed,
2854 0);
2855}
2856
2857static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
2858 *th, struct inode *inode,
2859 b_blocknr_t * new_blocknrs,
2860 struct treepath *path,
2861 sector_t block)
2862{
2863 reiserfs_blocknr_hint_t hint = {
2864 .th = th,
2865 .path = path,
2866 .inode = inode,
2867 .block = block,
2868 .formatted_node = 0,
2869 .preallocate = 0
2870 };
2871 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
2872}
2873
2874#ifdef REISERFS_PREALLOCATE
2875static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
2876 *th, struct inode *inode,
2877 b_blocknr_t * new_blocknrs,
2878 struct treepath *path,
2879 sector_t block)
2880{
2881 reiserfs_blocknr_hint_t hint = {
2882 .th = th,
2883 .path = path,
2884 .inode = inode,
2885 .block = block,
2886 .formatted_node = 0,
2887 .preallocate = 1
2888 };
2889 return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
2890}
2891
2892void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
2893 struct inode *inode);
2894void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th);
2895#endif
2896
2897/* hashes.c */
2898__u32 keyed_hash(const signed char *msg, int len);
2899__u32 yura_hash(const signed char *msg, int len);
2900__u32 r5_hash(const signed char *msg, int len);
2901
2902#define reiserfs_set_le_bit __set_bit_le
2903#define reiserfs_test_and_set_le_bit __test_and_set_bit_le
2904#define reiserfs_clear_le_bit __clear_bit_le
2905#define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le
2906#define reiserfs_test_le_bit test_bit_le
2907#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le
2908
2909/* sometimes reiserfs_truncate may need to allocate a few new blocks
2910 to perform indirect2direct conversion. People are probably used to
2911 thinking that truncate should work without problems on a filesystem
2912 without free disk space. They may complain that they cannot
2913 truncate due to lack of free disk space. This spare space allows us
2914 to not worry about it. 500 is probably too much, but it should be
2915 absolutely safe */
2916#define SPARE_SPACE 500
2917
2918/* prototypes from ioctl.c */
2919long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
2920long reiserfs_compat_ioctl(struct file *filp,
2921 unsigned int cmd, unsigned long arg);
2922int reiserfs_unpack(struct inode *inode, struct file *filp);
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 7483279b482d..9a17f63c3fd7 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -13,8 +13,7 @@
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/reiserfs_fs.h> 16#include "reiserfs.h"
17#include <linux/reiserfs_fs_sb.h>
18#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
19 18
20int reiserfs_resize(struct super_block *s, unsigned long block_count_new) 19int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 313d39d639eb..f8afa4b162b8 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -51,7 +51,7 @@
51#include <linux/time.h> 51#include <linux/time.h>
52#include <linux/string.h> 52#include <linux/string.h>
53#include <linux/pagemap.h> 53#include <linux/pagemap.h>
54#include <linux/reiserfs_fs.h> 54#include "reiserfs.h"
55#include <linux/buffer_head.h> 55#include <linux/buffer_head.h>
56#include <linux/quotaops.h> 56#include <linux/quotaops.h>
57 57
@@ -1284,12 +1284,12 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1284 ** -clm 1284 ** -clm
1285 */ 1285 */
1286 1286
1287 data = kmap_atomic(un_bh->b_page, KM_USER0); 1287 data = kmap_atomic(un_bh->b_page);
1288 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); 1288 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1289 memcpy(data + off, 1289 memcpy(data + off,
1290 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), 1290 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1291 ret_value); 1291 ret_value);
1292 kunmap_atomic(data, KM_USER0); 1292 kunmap_atomic(data);
1293 } 1293 }
1294 /* Perform balancing after all resources have been collected at once. */ 1294 /* Perform balancing after all resources have been collected at once. */
1295 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1295 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e12d8b97cd4d..8b7616ef06d8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -16,9 +16,9 @@
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/time.h> 17#include <linux/time.h>
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <linux/reiserfs_fs.h> 19#include "reiserfs.h"
20#include <linux/reiserfs_acl.h> 20#include "acl.h"
21#include <linux/reiserfs_xattr.h> 21#include "xattr.h"
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/blkdev.h> 23#include <linux/blkdev.h>
24#include <linux/buffer_head.h> 24#include <linux/buffer_head.h>
@@ -1874,11 +1874,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1874 unlock_new_inode(root_inode); 1874 unlock_new_inode(root_inode);
1875 } 1875 }
1876 1876
1877 s->s_root = d_alloc_root(root_inode); 1877 s->s_root = d_make_root(root_inode);
1878 if (!s->s_root) { 1878 if (!s->s_root)
1879 iput(root_inode);
1880 goto error; 1879 goto error;
1881 }
1882 // define and initialize hash function 1880 // define and initialize hash function
1883 sbi->s_hash_function = hash_function(s); 1881 sbi->s_hash_function = hash_function(s);
1884 if (sbi->s_hash_function == NULL) { 1882 if (sbi->s_hash_function == NULL) {
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index d7f6e51bef2a..5e2624d12f70 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -5,7 +5,7 @@
5#include <linux/time.h> 5#include <linux/time.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/buffer_head.h> 7#include <linux/buffer_head.h>
8#include <linux/reiserfs_fs.h> 8#include "reiserfs.h"
9 9
10/* access to tail : when one is going to read tail it must make sure, that is not running. 10/* access to tail : when one is going to read tail it must make sure, that is not running.
11 direct2indirect and indirect2direct can not run concurrently */ 11 direct2indirect and indirect2direct can not run concurrently */
@@ -128,9 +128,9 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
128 if (up_to_date_bh) { 128 if (up_to_date_bh) {
129 unsigned pgoff = 129 unsigned pgoff =
130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); 130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
131 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); 131 char *kaddr = kmap_atomic(up_to_date_bh->b_page);
132 memset(kaddr + pgoff, 0, blk_size - total_tail); 132 memset(kaddr + pgoff, 0, blk_size - total_tail);
133 kunmap_atomic(kaddr, KM_USER0); 133 kunmap_atomic(kaddr);
134 } 134 }
135 135
136 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 136 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c24deda8a8bc..46fc1c20a6b1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -33,7 +33,7 @@
33 * The xattrs themselves are protected by the xattr_sem. 33 * The xattrs themselves are protected by the xattr_sem.
34 */ 34 */
35 35
36#include <linux/reiserfs_fs.h> 36#include "reiserfs.h"
37#include <linux/capability.h> 37#include <linux/capability.h>
38#include <linux/dcache.h> 38#include <linux/dcache.h>
39#include <linux/namei.h> 39#include <linux/namei.h>
@@ -43,8 +43,8 @@
43#include <linux/file.h> 43#include <linux/file.h>
44#include <linux/pagemap.h> 44#include <linux/pagemap.h>
45#include <linux/xattr.h> 45#include <linux/xattr.h>
46#include <linux/reiserfs_xattr.h> 46#include "xattr.h"
47#include <linux/reiserfs_acl.h> 47#include "acl.h"
48#include <asm/uaccess.h> 48#include <asm/uaccess.h>
49#include <net/checksum.h> 49#include <net/checksum.h>
50#include <linux/stat.h> 50#include <linux/stat.h>
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
new file mode 100644
index 000000000000..f59626c5d33b
--- /dev/null
+++ b/fs/reiserfs/xattr.h
@@ -0,0 +1,122 @@
1#include <linux/reiserfs_xattr.h>
2#include <linux/init.h>
3#include <linux/list.h>
4#include <linux/rwsem.h>
5
6struct inode;
7struct dentry;
8struct iattr;
9struct super_block;
10struct nameidata;
11
12int reiserfs_xattr_register_handlers(void) __init;
13void reiserfs_xattr_unregister_handlers(void);
14int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
15int reiserfs_lookup_privroot(struct super_block *sb);
16int reiserfs_delete_xattrs(struct inode *inode);
17int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
18int reiserfs_permission(struct inode *inode, int mask);
19
20#ifdef CONFIG_REISERFS_FS_XATTR
21#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
22ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name,
23 void *buffer, size_t size);
24int reiserfs_setxattr(struct dentry *dentry, const char *name,
25 const void *value, size_t size, int flags);
26ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
27int reiserfs_removexattr(struct dentry *dentry, const char *name);
28
29int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
30int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
31int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *,
32 struct inode *, const char *, const void *,
33 size_t, int);
34
35extern const struct xattr_handler reiserfs_xattr_user_handler;
36extern const struct xattr_handler reiserfs_xattr_trusted_handler;
37extern const struct xattr_handler reiserfs_xattr_security_handler;
38#ifdef CONFIG_REISERFS_FS_SECURITY
39int reiserfs_security_init(struct inode *dir, struct inode *inode,
40 const struct qstr *qstr,
41 struct reiserfs_security_handle *sec);
42int reiserfs_security_write(struct reiserfs_transaction_handle *th,
43 struct inode *inode,
44 struct reiserfs_security_handle *sec);
45void reiserfs_security_free(struct reiserfs_security_handle *sec);
46#endif
47
48static inline int reiserfs_xattrs_initialized(struct super_block *sb)
49{
50 return REISERFS_SB(sb)->priv_root != NULL;
51}
52
53#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
54static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
55{
56 loff_t ret = 0;
57 if (reiserfs_file_data_log(inode)) {
58 ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize);
59 ret >>= inode->i_sb->s_blocksize_bits;
60 }
61 return ret;
62}
63
64/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
65 * Let's try to be smart about it.
66 * xattr root: We cache it. If it's not cached, we may need to create it.
67 * xattr dir: If anything has been loaded for this inode, we can set a flag
68 * saying so.
69 * xattr file: Since we don't cache xattrs, we can't tell. We always include
70 * blocks for it.
71 *
72 * However, since root and dir can be created between calls - YOU MUST SAVE
73 * THIS VALUE.
74 */
75static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
76{
77 size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
78
79 if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
80 nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
81 if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
82 nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
83 }
84
85 return nblocks;
86}
87
88static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
89{
90 init_rwsem(&REISERFS_I(inode)->i_xattr_sem);
91}
92
93#else
94
95#define reiserfs_getxattr NULL
96#define reiserfs_setxattr NULL
97#define reiserfs_listxattr NULL
98#define reiserfs_removexattr NULL
99
100static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
101{
102}
103#endif /* CONFIG_REISERFS_FS_XATTR */
104
105#ifndef CONFIG_REISERFS_FS_SECURITY
106static inline int reiserfs_security_init(struct inode *dir,
107 struct inode *inode,
108 const struct qstr *qstr,
109 struct reiserfs_security_handle *sec)
110{
111 return 0;
112}
113static inline int
114reiserfs_security_write(struct reiserfs_transaction_handle *th,
115 struct inode *inode,
116 struct reiserfs_security_handle *sec)
117{
118 return 0;
119}
120static inline void reiserfs_security_free(struct reiserfs_security_handle *sec)
121{}
122#endif
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 6da0396e5052..44474f9b990d 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -1,14 +1,14 @@
1#include <linux/capability.h> 1#include <linux/capability.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/posix_acl.h> 3#include <linux/posix_acl.h>
4#include <linux/reiserfs_fs.h> 4#include "reiserfs.h"
5#include <linux/errno.h> 5#include <linux/errno.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/xattr.h> 7#include <linux/xattr.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/posix_acl_xattr.h> 9#include <linux/posix_acl_xattr.h>
10#include <linux/reiserfs_xattr.h> 10#include "xattr.h"
11#include <linux/reiserfs_acl.h> 11#include "acl.h"
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13 13
14static int reiserfs_set_acl(struct reiserfs_transaction_handle *th, 14static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 534668fa41be..800a3cef6f62 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -1,10 +1,10 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/errno.h> 2#include <linux/errno.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <linux/reiserfs_xattr.h> 7#include "xattr.h"
8#include <linux/security.h> 8#include <linux/security.h>
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 9883736ce3ec..a0035719f66b 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -1,10 +1,10 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/capability.h> 2#include <linux/capability.h>
3#include <linux/errno.h> 3#include <linux/errno.h>
4#include <linux/fs.h> 4#include <linux/fs.h>
5#include <linux/pagemap.h> 5#include <linux/pagemap.h>
6#include <linux/xattr.h> 6#include <linux/xattr.h>
7#include <linux/reiserfs_xattr.h> 7#include "xattr.h"
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 45ae1a00013a..8667491ae7c3 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -1,9 +1,9 @@
1#include <linux/reiserfs_fs.h> 1#include "reiserfs.h"
2#include <linux/errno.h> 2#include <linux/errno.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include <linux/reiserfs_xattr.h> 6#include "xattr.h"
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9static int 9static int
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index bb36ab74eb45..e64f6b5f7ae5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -538,14 +538,12 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
538 if (IS_ERR(root)) 538 if (IS_ERR(root))
539 goto error; 539 goto error;
540 540
541 sb->s_root = d_alloc_root(root); 541 sb->s_root = d_make_root(root);
542 if (!sb->s_root) 542 if (!sb->s_root)
543 goto error_i; 543 goto error;
544 544
545 return 0; 545 return 0;
546 546
547error_i:
548 iput(root);
549error: 547error:
550 return -EINVAL; 548 return -EINVAL;
551error_rsb_inval: 549error_rsb_inval:
diff --git a/fs/select.c b/fs/select.c
index d33418fdc858..e782258d0de3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
912} 912}
913 913
914SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, 914SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
915 long, timeout_msecs) 915 int, timeout_msecs)
916{ 916{
917 struct timespec end_time, *to = NULL; 917 struct timespec end_time, *to = NULL;
918 int ret; 918 int ret;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4023d6be939b..aa242dc99373 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -140,9 +140,21 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
140 140
141 mutex_lock(&m->lock); 141 mutex_lock(&m->lock);
142 142
143 /*
144 * seq_file->op->..m_start/m_stop/m_next may do special actions
145 * or optimisations based on the file->f_version, so we want to
146 * pass the file->f_version to those methods.
147 *
148 * seq_file->version is just copy of f_version, and seq_file
149 * methods can treat it simply as file version.
150 * It is copied in first and copied out after all operations.
151 * It is convenient to have it as part of structure to avoid the
152 * need of passing another argument to all the seq_file methods.
153 */
154 m->version = file->f_version;
155
143 /* Don't assume *ppos is where we left it */ 156 /* Don't assume *ppos is where we left it */
144 if (unlikely(*ppos != m->read_pos)) { 157 if (unlikely(*ppos != m->read_pos)) {
145 m->read_pos = *ppos;
146 while ((err = traverse(m, *ppos)) == -EAGAIN) 158 while ((err = traverse(m, *ppos)) == -EAGAIN)
147 ; 159 ;
148 if (err) { 160 if (err) {
@@ -152,21 +164,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
152 m->index = 0; 164 m->index = 0;
153 m->count = 0; 165 m->count = 0;
154 goto Done; 166 goto Done;
167 } else {
168 m->read_pos = *ppos;
155 } 169 }
156 } 170 }
157 171
158 /*
159 * seq_file->op->..m_start/m_stop/m_next may do special actions
160 * or optimisations based on the file->f_version, so we want to
161 * pass the file->f_version to those methods.
162 *
163 * seq_file->version is just copy of f_version, and seq_file
164 * methods can treat it simply as file version.
165 * It is copied in first and copied out after all operations.
166 * It is convenient to have it as part of structure to avoid the
167 * need of passing another argument to all the seq_file methods.
168 */
169 m->version = file->f_version;
170 /* grab buffer if we didn't have one */ 172 /* grab buffer if we didn't have one */
171 if (!m->buf) { 173 if (!m->buf) {
172 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 174 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451dd..7ae2a574cb25 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,21 @@
30#include <linux/signalfd.h> 30#include <linux/signalfd.h>
31#include <linux/syscalls.h> 31#include <linux/syscalls.h>
32 32
33void signalfd_cleanup(struct sighand_struct *sighand)
34{
35 wait_queue_head_t *wqh = &sighand->signalfd_wqh;
36 /*
37 * The lockless check can race with remove_wait_queue() in progress,
38 * but in this case its caller should run under rcu_read_lock() and
39 * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
40 */
41 if (likely(!waitqueue_active(wqh)))
42 return;
43
44 /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
45 wake_up_poll(wqh, POLLHUP | POLLFREE);
46}
47
33struct signalfd_ctx { 48struct signalfd_ctx {
34 sigset_t sigmask; 49 sigset_t sigmask;
35}; 50};
diff --git a/fs/splice.c b/fs/splice.c
index 1ec0493266b3..f16402ed915c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -737,15 +737,12 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
737 goto out; 737 goto out;
738 738
739 if (buf->page != page) { 739 if (buf->page != page) {
740 /*
741 * Careful, ->map() uses KM_USER0!
742 */
743 char *src = buf->ops->map(pipe, buf, 1); 740 char *src = buf->ops->map(pipe, buf, 1);
744 char *dst = kmap_atomic(page, KM_USER1); 741 char *dst = kmap_atomic(page);
745 742
746 memcpy(dst + offset, src + buf->offset, this_len); 743 memcpy(dst + offset, src + buf->offset, this_len);
747 flush_dcache_page(page); 744 flush_dcache_page(page);
748 kunmap_atomic(dst, KM_USER1); 745 kunmap_atomic(dst);
749 buf->ops->unmap(pipe, buf, src); 746 buf->ops->unmap(pipe, buf, src);
750 } 747 }
751 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, 748 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 38bb1c640559..8ca62c28fe12 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -464,10 +464,10 @@ static int squashfs_readpage(struct file *file, struct page *page)
464 if (PageUptodate(push_page)) 464 if (PageUptodate(push_page))
465 goto skip_page; 465 goto skip_page;
466 466
467 pageaddr = kmap_atomic(push_page, KM_USER0); 467 pageaddr = kmap_atomic(push_page);
468 squashfs_copy_data(pageaddr, buffer, offset, avail); 468 squashfs_copy_data(pageaddr, buffer, offset, avail);
469 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); 469 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
470 kunmap_atomic(pageaddr, KM_USER0); 470 kunmap_atomic(pageaddr);
471 flush_dcache_page(push_page); 471 flush_dcache_page(push_page);
472 SetPageUptodate(push_page); 472 SetPageUptodate(push_page);
473skip_page: 473skip_page:
@@ -484,9 +484,9 @@ skip_page:
484error_out: 484error_out:
485 SetPageError(page); 485 SetPageError(page);
486out: 486out:
487 pageaddr = kmap_atomic(page, KM_USER0); 487 pageaddr = kmap_atomic(page);
488 memset(pageaddr, 0, PAGE_CACHE_SIZE); 488 memset(pageaddr, 0, PAGE_CACHE_SIZE);
489 kunmap_atomic(pageaddr, KM_USER0); 489 kunmap_atomic(pageaddr);
490 flush_dcache_page(page); 490 flush_dcache_page(page);
491 if (!PageError(page)) 491 if (!PageError(page))
492 SetPageUptodate(page); 492 SetPageUptodate(page);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index ecaa2f7bdb8f..970b1167e7cb 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -316,11 +316,10 @@ check_directory_table:
316 } 316 }
317 insert_inode_hash(root); 317 insert_inode_hash(root);
318 318
319 sb->s_root = d_alloc_root(root); 319 sb->s_root = d_make_root(root);
320 if (sb->s_root == NULL) { 320 if (sb->s_root == NULL) {
321 ERROR("Root inode create failed\n"); 321 ERROR("Root inode create failed\n");
322 err = -ENOMEM; 322 err = -ENOMEM;
323 iput(root);
324 goto failed_mount; 323 goto failed_mount;
325 } 324 }
326 325
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 1191817264cc..12806dffb345 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -90,14 +90,14 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page)
90 goto error_out; 90 goto error_out;
91 } 91 }
92 92
93 pageaddr = kmap_atomic(page, KM_USER0); 93 pageaddr = kmap_atomic(page);
94 copied = squashfs_copy_data(pageaddr + bytes, entry, offset, 94 copied = squashfs_copy_data(pageaddr + bytes, entry, offset,
95 length - bytes); 95 length - bytes);
96 if (copied == length - bytes) 96 if (copied == length - bytes)
97 memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length); 97 memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length);
98 else 98 else
99 block = entry->next_index; 99 block = entry->next_index;
100 kunmap_atomic(pageaddr, KM_USER0); 100 kunmap_atomic(pageaddr);
101 squashfs_cache_put(entry); 101 squashfs_cache_put(entry);
102 } 102 }
103 103
diff --git a/fs/stat.c b/fs/stat.c
index 8806b8997d2e..86f13563a463 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -307,7 +307,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
307 if (inode->i_op->readlink) { 307 if (inode->i_op->readlink) {
308 error = security_inode_readlink(path.dentry); 308 error = security_inode_readlink(path.dentry);
309 if (!error) { 309 if (!error) {
310 touch_atime(path.mnt, path.dentry); 310 touch_atime(&path);
311 error = inode->i_op->readlink(path.dentry, 311 error = inode->i_op->readlink(path.dentry,
312 buf, bufsiz); 312 buf, bufsiz);
313 } 313 }
diff --git a/fs/super.c b/fs/super.c
index 6015c02296b7..d90e900a8a0e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -32,6 +32,7 @@
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h> 33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h> 34#include <linux/cleancache.h>
35#include <linux/fsnotify.h>
35#include "internal.h" 36#include "internal.h"
36 37
37 38
@@ -634,6 +635,28 @@ rescan:
634EXPORT_SYMBOL(get_super); 635EXPORT_SYMBOL(get_super);
635 636
636/** 637/**
638 * get_super_thawed - get thawed superblock of a device
639 * @bdev: device to get the superblock for
640 *
641 * Scans the superblock list and finds the superblock of the file system
642 * mounted on the device. The superblock is returned once it is thawed
643 * (or immediately if it was not frozen). %NULL is returned if no match
644 * is found.
645 */
646struct super_block *get_super_thawed(struct block_device *bdev)
647{
648 while (1) {
649 struct super_block *s = get_super(bdev);
650 if (!s || s->s_frozen == SB_UNFROZEN)
651 return s;
652 up_read(&s->s_umount);
653 vfs_check_frozen(s, SB_FREEZE_WRITE);
654 put_super(s);
655 }
656}
657EXPORT_SYMBOL(get_super_thawed);
658
659/**
637 * get_active_super - get an active reference to the superblock of a device 660 * get_active_super - get an active reference to the superblock of a device
638 * @bdev: device to get the superblock for 661 * @bdev: device to get the superblock for
639 * 662 *
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7fdf6a7b7436..2a7a3f5d1ca6 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -22,76 +22,103 @@
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/security.h> 24#include <linux/security.h>
25#include <linux/hash.h>
25#include "sysfs.h" 26#include "sysfs.h"
26 27
27DEFINE_MUTEX(sysfs_mutex); 28DEFINE_MUTEX(sysfs_mutex);
28DEFINE_SPINLOCK(sysfs_assoc_lock); 29DEFINE_SPINLOCK(sysfs_assoc_lock);
29 30
31#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb);
32
30static DEFINE_SPINLOCK(sysfs_ino_lock); 33static DEFINE_SPINLOCK(sysfs_ino_lock);
31static DEFINE_IDA(sysfs_ino_ida); 34static DEFINE_IDA(sysfs_ino_ida);
32 35
33/** 36/**
34 * sysfs_link_sibling - link sysfs_dirent into sibling list 37 * sysfs_name_hash
38 * @ns: Namespace tag to hash
39 * @name: Null terminated string to hash
40 *
41 * Returns 31-bit hash of ns + name (so it fits in an off_t)
42 */
43static unsigned int sysfs_name_hash(const void *ns, const char *name)
44{
45 unsigned long hash = init_name_hash();
46 unsigned int len = strlen(name);
47 while (len--)
48 hash = partial_name_hash(*name++, hash);
49 hash = ( end_name_hash(hash) ^ hash_ptr( (void *)ns, 31 ) );
50 hash &= 0x7fffffffU;
51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
52 if (hash < 1)
53 hash += 2;
54 if (hash >= INT_MAX)
55 hash = INT_MAX - 1;
56 return hash;
57}
58
59static int sysfs_name_compare(unsigned int hash, const void *ns,
60 const char *name, const struct sysfs_dirent *sd)
61{
62 if (hash != sd->s_hash)
63 return hash - sd->s_hash;
64 if (ns != sd->s_ns)
65 return ns - sd->s_ns;
66 return strcmp(name, sd->s_name);
67}
68
69static int sysfs_sd_compare(const struct sysfs_dirent *left,
70 const struct sysfs_dirent *right)
71{
72 return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name,
73 right);
74}
75
76/**
77 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree
35 * @sd: sysfs_dirent of interest 78 * @sd: sysfs_dirent of interest
36 * 79 *
37 * Link @sd into its sibling list which starts from 80 * Link @sd into its sibling rbtree which starts from
38 * sd->s_parent->s_dir.children. 81 * sd->s_parent->s_dir.children.
39 * 82 *
40 * Locking: 83 * Locking:
41 * mutex_lock(sysfs_mutex) 84 * mutex_lock(sysfs_mutex)
85 *
86 * RETURNS:
87 * 0 on success, -EEXIST on failure.
42 */ 88 */
43static void sysfs_link_sibling(struct sysfs_dirent *sd) 89static int sysfs_link_sibling(struct sysfs_dirent *sd)
44{ 90{
45 struct sysfs_dirent *parent_sd = sd->s_parent; 91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
46 92 struct rb_node *parent = NULL;
47 struct rb_node **p;
48 struct rb_node *parent;
49 93
50 if (sysfs_type(sd) == SYSFS_DIR) 94 if (sysfs_type(sd) == SYSFS_DIR)
51 parent_sd->s_dir.subdirs++; 95 sd->s_parent->s_dir.subdirs++;
52 96
53 p = &parent_sd->s_dir.inode_tree.rb_node; 97 while (*node) {
54 parent = NULL; 98 struct sysfs_dirent *pos;
55 while (*p) { 99 int result;
56 parent = *p; 100
57#define node rb_entry(parent, struct sysfs_dirent, inode_node) 101 pos = to_sysfs_dirent(*node);
58 if (sd->s_ino < node->s_ino) { 102 parent = *node;
59 p = &node->inode_node.rb_left; 103 result = sysfs_sd_compare(sd, pos);
60 } else if (sd->s_ino > node->s_ino) { 104 if (result < 0)
61 p = &node->inode_node.rb_right; 105 node = &pos->s_rb.rb_left;
62 } else { 106 else if (result > 0)
63 printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n", 107 node = &pos->s_rb.rb_right;
64 (unsigned long) sd->s_ino); 108 else
65 BUG(); 109 return -EEXIST;
66 }
67#undef node
68 }
69 rb_link_node(&sd->inode_node, parent, p);
70 rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree);
71
72 p = &parent_sd->s_dir.name_tree.rb_node;
73 parent = NULL;
74 while (*p) {
75 int c;
76 parent = *p;
77#define node rb_entry(parent, struct sysfs_dirent, name_node)
78 c = strcmp(sd->s_name, node->s_name);
79 if (c < 0) {
80 p = &node->name_node.rb_left;
81 } else {
82 p = &node->name_node.rb_right;
83 }
84#undef node
85 } 110 }
86 rb_link_node(&sd->name_node, parent, p); 111 /* add new node and rebalance the tree */
87 rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree); 112 rb_link_node(&sd->s_rb, parent, node);
113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
114 return 0;
88} 115}
89 116
90/** 117/**
91 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list 118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
92 * @sd: sysfs_dirent of interest 119 * @sd: sysfs_dirent of interest
93 * 120 *
94 * Unlink @sd from its sibling list which starts from 121 * Unlink @sd from its sibling rbtree which starts from
95 * sd->s_parent->s_dir.children. 122 * sd->s_parent->s_dir.children.
96 * 123 *
97 * Locking: 124 * Locking:
@@ -102,8 +129,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
102 if (sysfs_type(sd) == SYSFS_DIR) 129 if (sysfs_type(sd) == SYSFS_DIR)
103 sd->s_parent->s_dir.subdirs--; 130 sd->s_parent->s_dir.subdirs--;
104 131
105 rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree); 132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
106 rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
107} 133}
108 134
109/** 135/**
@@ -198,7 +224,7 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
198 rwsem_release(&sd->dep_map, 1, _RET_IP_); 224 rwsem_release(&sd->dep_map, 1, _RET_IP_);
199} 225}
200 226
201static int sysfs_alloc_ino(ino_t *pino) 227static int sysfs_alloc_ino(unsigned int *pino)
202{ 228{
203 int ino, rc; 229 int ino, rc;
204 230
@@ -217,7 +243,7 @@ static int sysfs_alloc_ino(ino_t *pino)
217 return rc; 243 return rc;
218} 244}
219 245
220static void sysfs_free_ino(ino_t ino) 246static void sysfs_free_ino(unsigned int ino)
221{ 247{
222 spin_lock(&sysfs_ino_lock); 248 spin_lock(&sysfs_ino_lock);
223 ida_remove(&sysfs_ino_ida, ino); 249 ida_remove(&sysfs_ino_ida, ino);
@@ -402,6 +428,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
402int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 428int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
403{ 429{
404 struct sysfs_inode_attrs *ps_iattr; 430 struct sysfs_inode_attrs *ps_iattr;
431 int ret;
405 432
406 if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { 433 if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
407 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 434 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -410,12 +437,12 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
410 return -EINVAL; 437 return -EINVAL;
411 } 438 }
412 439
413 if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name)) 440 sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
414 return -EEXIST;
415
416 sd->s_parent = sysfs_get(acxt->parent_sd); 441 sd->s_parent = sysfs_get(acxt->parent_sd);
417 442
418 sysfs_link_sibling(sd); 443 ret = sysfs_link_sibling(sd);
444 if (ret)
445 return ret;
419 446
420 /* Update timestamps on the parent */ 447 /* Update timestamps on the parent */
421 ps_iattr = acxt->parent_sd->s_iattr; 448 ps_iattr = acxt->parent_sd->s_iattr;
@@ -565,8 +592,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
565 const void *ns, 592 const void *ns,
566 const unsigned char *name) 593 const unsigned char *name)
567{ 594{
568 struct rb_node *p = parent_sd->s_dir.name_tree.rb_node; 595 struct rb_node *node = parent_sd->s_dir.children.rb_node;
569 struct sysfs_dirent *found = NULL; 596 unsigned int hash;
570 597
571 if (!!sysfs_ns_type(parent_sd) != !!ns) { 598 if (!!sysfs_ns_type(parent_sd) != !!ns) {
572 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 599 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -575,33 +602,21 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
575 return NULL; 602 return NULL;
576 } 603 }
577 604
578 while (p) { 605 hash = sysfs_name_hash(ns, name);
579 int c; 606 while (node) {
580#define node rb_entry(p, struct sysfs_dirent, name_node) 607 struct sysfs_dirent *sd;
581 c = strcmp(name, node->s_name); 608 int result;
582 if (c < 0) { 609
583 p = node->name_node.rb_left; 610 sd = to_sysfs_dirent(node);
584 } else if (c > 0) { 611 result = sysfs_name_compare(hash, ns, name, sd);
585 p = node->name_node.rb_right; 612 if (result < 0)
586 } else { 613 node = node->rb_left;
587 found = node; 614 else if (result > 0)
588 p = node->name_node.rb_left; 615 node = node->rb_right;
589 } 616 else
590#undef node 617 return sd;
591 }
592
593 if (found) {
594 while (found->s_ns != ns) {
595 p = rb_next(&found->name_node);
596 if (!p)
597 return NULL;
598 found = rb_entry(p, struct sysfs_dirent, name_node);
599 if (strcmp(name, found->s_name))
600 return NULL;
601 }
602 } 618 }
603 619 return NULL;
604 return found;
605} 620}
606 621
607/** 622/**
@@ -804,9 +819,9 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
804 819
805 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); 820 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
806 sysfs_addrm_start(&acxt, dir_sd); 821 sysfs_addrm_start(&acxt, dir_sd);
807 pos = rb_first(&dir_sd->s_dir.inode_tree); 822 pos = rb_first(&dir_sd->s_dir.children);
808 while (pos) { 823 while (pos) {
809 struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node); 824 struct sysfs_dirent *sd = to_sysfs_dirent(pos);
810 pos = rb_next(pos); 825 pos = rb_next(pos);
811 if (sysfs_type(sd) != SYSFS_DIR) 826 if (sysfs_type(sd) != SYSFS_DIR)
812 sysfs_remove_one(&acxt, sd); 827 sysfs_remove_one(&acxt, sd);
@@ -863,6 +878,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
863 878
864 dup_name = sd->s_name; 879 dup_name = sd->s_name;
865 sd->s_name = new_name; 880 sd->s_name = new_name;
881 sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
866 } 882 }
867 883
868 /* Move to the appropriate place in the appropriate directories rbtree. */ 884 /* Move to the appropriate place in the appropriate directories rbtree. */
@@ -919,38 +935,36 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
919} 935}
920 936
921static struct sysfs_dirent *sysfs_dir_pos(const void *ns, 937static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
922 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) 938 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
923{ 939{
924 if (pos) { 940 if (pos) {
925 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && 941 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
926 pos->s_parent == parent_sd && 942 pos->s_parent == parent_sd &&
927 ino == pos->s_ino; 943 hash == pos->s_hash;
928 sysfs_put(pos); 944 sysfs_put(pos);
929 if (!valid) 945 if (!valid)
930 pos = NULL; 946 pos = NULL;
931 } 947 }
932 if (!pos && (ino > 1) && (ino < INT_MAX)) { 948 if (!pos && (hash > 1) && (hash < INT_MAX)) {
933 struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node; 949 struct rb_node *node = parent_sd->s_dir.children.rb_node;
934 while (p) { 950 while (node) {
935#define node rb_entry(p, struct sysfs_dirent, inode_node) 951 pos = to_sysfs_dirent(node);
936 if (ino < node->s_ino) { 952
937 pos = node; 953 if (hash < pos->s_hash)
938 p = node->inode_node.rb_left; 954 node = node->rb_left;
939 } else if (ino > node->s_ino) { 955 else if (hash > pos->s_hash)
940 p = node->inode_node.rb_right; 956 node = node->rb_right;
941 } else { 957 else
942 pos = node;
943 break; 958 break;
944 }
945#undef node
946 } 959 }
947 } 960 }
961 /* Skip over entries in the wrong namespace */
948 while (pos && pos->s_ns != ns) { 962 while (pos && pos->s_ns != ns) {
949 struct rb_node *p = rb_next(&pos->inode_node); 963 struct rb_node *node = rb_next(&pos->s_rb);
950 if (!p) 964 if (!node)
951 pos = NULL; 965 pos = NULL;
952 else 966 else
953 pos = rb_entry(p, struct sysfs_dirent, inode_node); 967 pos = to_sysfs_dirent(node);
954 } 968 }
955 return pos; 969 return pos;
956} 970}
@@ -960,11 +974,11 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
960{ 974{
961 pos = sysfs_dir_pos(ns, parent_sd, ino, pos); 975 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
962 if (pos) do { 976 if (pos) do {
963 struct rb_node *p = rb_next(&pos->inode_node); 977 struct rb_node *node = rb_next(&pos->s_rb);
964 if (!p) 978 if (!node)
965 pos = NULL; 979 pos = NULL;
966 else 980 else
967 pos = rb_entry(p, struct sysfs_dirent, inode_node); 981 pos = to_sysfs_dirent(node);
968 } while (pos && pos->s_ns != ns); 982 } while (pos && pos->s_ns != ns);
969 return pos; 983 return pos;
970} 984}
@@ -1006,7 +1020,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1006 len = strlen(name); 1020 len = strlen(name);
1007 ino = pos->s_ino; 1021 ino = pos->s_ino;
1008 type = dt_type(pos); 1022 type = dt_type(pos);
1009 filp->f_pos = ino; 1023 filp->f_pos = pos->s_hash;
1010 filp->private_data = sysfs_get(pos); 1024 filp->private_data = sysfs_get(pos);
1011 1025
1012 mutex_unlock(&sysfs_mutex); 1026 mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 85eb81683a29..feb2d69396cf 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec
136 void *old_secdata; 136 void *old_secdata;
137 size_t old_secdata_len; 137 size_t old_secdata_len;
138 138
139 iattrs = sd->s_iattr; 139 if (!sd->s_iattr) {
140 if (!iattrs) 140 sd->s_iattr = sysfs_init_inode_attrs(sd);
141 iattrs = sysfs_init_inode_attrs(sd); 141 if (!sd->s_iattr)
142 if (!iattrs) 142 return -ENOMEM;
143 return -ENOMEM; 143 }
144 144
145 iattrs = sd->s_iattr;
145 old_secdata = iattrs->ia_secdata; 146 old_secdata = iattrs->ia_secdata;
146 old_secdata_len = iattrs->ia_secdata_len; 147 old_secdata_len = iattrs->ia_secdata_len;
147 148
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index e34f0d99ea4e..52c3bdb66a84 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -36,7 +36,7 @@ struct sysfs_dirent sysfs_root = {
36 .s_name = "", 36 .s_name = "",
37 .s_count = ATOMIC_INIT(1), 37 .s_count = ATOMIC_INIT(1),
38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT), 38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
39 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 39 .s_mode = S_IFDIR | S_IRUGO | S_IXUGO,
40 .s_ino = 1, 40 .s_ino = 1,
41}; 41};
42 42
@@ -61,10 +61,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 } 61 }
62 62
63 /* instantiate and link root dentry */ 63 /* instantiate and link root dentry */
64 root = d_alloc_root(inode); 64 root = d_make_root(inode);
65 if (!root) { 65 if (!root) {
66 pr_debug("%s: could not get root dentry!\n",__func__); 66 pr_debug("%s: could not get root dentry!\n",__func__);
67 iput(inode);
68 return -ENOMEM; 67 return -ENOMEM;
69 } 68 }
70 root->d_fsdata = &sysfs_root; 69 root->d_fsdata = &sysfs_root;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 7484a36ee678..661a9639570b 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -20,9 +20,8 @@ struct sysfs_elem_dir {
20 struct kobject *kobj; 20 struct kobject *kobj;
21 21
22 unsigned long subdirs; 22 unsigned long subdirs;
23 23 /* children rbtree starts here and goes through sd->s_rb */
24 struct rb_root inode_tree; 24 struct rb_root children;
25 struct rb_root name_tree;
26}; 25};
27 26
28struct sysfs_elem_symlink { 27struct sysfs_elem_symlink {
@@ -62,8 +61,7 @@ struct sysfs_dirent {
62 struct sysfs_dirent *s_parent; 61 struct sysfs_dirent *s_parent;
63 const char *s_name; 62 const char *s_name;
64 63
65 struct rb_node inode_node; 64 struct rb_node s_rb;
66 struct rb_node name_node;
67 65
68 union { 66 union {
69 struct completion *completion; 67 struct completion *completion;
@@ -71,6 +69,7 @@ struct sysfs_dirent {
71 } u; 69 } u;
72 70
73 const void *s_ns; /* namespace tag */ 71 const void *s_ns; /* namespace tag */
72 unsigned int s_hash; /* ns + name hash */
74 union { 73 union {
75 struct sysfs_elem_dir s_dir; 74 struct sysfs_elem_dir s_dir;
76 struct sysfs_elem_symlink s_symlink; 75 struct sysfs_elem_symlink s_symlink;
@@ -78,9 +77,9 @@ struct sysfs_dirent {
78 struct sysfs_elem_bin_attr s_bin_attr; 77 struct sysfs_elem_bin_attr s_bin_attr;
79 }; 78 };
80 79
81 unsigned int s_flags; 80 unsigned short s_flags;
82 umode_t s_mode; 81 umode_t s_mode;
83 ino_t s_ino; 82 unsigned int s_ino;
84 struct sysfs_inode_attrs *s_iattr; 83 struct sysfs_inode_attrs *s_iattr;
85}; 84};
86 85
@@ -95,11 +94,11 @@ struct sysfs_dirent {
95#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) 94#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
96 95
97/* identify any namespace tag on sysfs_dirents */ 96/* identify any namespace tag on sysfs_dirents */
98#define SYSFS_NS_TYPE_MASK 0xff00 97#define SYSFS_NS_TYPE_MASK 0xf00
99#define SYSFS_NS_TYPE_SHIFT 8 98#define SYSFS_NS_TYPE_SHIFT 8
100 99
101#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK) 100#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
102#define SYSFS_FLAG_REMOVED 0x020000 101#define SYSFS_FLAG_REMOVED 0x02000
103 102
104static inline unsigned int sysfs_type(struct sysfs_dirent *sd) 103static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
105{ 104{
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b217797e621b..d7466e293614 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -121,9 +121,6 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
121{ 121{
122 struct inode *inode = old_dentry->d_inode; 122 struct inode *inode = old_dentry->d_inode;
123 123
124 if (inode->i_nlink >= SYSV_SB(inode->i_sb)->s_link_max)
125 return -EMLINK;
126
127 inode->i_ctime = CURRENT_TIME_SEC; 124 inode->i_ctime = CURRENT_TIME_SEC;
128 inode_inc_link_count(inode); 125 inode_inc_link_count(inode);
129 ihold(inode); 126 ihold(inode);
@@ -134,10 +131,8 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
134static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode) 131static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
135{ 132{
136 struct inode * inode; 133 struct inode * inode;
137 int err = -EMLINK; 134 int err;
138 135
139 if (dir->i_nlink >= SYSV_SB(dir->i_sb)->s_link_max)
140 goto out;
141 inode_inc_link_count(dir); 136 inode_inc_link_count(dir);
142 137
143 inode = sysv_new_inode(dir, S_IFDIR|mode); 138 inode = sysv_new_inode(dir, S_IFDIR|mode);
@@ -251,11 +246,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
251 drop_nlink(new_inode); 246 drop_nlink(new_inode);
252 inode_dec_link_count(new_inode); 247 inode_dec_link_count(new_inode);
253 } else { 248 } else {
254 if (dir_de) {
255 err = -EMLINK;
256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
257 goto out_dir;
258 }
259 err = sysv_add_link(new_dentry, old_inode); 249 err = sysv_add_link(new_dentry, old_inode);
260 if (err) 250 if (err)
261 goto out_dir; 251 goto out_dir;
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index f60c196913ea..7491c33b6468 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -44,7 +44,7 @@ enum {
44 JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60 44 JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60
45}; 45};
46 46
47static void detected_xenix(struct sysv_sb_info *sbi) 47static void detected_xenix(struct sysv_sb_info *sbi, unsigned *max_links)
48{ 48{
49 struct buffer_head *bh1 = sbi->s_bh1; 49 struct buffer_head *bh1 = sbi->s_bh1;
50 struct buffer_head *bh2 = sbi->s_bh2; 50 struct buffer_head *bh2 = sbi->s_bh2;
@@ -59,7 +59,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
59 sbd2 = (struct xenix_super_block *) (bh2->b_data - 512); 59 sbd2 = (struct xenix_super_block *) (bh2->b_data - 512);
60 } 60 }
61 61
62 sbi->s_link_max = XENIX_LINK_MAX; 62 *max_links = XENIX_LINK_MAX;
63 sbi->s_fic_size = XENIX_NICINOD; 63 sbi->s_fic_size = XENIX_NICINOD;
64 sbi->s_flc_size = XENIX_NICFREE; 64 sbi->s_flc_size = XENIX_NICFREE;
65 sbi->s_sbd1 = (char *)sbd1; 65 sbi->s_sbd1 = (char *)sbd1;
@@ -75,7 +75,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
75 sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize); 75 sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize);
76} 76}
77 77
78static void detected_sysv4(struct sysv_sb_info *sbi) 78static void detected_sysv4(struct sysv_sb_info *sbi, unsigned *max_links)
79{ 79{
80 struct sysv4_super_block * sbd; 80 struct sysv4_super_block * sbd;
81 struct buffer_head *bh1 = sbi->s_bh1; 81 struct buffer_head *bh1 = sbi->s_bh1;
@@ -86,7 +86,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
86 else 86 else
87 sbd = (struct sysv4_super_block *) bh2->b_data; 87 sbd = (struct sysv4_super_block *) bh2->b_data;
88 88
89 sbi->s_link_max = SYSV_LINK_MAX; 89 *max_links = SYSV_LINK_MAX;
90 sbi->s_fic_size = SYSV_NICINOD; 90 sbi->s_fic_size = SYSV_NICINOD;
91 sbi->s_flc_size = SYSV_NICFREE; 91 sbi->s_flc_size = SYSV_NICFREE;
92 sbi->s_sbd1 = (char *)sbd; 92 sbi->s_sbd1 = (char *)sbd;
@@ -103,7 +103,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
103 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 103 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
104} 104}
105 105
106static void detected_sysv2(struct sysv_sb_info *sbi) 106static void detected_sysv2(struct sysv_sb_info *sbi, unsigned *max_links)
107{ 107{
108 struct sysv2_super_block *sbd; 108 struct sysv2_super_block *sbd;
109 struct buffer_head *bh1 = sbi->s_bh1; 109 struct buffer_head *bh1 = sbi->s_bh1;
@@ -114,7 +114,7 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
114 else 114 else
115 sbd = (struct sysv2_super_block *) bh2->b_data; 115 sbd = (struct sysv2_super_block *) bh2->b_data;
116 116
117 sbi->s_link_max = SYSV_LINK_MAX; 117 *max_links = SYSV_LINK_MAX;
118 sbi->s_fic_size = SYSV_NICINOD; 118 sbi->s_fic_size = SYSV_NICINOD;
119 sbi->s_flc_size = SYSV_NICFREE; 119 sbi->s_flc_size = SYSV_NICFREE;
120 sbi->s_sbd1 = (char *)sbd; 120 sbi->s_sbd1 = (char *)sbd;
@@ -131,14 +131,14 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
131 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 131 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
132} 132}
133 133
134static void detected_coherent(struct sysv_sb_info *sbi) 134static void detected_coherent(struct sysv_sb_info *sbi, unsigned *max_links)
135{ 135{
136 struct coh_super_block * sbd; 136 struct coh_super_block * sbd;
137 struct buffer_head *bh1 = sbi->s_bh1; 137 struct buffer_head *bh1 = sbi->s_bh1;
138 138
139 sbd = (struct coh_super_block *) bh1->b_data; 139 sbd = (struct coh_super_block *) bh1->b_data;
140 140
141 sbi->s_link_max = COH_LINK_MAX; 141 *max_links = COH_LINK_MAX;
142 sbi->s_fic_size = COH_NICINOD; 142 sbi->s_fic_size = COH_NICINOD;
143 sbi->s_flc_size = COH_NICFREE; 143 sbi->s_flc_size = COH_NICFREE;
144 sbi->s_sbd1 = (char *)sbd; 144 sbi->s_sbd1 = (char *)sbd;
@@ -154,12 +154,12 @@ static void detected_coherent(struct sysv_sb_info *sbi)
154 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize); 154 sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
155} 155}
156 156
157static void detected_v7(struct sysv_sb_info *sbi) 157static void detected_v7(struct sysv_sb_info *sbi, unsigned *max_links)
158{ 158{
159 struct buffer_head *bh2 = sbi->s_bh2; 159 struct buffer_head *bh2 = sbi->s_bh2;
160 struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data; 160 struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data;
161 161
162 sbi->s_link_max = V7_LINK_MAX; 162 *max_links = V7_LINK_MAX;
163 sbi->s_fic_size = V7_NICINOD; 163 sbi->s_fic_size = V7_NICINOD;
164 sbi->s_flc_size = V7_NICFREE; 164 sbi->s_flc_size = V7_NICFREE;
165 sbi->s_sbd1 = (char *)sbd; 165 sbi->s_sbd1 = (char *)sbd;
@@ -290,7 +290,7 @@ static char *flavour_names[] = {
290 [FSTYPE_AFS] = "AFS", 290 [FSTYPE_AFS] = "AFS",
291}; 291};
292 292
293static void (*flavour_setup[])(struct sysv_sb_info *) = { 293static void (*flavour_setup[])(struct sysv_sb_info *, unsigned *) = {
294 [FSTYPE_XENIX] = detected_xenix, 294 [FSTYPE_XENIX] = detected_xenix,
295 [FSTYPE_SYSV4] = detected_sysv4, 295 [FSTYPE_SYSV4] = detected_sysv4,
296 [FSTYPE_SYSV2] = detected_sysv2, 296 [FSTYPE_SYSV2] = detected_sysv2,
@@ -310,7 +310,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
310 310
311 sbi->s_firstinodezone = 2; 311 sbi->s_firstinodezone = 2;
312 312
313 flavour_setup[sbi->s_type](sbi); 313 flavour_setup[sbi->s_type](sbi, &sb->s_max_links);
314 314
315 sbi->s_truncate = 1; 315 sbi->s_truncate = 1;
316 sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone; 316 sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone;
@@ -341,9 +341,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
341 printk("SysV FS: get root inode failed\n"); 341 printk("SysV FS: get root inode failed\n");
342 return 0; 342 return 0;
343 } 343 }
344 sb->s_root = d_alloc_root(root_inode); 344 sb->s_root = d_make_root(root_inode);
345 if (!sb->s_root) { 345 if (!sb->s_root) {
346 iput(root_inode);
347 printk("SysV FS: get root dentry failed\n"); 346 printk("SysV FS: get root dentry failed\n");
348 return 0; 347 return 0;
349 } 348 }
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 0e4b821c5691..11b07672f6c5 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -24,7 +24,6 @@ struct sysv_sb_info {
24 char s_bytesex; /* bytesex (le/be/pdp) */ 24 char s_bytesex; /* bytesex (le/be/pdp) */
25 char s_truncate; /* if 1: names > SYSV_NAMELEN chars are truncated */ 25 char s_truncate; /* if 1: names > SYSV_NAMELEN chars are truncated */
26 /* if 0: they are disallowed (ENAMETOOLONG) */ 26 /* if 0: they are disallowed (ENAMETOOLONG) */
27 nlink_t s_link_max; /* max number of hard links to a file */
28 unsigned int s_inodes_per_block; /* number of inodes per block */ 27 unsigned int s_inodes_per_block; /* number of inodes per block */
29 unsigned int s_inodes_per_block_1; /* inodes_per_block - 1 */ 28 unsigned int s_inodes_per_block_1; /* inodes_per_block - 1 */
30 unsigned int s_inodes_per_block_bits; /* log2(inodes_per_block) */ 29 unsigned int s_inodes_per_block_bits; /* log2(inodes_per_block) */
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f9c234bf33d3..5c8f6dc1d28b 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1042,10 +1042,10 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
1042 * the page size, the remaining memory is zeroed when mapped, and 1042 * the page size, the remaining memory is zeroed when mapped, and
1043 * writes to that region are not written out to the file." 1043 * writes to that region are not written out to the file."
1044 */ 1044 */
1045 kaddr = kmap_atomic(page, KM_USER0); 1045 kaddr = kmap_atomic(page);
1046 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); 1046 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
1047 flush_dcache_page(page); 1047 flush_dcache_page(page);
1048 kunmap_atomic(kaddr, KM_USER0); 1048 kunmap_atomic(kaddr);
1049 1049
1050 if (i_size > synced_i_size) { 1050 if (i_size > synced_i_size) {
1051 err = inode->i_sb->s_op->write_inode(inode, NULL); 1051 err = inode->i_sb->s_op->write_inode(inode, NULL);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 63765d58445b..76e4e0566ad6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2076,15 +2076,13 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
2076 goto out_umount; 2076 goto out_umount;
2077 } 2077 }
2078 2078
2079 sb->s_root = d_alloc_root(root); 2079 sb->s_root = d_make_root(root);
2080 if (!sb->s_root) 2080 if (!sb->s_root)
2081 goto out_iput; 2081 goto out_umount;
2082 2082
2083 mutex_unlock(&c->umount_mutex); 2083 mutex_unlock(&c->umount_mutex);
2084 return 0; 2084 return 0;
2085 2085
2086out_iput:
2087 iput(root);
2088out_umount: 2086out_umount:
2089 ubifs_umount(c); 2087 ubifs_umount(c);
2090out_unlock: 2088out_unlock:
diff --git a/fs/udf/file.c b/fs/udf/file.c
index dca0c3881e82..7f3f7ba3df6e 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -87,10 +87,10 @@ static int udf_adinicb_write_end(struct file *file,
87 char *kaddr; 87 char *kaddr;
88 struct udf_inode_info *iinfo = UDF_I(inode); 88 struct udf_inode_info *iinfo = UDF_I(inode);
89 89
90 kaddr = kmap_atomic(page, KM_USER0); 90 kaddr = kmap_atomic(page);
91 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, 91 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset,
92 kaddr + offset, copied); 92 kaddr + offset, copied);
93 kunmap_atomic(kaddr, KM_USER0); 93 kunmap_atomic(kaddr);
94 94
95 return simple_write_end(file, mapping, pos, len, copied, page, fsdata); 95 return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
96} 96}
@@ -201,12 +201,10 @@ out:
201static int udf_release_file(struct inode *inode, struct file *filp) 201static int udf_release_file(struct inode *inode, struct file *filp)
202{ 202{
203 if (filp->f_mode & FMODE_WRITE) { 203 if (filp->f_mode & FMODE_WRITE) {
204 mutex_lock(&inode->i_mutex);
205 down_write(&UDF_I(inode)->i_data_sem); 204 down_write(&UDF_I(inode)->i_data_sem);
206 udf_discard_prealloc(inode); 205 udf_discard_prealloc(inode);
207 udf_truncate_tail_extent(inode); 206 udf_truncate_tail_extent(inode);
208 up_write(&UDF_I(inode)->i_data_sem); 207 up_write(&UDF_I(inode)->i_data_sem);
209 mutex_unlock(&inode->i_mutex);
210 } 208 }
211 return 0; 209 return 0;
212} 210}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 08bf46edf9c4..38de8f234b94 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,8 +32,6 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
37static inline int udf_match(int len1, const unsigned char *name1, int len2, 35static inline int udf_match(int len1, const unsigned char *name1, int len2,
38 const unsigned char *name2) 36 const unsigned char *name2)
39{ 37{
@@ -649,10 +647,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
649 struct udf_inode_info *dinfo = UDF_I(dir); 647 struct udf_inode_info *dinfo = UDF_I(dir);
650 struct udf_inode_info *iinfo; 648 struct udf_inode_info *iinfo;
651 649
652 err = -EMLINK;
653 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out;
655
656 err = -EIO; 650 err = -EIO;
657 inode = udf_new_inode(dir, S_IFDIR | mode, &err); 651 inode = udf_new_inode(dir, S_IFDIR | mode, &err);
658 if (!inode) 652 if (!inode)
@@ -1032,9 +1026,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1032 struct fileIdentDesc cfi, *fi; 1026 struct fileIdentDesc cfi, *fi;
1033 int err; 1027 int err;
1034 1028
1035 if (inode->i_nlink >= UDF_MAX_LINKS)
1036 return -EMLINK;
1037
1038 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1029 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1039 if (!fi) { 1030 if (!fi) {
1040 return err; 1031 return err;
@@ -1126,10 +1117,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1126 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) != 1117 if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
1127 old_dir->i_ino) 1118 old_dir->i_ino)
1128 goto end_rename; 1119 goto end_rename;
1129
1130 retval = -EMLINK;
1131 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1132 goto end_rename;
1133 } 1120 }
1134 if (!nfi) { 1121 if (!nfi) {
1135 nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, 1122 nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index c09a84daaf50..85067b4c7e14 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,6 +75,8 @@
75 75
76#define UDF_DEFAULT_BLOCKSIZE 2048 76#define UDF_DEFAULT_BLOCKSIZE 2048
77 77
78enum { UDF_MAX_LINKS = 0xffff };
79
78/* These are the "meat" - everything else is stuffing */ 80/* These are the "meat" - everything else is stuffing */
79static int udf_fill_super(struct super_block *, void *, int); 81static int udf_fill_super(struct super_block *, void *, int);
80static void udf_put_super(struct super_block *); 82static void udf_put_super(struct super_block *);
@@ -2035,13 +2037,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2035 } 2037 }
2036 2038
2037 /* Allocate a dentry for the root inode */ 2039 /* Allocate a dentry for the root inode */
2038 sb->s_root = d_alloc_root(inode); 2040 sb->s_root = d_make_root(inode);
2039 if (!sb->s_root) { 2041 if (!sb->s_root) {
2040 udf_err(sb, "Couldn't allocate root dentry\n"); 2042 udf_err(sb, "Couldn't allocate root dentry\n");
2041 iput(inode);
2042 goto error_out; 2043 goto error_out;
2043 } 2044 }
2044 sb->s_maxbytes = MAX_LFS_FILESIZE; 2045 sb->s_maxbytes = MAX_LFS_FILESIZE;
2046 sb->s_max_links = UDF_MAX_LINKS;
2045 return 0; 2047 return 0;
2046 2048
2047error_out: 2049error_out:
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 38cac199edff..a2281cadefa1 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -166,10 +166,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
166 int error; 166 int error;
167 167
168 lock_ufs(dir->i_sb); 168 lock_ufs(dir->i_sb);
169 if (inode->i_nlink >= UFS_LINK_MAX) {
170 unlock_ufs(dir->i_sb);
171 return -EMLINK;
172 }
173 169
174 inode->i_ctime = CURRENT_TIME_SEC; 170 inode->i_ctime = CURRENT_TIME_SEC;
175 inode_inc_link_count(inode); 171 inode_inc_link_count(inode);
@@ -183,10 +179,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
183static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) 179static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
184{ 180{
185 struct inode * inode; 181 struct inode * inode;
186 int err = -EMLINK; 182 int err;
187
188 if (dir->i_nlink >= UFS_LINK_MAX)
189 goto out;
190 183
191 lock_ufs(dir->i_sb); 184 lock_ufs(dir->i_sb);
192 inode_inc_link_count(dir); 185 inode_inc_link_count(dir);
@@ -305,11 +298,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
305 drop_nlink(new_inode); 298 drop_nlink(new_inode);
306 inode_dec_link_count(new_inode); 299 inode_dec_link_count(new_inode);
307 } else { 300 } else {
308 if (dir_de) {
309 err = -EMLINK;
310 if (new_dir->i_nlink >= UFS_LINK_MAX)
311 goto out_dir;
312 }
313 err = ufs_add_link(new_dentry, old_inode); 301 err = ufs_add_link(new_dentry, old_inode);
314 if (err) 302 if (err)
315 goto out_dir; 303 goto out_dir;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 5246ee3e5607..f636f6b460d0 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1157,16 +1157,17 @@ magic_found:
1157 "fast symlink size (%u)\n", uspi->s_maxsymlinklen); 1157 "fast symlink size (%u)\n", uspi->s_maxsymlinklen);
1158 uspi->s_maxsymlinklen = maxsymlen; 1158 uspi->s_maxsymlinklen = maxsymlen;
1159 } 1159 }
1160 sb->s_max_links = UFS_LINK_MAX;
1160 1161
1161 inode = ufs_iget(sb, UFS_ROOTINO); 1162 inode = ufs_iget(sb, UFS_ROOTINO);
1162 if (IS_ERR(inode)) { 1163 if (IS_ERR(inode)) {
1163 ret = PTR_ERR(inode); 1164 ret = PTR_ERR(inode);
1164 goto failed; 1165 goto failed;
1165 } 1166 }
1166 sb->s_root = d_alloc_root(inode); 1167 sb->s_root = d_make_root(inode);
1167 if (!sb->s_root) { 1168 if (!sb->s_root) {
1168 ret = -ENOMEM; 1169 ret = -ENOMEM;
1169 goto dalloc_failed; 1170 goto failed;
1170 } 1171 }
1171 1172
1172 ufs_setup_cstotal(sb); 1173 ufs_setup_cstotal(sb);
@@ -1180,8 +1181,6 @@ magic_found:
1180 UFSD("EXIT\n"); 1181 UFSD("EXIT\n");
1181 return 0; 1182 return 0;
1182 1183
1183dalloc_failed:
1184 iput(inode);
1185failed: 1184failed:
1186 if (ubh) 1185 if (ubh)
1187 ubh_brelse_uspi (uspi); 1186 ubh_brelse_uspi (uspi);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index cbcb7bea38e2..53db20ee3e77 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -139,10 +139,10 @@ xfs_qm_adjust_dqtimers(
139 139
140 if (!d->d_btimer) { 140 if (!d->d_btimer) {
141 if ((d->d_blk_softlimit && 141 if ((d->d_blk_softlimit &&
142 (be64_to_cpu(d->d_bcount) >= 142 (be64_to_cpu(d->d_bcount) >
143 be64_to_cpu(d->d_blk_softlimit))) || 143 be64_to_cpu(d->d_blk_softlimit))) ||
144 (d->d_blk_hardlimit && 144 (d->d_blk_hardlimit &&
145 (be64_to_cpu(d->d_bcount) >= 145 (be64_to_cpu(d->d_bcount) >
146 be64_to_cpu(d->d_blk_hardlimit)))) { 146 be64_to_cpu(d->d_blk_hardlimit)))) {
147 d->d_btimer = cpu_to_be32(get_seconds() + 147 d->d_btimer = cpu_to_be32(get_seconds() +
148 mp->m_quotainfo->qi_btimelimit); 148 mp->m_quotainfo->qi_btimelimit);
@@ -151,10 +151,10 @@ xfs_qm_adjust_dqtimers(
151 } 151 }
152 } else { 152 } else {
153 if ((!d->d_blk_softlimit || 153 if ((!d->d_blk_softlimit ||
154 (be64_to_cpu(d->d_bcount) < 154 (be64_to_cpu(d->d_bcount) <=
155 be64_to_cpu(d->d_blk_softlimit))) && 155 be64_to_cpu(d->d_blk_softlimit))) &&
156 (!d->d_blk_hardlimit || 156 (!d->d_blk_hardlimit ||
157 (be64_to_cpu(d->d_bcount) < 157 (be64_to_cpu(d->d_bcount) <=
158 be64_to_cpu(d->d_blk_hardlimit)))) { 158 be64_to_cpu(d->d_blk_hardlimit)))) {
159 d->d_btimer = 0; 159 d->d_btimer = 0;
160 } 160 }
@@ -162,10 +162,10 @@ xfs_qm_adjust_dqtimers(
162 162
163 if (!d->d_itimer) { 163 if (!d->d_itimer) {
164 if ((d->d_ino_softlimit && 164 if ((d->d_ino_softlimit &&
165 (be64_to_cpu(d->d_icount) >= 165 (be64_to_cpu(d->d_icount) >
166 be64_to_cpu(d->d_ino_softlimit))) || 166 be64_to_cpu(d->d_ino_softlimit))) ||
167 (d->d_ino_hardlimit && 167 (d->d_ino_hardlimit &&
168 (be64_to_cpu(d->d_icount) >= 168 (be64_to_cpu(d->d_icount) >
169 be64_to_cpu(d->d_ino_hardlimit)))) { 169 be64_to_cpu(d->d_ino_hardlimit)))) {
170 d->d_itimer = cpu_to_be32(get_seconds() + 170 d->d_itimer = cpu_to_be32(get_seconds() +
171 mp->m_quotainfo->qi_itimelimit); 171 mp->m_quotainfo->qi_itimelimit);
@@ -174,10 +174,10 @@ xfs_qm_adjust_dqtimers(
174 } 174 }
175 } else { 175 } else {
176 if ((!d->d_ino_softlimit || 176 if ((!d->d_ino_softlimit ||
177 (be64_to_cpu(d->d_icount) < 177 (be64_to_cpu(d->d_icount) <=
178 be64_to_cpu(d->d_ino_softlimit))) && 178 be64_to_cpu(d->d_ino_softlimit))) &&
179 (!d->d_ino_hardlimit || 179 (!d->d_ino_hardlimit ||
180 (be64_to_cpu(d->d_icount) < 180 (be64_to_cpu(d->d_icount) <=
181 be64_to_cpu(d->d_ino_hardlimit)))) { 181 be64_to_cpu(d->d_ino_hardlimit)))) {
182 d->d_itimer = 0; 182 d->d_itimer = 0;
183 } 183 }
@@ -185,10 +185,10 @@ xfs_qm_adjust_dqtimers(
185 185
186 if (!d->d_rtbtimer) { 186 if (!d->d_rtbtimer) {
187 if ((d->d_rtb_softlimit && 187 if ((d->d_rtb_softlimit &&
188 (be64_to_cpu(d->d_rtbcount) >= 188 (be64_to_cpu(d->d_rtbcount) >
189 be64_to_cpu(d->d_rtb_softlimit))) || 189 be64_to_cpu(d->d_rtb_softlimit))) ||
190 (d->d_rtb_hardlimit && 190 (d->d_rtb_hardlimit &&
191 (be64_to_cpu(d->d_rtbcount) >= 191 (be64_to_cpu(d->d_rtbcount) >
192 be64_to_cpu(d->d_rtb_hardlimit)))) { 192 be64_to_cpu(d->d_rtb_hardlimit)))) {
193 d->d_rtbtimer = cpu_to_be32(get_seconds() + 193 d->d_rtbtimer = cpu_to_be32(get_seconds() +
194 mp->m_quotainfo->qi_rtbtimelimit); 194 mp->m_quotainfo->qi_rtbtimelimit);
@@ -197,10 +197,10 @@ xfs_qm_adjust_dqtimers(
197 } 197 }
198 } else { 198 } else {
199 if ((!d->d_rtb_softlimit || 199 if ((!d->d_rtb_softlimit ||
200 (be64_to_cpu(d->d_rtbcount) < 200 (be64_to_cpu(d->d_rtbcount) <=
201 be64_to_cpu(d->d_rtb_softlimit))) && 201 be64_to_cpu(d->d_rtb_softlimit))) &&
202 (!d->d_rtb_hardlimit || 202 (!d->d_rtb_hardlimit ||
203 (be64_to_cpu(d->d_rtbcount) < 203 (be64_to_cpu(d->d_rtbcount) <=
204 be64_to_cpu(d->d_rtb_hardlimit)))) { 204 be64_to_cpu(d->d_rtb_hardlimit)))) {
205 d->d_rtbtimer = 0; 205 d->d_rtbtimer = 0;
206 } 206 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 15ff5392fb65..0ed9ee77937c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1981,7 +1981,7 @@ xfs_qm_dqcheck(
1981 1981
1982 if (!errs && ddq->d_id) { 1982 if (!errs && ddq->d_id) {
1983 if (ddq->d_blk_softlimit && 1983 if (ddq->d_blk_softlimit &&
1984 be64_to_cpu(ddq->d_bcount) >= 1984 be64_to_cpu(ddq->d_bcount) >
1985 be64_to_cpu(ddq->d_blk_softlimit)) { 1985 be64_to_cpu(ddq->d_blk_softlimit)) {
1986 if (!ddq->d_btimer) { 1986 if (!ddq->d_btimer) {
1987 if (flags & XFS_QMOPT_DOWARN) 1987 if (flags & XFS_QMOPT_DOWARN)
@@ -1992,7 +1992,7 @@ xfs_qm_dqcheck(
1992 } 1992 }
1993 } 1993 }
1994 if (ddq->d_ino_softlimit && 1994 if (ddq->d_ino_softlimit &&
1995 be64_to_cpu(ddq->d_icount) >= 1995 be64_to_cpu(ddq->d_icount) >
1996 be64_to_cpu(ddq->d_ino_softlimit)) { 1996 be64_to_cpu(ddq->d_ino_softlimit)) {
1997 if (!ddq->d_itimer) { 1997 if (!ddq->d_itimer) {
1998 if (flags & XFS_QMOPT_DOWARN) 1998 if (flags & XFS_QMOPT_DOWARN)
@@ -2003,7 +2003,7 @@ xfs_qm_dqcheck(
2003 } 2003 }
2004 } 2004 }
2005 if (ddq->d_rtb_softlimit && 2005 if (ddq->d_rtb_softlimit &&
2006 be64_to_cpu(ddq->d_rtbcount) >= 2006 be64_to_cpu(ddq->d_rtbcount) >
2007 be64_to_cpu(ddq->d_rtb_softlimit)) { 2007 be64_to_cpu(ddq->d_rtb_softlimit)) {
2008 if (!ddq->d_rtbtimer) { 2008 if (!ddq->d_rtbtimer) {
2009 if (flags & XFS_QMOPT_DOWARN) 2009 if (flags & XFS_QMOPT_DOWARN)
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index eafbcff81f3a..711a86e39ff0 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -813,11 +813,11 @@ xfs_qm_export_dquot(
813 (XFS_IS_OQUOTA_ENFORCED(mp) && 813 (XFS_IS_OQUOTA_ENFORCED(mp) &&
814 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && 814 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
815 dst->d_id != 0) { 815 dst->d_id != 0) {
816 if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && 816 if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) &&
817 (dst->d_blk_softlimit > 0)) { 817 (dst->d_blk_softlimit > 0)) {
818 ASSERT(dst->d_btimer != 0); 818 ASSERT(dst->d_btimer != 0);
819 } 819 }
820 if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && 820 if (((int) dst->d_icount > (int) dst->d_ino_softlimit) &&
821 (dst->d_ino_softlimit > 0)) { 821 (dst->d_ino_softlimit > 0)) {
822 ASSERT(dst->d_itimer != 0); 822 ASSERT(dst->d_itimer != 0);
823 } 823 }
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 866de277079a..e44ef7ee8ce8 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -118,17 +118,6 @@ xfs_rename(
118 new_parent = (src_dp != target_dp); 118 new_parent = (src_dp != target_dp);
119 src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 119 src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
120 120
121 if (src_is_directory) {
122 /*
123 * Check for link count overflow on target_dp
124 */
125 if (target_ip == NULL && new_parent &&
126 target_dp->i_d.di_nlink >= XFS_MAXLINK) {
127 error = XFS_ERROR(EMLINK);
128 goto std_return;
129 }
130 }
131
132 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 121 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
133 inodes, &num_inodes); 122 inodes, &num_inodes);
134 123
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ee5b695c99a7..baf40e378d35 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1341,6 +1341,7 @@ xfs_fs_fill_super(
1341 sb->s_blocksize = mp->m_sb.sb_blocksize; 1341 sb->s_blocksize = mp->m_sb.sb_blocksize;
1342 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1342 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1343 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); 1343 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1344 sb->s_max_links = XFS_MAXLINK;
1344 sb->s_time_gran = 1; 1345 sb->s_time_gran = 1;
1345 set_posix_acl_flag(sb); 1346 set_posix_acl_flag(sb);
1346 1347
@@ -1361,10 +1362,10 @@ xfs_fs_fill_super(
1361 error = EINVAL; 1362 error = EINVAL;
1362 goto out_syncd_stop; 1363 goto out_syncd_stop;
1363 } 1364 }
1364 sb->s_root = d_alloc_root(root); 1365 sb->s_root = d_make_root(root);
1365 if (!sb->s_root) { 1366 if (!sb->s_root) {
1366 error = ENOMEM; 1367 error = ENOMEM;
1367 goto out_iput; 1368 goto out_syncd_stop;
1368 } 1369 }
1369 1370
1370 return 0; 1371 return 0;
@@ -1383,8 +1384,6 @@ xfs_fs_fill_super(
1383 out: 1384 out:
1384 return -error; 1385 return -error;
1385 1386
1386 out_iput:
1387 iput(root);
1388 out_syncd_stop: 1387 out_syncd_stop:
1389 xfs_syncd_stop(mp); 1388 xfs_syncd_stop(mp);
1390 out_unmount: 1389 out_unmount:
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 329b06aba1c2..7adcdf15ae0c 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1151,8 +1151,8 @@ xfs_trans_add_item(
1151{ 1151{
1152 struct xfs_log_item_desc *lidp; 1152 struct xfs_log_item_desc *lidp;
1153 1153
1154 ASSERT(lip->li_mountp = tp->t_mountp); 1154 ASSERT(lip->li_mountp == tp->t_mountp);
1155 ASSERT(lip->li_ailp = tp->t_mountp->m_ail); 1155 ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
1156 1156
1157 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); 1157 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
1158 1158
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 4d00ee67792d..c4ba366d24e6 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -649,12 +649,12 @@ xfs_trans_dqresv(
649 * nblks. 649 * nblks.
650 */ 650 */
651 if (hardlimit > 0ULL && 651 if (hardlimit > 0ULL &&
652 hardlimit <= nblks + *resbcountp) { 652 hardlimit < nblks + *resbcountp) {
653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); 653 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
654 goto error_return; 654 goto error_return;
655 } 655 }
656 if (softlimit > 0ULL && 656 if (softlimit > 0ULL &&
657 softlimit <= nblks + *resbcountp) { 657 softlimit < nblks + *resbcountp) {
658 if ((timer != 0 && get_seconds() > timer) || 658 if ((timer != 0 && get_seconds() > timer) ||
659 (warns != 0 && warns >= warnlimit)) { 659 (warns != 0 && warns >= warnlimit)) {
660 xfs_quota_warn(mp, dqp, 660 xfs_quota_warn(mp, dqp,
@@ -677,11 +677,13 @@ xfs_trans_dqresv(
677 if (!softlimit) 677 if (!softlimit)
678 softlimit = q->qi_isoftlimit; 678 softlimit = q->qi_isoftlimit;
679 679
680 if (hardlimit > 0ULL && count >= hardlimit) { 680 if (hardlimit > 0ULL &&
681 hardlimit < ninos + count) {
681 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); 682 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
682 goto error_return; 683 goto error_return;
683 } 684 }
684 if (softlimit > 0ULL && count >= softlimit) { 685 if (softlimit > 0ULL &&
686 softlimit < ninos + count) {
685 if ((timer != 0 && get_seconds() > timer) || 687 if ((timer != 0 && get_seconds() > timer) ||
686 (warns != 0 && warns >= warnlimit)) { 688 (warns != 0 && warns >= warnlimit)) {
687 xfs_quota_warn(mp, dqp, 689 xfs_quota_warn(mp, dqp,
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 89dbb4a50872..79c05ac85bfe 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -296,8 +296,6 @@ xfs_bumplink(
296 xfs_trans_t *tp, 296 xfs_trans_t *tp,
297 xfs_inode_t *ip) 297 xfs_inode_t *ip)
298{ 298{
299 if (ip->i_d.di_nlink >= XFS_MAXLINK)
300 return XFS_ERROR(EMLINK);
301 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 299 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
302 300
303 ASSERT(ip->i_d.di_nlink > 0); 301 ASSERT(ip->i_d.di_nlink > 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ebdb88840a47..64981d7e7375 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -917,14 +917,6 @@ xfs_create(
917 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 917 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
918 unlock_dp_on_error = B_TRUE; 918 unlock_dp_on_error = B_TRUE;
919 919
920 /*
921 * Check for directory link count overflow.
922 */
923 if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
924 error = XFS_ERROR(EMLINK);
925 goto out_trans_cancel;
926 }
927
928 xfs_bmap_init(&free_list, &first_block); 920 xfs_bmap_init(&free_list, &first_block);
929 921
930 /* 922 /*
@@ -1429,14 +1421,6 @@ xfs_link(
1429 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 1421 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1430 1422
1431 /* 1423 /*
1432 * If the source has too many links, we can't make any more to it.
1433 */
1434 if (sip->i_d.di_nlink >= XFS_MAXLINK) {
1435 error = XFS_ERROR(EMLINK);
1436 goto error_return;
1437 }
1438
1439 /*
1440 * If we are using project inheritance, we only allow hard link 1424 * If we are using project inheritance, we only allow hard link
1441 * creation in our tree when the project IDs are the same; else 1425 * creation in our tree when the project IDs are the same; else
1442 * the tree quota mechanism could be circumvented. 1426 * the tree quota mechanism could be circumvented.