author    David Woodhouse <David.Woodhouse@intel.com>    2008-09-01 06:32:13 -0400
committer David Woodhouse <David.Woodhouse@intel.com>    2008-09-01 06:32:13 -0400
commit    9d7548d4ca3c52ecb58f098a32b0756cdf8f96ee (patch)
tree      651f7058bbaa2d8b2855286380d614afcf505118 /fs
parent    31db6e9ea1dbdcf66b8227b4f7035dee1b1dd8c0 (diff)
parent    bef69ea0dcce574a425feb0a5aa4c63dd108b9a6 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/vfs_dir.c | 1
-rw-r--r-- fs/Kconfig | 21
-rw-r--r-- fs/adfs/dir.c | 1
-rw-r--r-- fs/affs/dir.c | 1
-rw-r--r-- fs/autofs4/root.c | 2
-rw-r--r-- fs/befs/linuxvfs.c | 1
-rw-r--r-- fs/binfmt_flat.c | 4
-rw-r--r-- fs/binfmt_misc.c | 4
-rw-r--r-- fs/bio.c | 48
-rw-r--r-- fs/buffer.c | 13
-rw-r--r-- fs/cifs/CHANGES | 7
-rw-r--r-- fs/cifs/README | 30
-rw-r--r-- fs/cifs/asn1.c | 11
-rw-r--r-- fs/cifs/cifs_spnego.c | 4
-rw-r--r-- fs/cifs/cifs_spnego.h | 2
-rw-r--r-- fs/cifs/cifsfs.c | 2
-rw-r--r-- fs/cifs/cifsglob.h | 3
-rw-r--r-- fs/cifs/connect.c | 33
-rw-r--r-- fs/cifs/dns_resolve.c | 7
-rw-r--r-- fs/cifs/inode.c | 4
-rw-r--r-- fs/cifs/sess.c | 11
-rw-r--r-- fs/compat.c | 8
-rw-r--r-- fs/configfs/dir.c | 17
-rw-r--r-- fs/cramfs/inode.c | 84
-rw-r--r-- fs/dcache.c | 2
-rw-r--r-- fs/dlm/config.c | 203
-rw-r--r-- fs/dlm/user.c | 10
-rw-r--r-- fs/efs/namei.c | 3
-rw-r--r-- fs/eventpoll.c | 5
-rw-r--r-- fs/ext4/balloc.c | 3
-rw-r--r-- fs/ext4/dir.c | 20
-rw-r--r-- fs/ext4/ext4.h | 4
-rw-r--r-- fs/ext4/ext4_extents.h | 4
-rw-r--r-- fs/ext4/ext4_jbd2.h | 8
-rw-r--r-- fs/ext4/extents.c | 113
-rw-r--r-- fs/ext4/ialloc.c | 2
-rw-r--r-- fs/ext4/inode.c | 478
-rw-r--r-- fs/ext4/mballoc.c | 53
-rw-r--r-- fs/ext4/migrate.c | 3
-rw-r--r-- fs/ext4/resize.c | 3
-rw-r--r-- fs/ext4/super.c | 1
-rw-r--r-- fs/fat/inode.c | 10
-rw-r--r-- fs/inode.c | 1
-rw-r--r-- fs/ioprio.c | 8
-rw-r--r-- fs/jbd/transaction.c | 4
-rw-r--r-- fs/jbd2/transaction.c | 4
-rw-r--r-- fs/jffs2/jffs2_fs_i.h | 1
-rw-r--r-- fs/lockd/svc4proc.c | 4
-rw-r--r-- fs/lockd/svcproc.c | 4
-rw-r--r-- fs/nfsd/export.c | 6
-rw-r--r-- fs/ntfs/namei.c | 89
-rw-r--r-- fs/ocfs2/cluster/netdebug.c | 26
-rw-r--r-- fs/ocfs2/cluster/tcp.c | 44
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h | 32
-rw-r--r-- fs/ocfs2/dir.c | 11
-rw-r--r-- fs/ocfs2/journal.c | 23
-rw-r--r-- fs/ocfs2/stackglue.c | 7
-rw-r--r-- fs/omfs/bitmap.c | 5
-rw-r--r-- fs/omfs/file.c | 33
-rw-r--r-- fs/omfs/inode.c | 3
-rw-r--r-- fs/proc/generic.c | 1
-rw-r--r-- fs/proc/nommu.c | 4
-rw-r--r-- fs/proc/task_mmu.c | 4
-rw-r--r-- fs/readdir.c | 8
-rw-r--r-- fs/reiserfs/super.c | 1
-rw-r--r-- fs/seq_file.c | 25
-rw-r--r-- fs/ubifs/budget.c | 33
-rw-r--r-- fs/ubifs/commit.c | 3
-rw-r--r-- fs/ubifs/debug.c | 27
-rw-r--r-- fs/ubifs/debug.h | 143
-rw-r--r-- fs/ubifs/dir.c | 24
-rw-r--r-- fs/ubifs/file.c | 8
-rw-r--r-- fs/ubifs/find.c | 9
-rw-r--r-- fs/ubifs/io.c | 14
-rw-r--r-- fs/ubifs/journal.c | 110
-rw-r--r-- fs/ubifs/log.c | 4
-rw-r--r-- fs/ubifs/misc.h | 16
-rw-r--r-- fs/ubifs/orphan.c | 4
-rw-r--r-- fs/ubifs/super.c | 48
-rw-r--r-- fs/ubifs/tnc_commit.c | 37
-rw-r--r-- fs/ubifs/ubifs-media.h | 4
-rw-r--r-- fs/ubifs/ubifs.h | 33
-rw-r--r-- fs/ubifs/xattr.c | 54
-rw-r--r-- fs/xfs/linux-2.6/sema.h | 52
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c | 3
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c | 16
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h | 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_export.c | 10
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c | 1
-rw-r--r-- fs/xfs/linux-2.6/xfs_fs_subr.c | 6
-rw-r--r-- fs/xfs/linux-2.6/xfs_ioctl.c | 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.c | 192
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.h | 15
-rw-r--r-- fs/xfs/linux-2.6/xfs_linux.h | 6
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c | 6
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c | 189
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.h | 3
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.c | 22
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.h | 65
-rw-r--r-- fs/xfs/quota/xfs_dquot.c | 38
-rw-r--r-- fs/xfs/quota/xfs_dquot.h | 29
-rw-r--r-- fs/xfs/quota/xfs_dquot_item.c | 8
-rw-r--r-- fs/xfs/quota/xfs_qm.c | 14
-rw-r--r-- fs/xfs/quota/xfs_qm.h | 2
-rw-r--r-- fs/xfs/quota/xfs_qm_bhv.c | 7
-rw-r--r-- fs/xfs/quota/xfs_qm_syscalls.c | 4
-rw-r--r-- fs/xfs/xfs_acl.c | 52
-rw-r--r-- fs/xfs/xfs_acl.h | 14
-rw-r--r-- fs/xfs/xfs_arch.h | 68
-rw-r--r-- fs/xfs/xfs_attr.c | 110
-rw-r--r-- fs/xfs/xfs_attr.h | 1
-rw-r--r-- fs/xfs/xfs_attr_leaf.c | 75
-rw-r--r-- fs/xfs/xfs_attr_leaf.h | 2
-rw-r--r-- fs/xfs/xfs_bit.c | 103
-rw-r--r-- fs/xfs/xfs_bit.h | 34
-rw-r--r-- fs/xfs/xfs_bmap.c | 34
-rw-r--r-- fs/xfs/xfs_btree.c | 105
-rw-r--r-- fs/xfs/xfs_btree.h | 8
-rw-r--r-- fs/xfs/xfs_buf_item.c | 4
-rw-r--r-- fs/xfs/xfs_dfrag.c | 33
-rw-r--r-- fs/xfs/xfs_dmapi.h | 1
-rw-r--r-- fs/xfs/xfs_error.c | 5
-rw-r--r-- fs/xfs/xfs_error.h | 12
-rw-r--r-- fs/xfs/xfs_filestream.c | 2
-rw-r--r-- fs/xfs/xfs_ialloc_btree.c | 30
-rw-r--r-- fs/xfs/xfs_iget.c | 48
-rw-r--r-- fs/xfs/xfs_inode.c | 70
-rw-r--r-- fs/xfs/xfs_inode.h | 46
-rw-r--r-- fs/xfs/xfs_inode_item.c | 11
-rw-r--r-- fs/xfs/xfs_itable.c | 4
-rw-r--r-- fs/xfs/xfs_log.c | 86
-rw-r--r-- fs/xfs/xfs_log.h | 2
-rw-r--r-- fs/xfs/xfs_log_priv.h | 14
-rw-r--r-- fs/xfs/xfs_log_recover.c | 7
-rw-r--r-- fs/xfs/xfs_mount.c | 82
-rw-r--r-- fs/xfs/xfs_mount.h | 17
-rw-r--r-- fs/xfs/xfs_rtalloc.c | 19
-rw-r--r-- fs/xfs/xfs_rw.c | 2
-rw-r--r-- fs/xfs/xfs_trans.c | 75
-rw-r--r-- fs/xfs/xfs_trans.h | 12
-rw-r--r-- fs/xfs/xfs_trans_buf.c | 12
-rw-r--r-- fs/xfs/xfs_trans_item.c | 66
-rw-r--r-- fs/xfs/xfs_utils.c | 4
-rw-r--r-- fs/xfs/xfs_utils.h | 3
-rw-r--r-- fs/xfs/xfs_vfsops.c | 13
-rw-r--r-- fs/xfs/xfs_vnodeops.c | 198
146 files changed, 2103 insertions, 2240 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 88e3787c6ea9..e298fe194093 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 
 const struct file_operations v9fs_dir_operations = {
 	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
 	.readdir = v9fs_dir_readdir,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
diff --git a/fs/Kconfig b/fs/Kconfig
index 5831f9c38841..b7c88e1f0161 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1742,6 +1742,16 @@ config CIFS_WEAK_PW_HASH
 
 	  If unsure, say N.
 
+config CIFS_UPCALL
+	bool "Kerberos/SPNEGO advanced session setup"
+	depends on CIFS && KEYS
+	help
+	  Enables an upcall mechanism for CIFS which accesses
+	  userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+	  Kerberos tickets which are needed to mount to certain secure servers
+	  (for which more secure Kerberos authentication is required). If
+	  unsure, say N.
+
 config CIFS_XATTR
 	bool "CIFS extended attributes"
 	depends on CIFS
@@ -1794,17 +1804,6 @@ config CIFS_EXPERIMENTAL
 	  (which is disabled by default). See the file fs/cifs/README
 	  for more details. If unsure, say N.
 
-config CIFS_UPCALL
-	bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
-	depends on CIFS_EXPERIMENTAL
-	depends on KEYS
-	help
-	  Enables an upcall mechanism for CIFS which accesses
-	  userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-	  Kerberos tickets which are needed to mount to certain secure servers
-	  (for which more secure Kerberos authentication is required). If
-	  unsure, say N.
-
 config CIFS_DFS_UPCALL
 	bool "DFS feature support (EXPERIMENTAL)"
 	depends on CIFS_EXPERIMENTAL
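
The move above takes CIFS_UPCALL out from under CIFS_EXPERIMENTAL: it now needs only CIFS itself and the kernel keys infrastructure. As a rough sketch (based on the dependencies shown above, not taken from any particular defconfig), a kernel built for Kerberos-authenticated CIFS mounts would therefore carry:

    CONFIG_KEYS=y
    CONFIG_CIFS=y
    CONFIG_CIFS_UPCALL=y
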
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index fc1a8dc64d78..85a30e929800 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,6 +197,7 @@ out:
 
 const struct file_operations adfs_dir_operations = {
 	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
 	.readdir = adfs_readdir,
 	.fsync = file_fsync,
 };
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 6e3f282424b0..7b36904dbeac 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t);
 
 const struct file_operations affs_dir_operations = {
 	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
 	.readdir = affs_readdir,
 	.fsync = file_fsync,
 };
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index bcfb2dc0a61b..2a41c2a7fc52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = {
 	.release = dcache_dir_close,
 	.read = generic_read_dir,
 	.readdir = dcache_readdir,
+	.llseek = dcache_dir_lseek,
 	.ioctl = autofs4_root_ioctl,
 };
 
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = {
 	.release = dcache_dir_close,
 	.read = generic_read_dir,
 	.readdir = dcache_readdir,
+	.llseek = dcache_dir_lseek,
 };
 
 const struct inode_operations autofs4_indirect_root_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 02c6e62b72f8..740f53672a8a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep;
 static const struct file_operations befs_dir_operations = {
 	.read = generic_read_dir,
 	.readdir = befs_readdir,
+	.llseek = generic_file_llseek,
 };
 
 static const struct inode_operations befs_dir_inode_operations = {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 56372ecf1690..dfc0197905ca 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* Stash our initial stack pointer into the mm structure */
 	current->mm->start_stack = (unsigned long )sp;
 
-
+#ifdef FLAT_PLAT_INIT
+	FLAT_PLAT_INIT(regs);
+#endif
 	DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n",
 		(int)regs, (int)start_addr, (int)current->mm->start_stack);
 
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 756205314c24..8d7e88e02e0f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (bprm->misc_bang)
 		goto _ret;
 
-	bprm->misc_bang = 1;
-
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
 	fmt = check_file(bprm);
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval < 0)
 		goto _error;
 
+	bprm->misc_bang = 1;
+
 	retval = search_binary_handler (bprm, regs);
 	if (retval < 0)
 		goto _error;
diff --git a/fs/bio.c b/fs/bio.c
index 8000e2fa16cb..3cba7ae34d75 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -469,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 	kfree(bmd);
 }
 
-static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
+static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
+					       gfp_t gfp_mask)
 {
-	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL);
+	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
 
 	if (!bmd)
 		return NULL;
 
-	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL);
+	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
 	if (!bmd->iovecs) {
 		kfree(bmd);
 		return NULL;
 	}
 
-	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL);
+	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
 	if (bmd->sgvecs)
 		return bmd;
 
@@ -491,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
 	return NULL;
 }
 
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
-			  int uncopy)
+static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
+			  struct sg_iovec *iov, int iov_count, int uncopy)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
@@ -502,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *bv_addr = page_address(bvec->bv_page);
-		unsigned int bv_len = bvec->bv_len;
+		unsigned int bv_len = iovecs[i].bv_len;
 
 		while (bv_len && iov_idx < iov_count) {
 			unsigned int bytes;
@@ -554,7 +555,7 @@ int bio_uncopy_user(struct bio *bio)
 	struct bio_map_data *bmd = bio->bi_private;
 	int ret;
 
-	ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1);
+	ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
 
 	bio_free_map_data(bmd);
 	bio_put(bio);
@@ -596,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 		len += iov[i].iov_len;
 	}
 
-	bmd = bio_alloc_map_data(nr_pages, iov_count);
+	bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
@@ -633,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 	 * success
 	 */
 	if (!write_to_vm) {
-		ret = __bio_copy_iov(bio, iov, iov_count, 0);
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
 		if (ret)
 			goto cleanup;
 	}
@@ -942,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
 {
 	struct bio_vec *bvec;
 	const int read = bio_data_dir(bio) == READ;
-	char *p = bio->bi_private;
+	struct bio_map_data *bmd = bio->bi_private;
 	int i;
+	char *p = bmd->sgvecs[0].iov_base;
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *addr = page_address(bvec->bv_page);
+		int len = bmd->iovecs[i].bv_len;
 
 		if (read && !err)
-			memcpy(p, addr, bvec->bv_len);
+			memcpy(p, addr, len);
 
 		__free_page(bvec->bv_page);
-		p += bvec->bv_len;
+		p += len;
 	}
 
+	bio_free_map_data(bmd);
 	bio_put(bio);
 }
 
@@ -978,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 	const int nr_pages = end - start;
 	struct bio *bio;
 	struct bio_vec *bvec;
+	struct bio_map_data *bmd;
 	int i, ret;
+	struct sg_iovec iov;
+
+	iov.iov_base = data;
+	iov.iov_len = len;
+
+	bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
+	if (!bmd)
+		return ERR_PTR(-ENOMEM);
 
+	ret = -ENOMEM;
 	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
-		return ERR_PTR(-ENOMEM);
+		goto out_bmd;
 
 	while (len) {
 		struct page *page;
@@ -1016,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 		}
 	}
 
-	bio->bi_private = data;
+	bio->bi_private = bmd;
 	bio->bi_end_io = bio_copy_kern_endio;
+
+	bio_set_map_data(bmd, bio, &iov, 1);
 	return bio;
 cleanup:
 	bio_for_each_segment(bvec, bio, i)
 		__free_page(bvec->bv_page);
 
 	bio_put(bio);
+out_bmd:
+	bio_free_map_data(bmd);
 
 	return ERR_PTR(ret);
 }
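
The fs/bio.c changes above make the bounce-buffer paths record each segment's length in bmd->iovecs at mapping time and use that private copy when copying data back, rather than trusting the live bvec->bv_len values at completion time. A minimal userspace sketch of the same bookkeeping idea (all names hypothetical, not kernel code):

    #include <stdio.h>
    #include <string.h>

    /*
     * Toy stand-in for one mapped segment: 'live_len' is what the in-flight
     * machinery may scribble on, 'data' is the page contents.
     */
    struct segment {
    	char data[8];
    	unsigned int live_len;
    };

    int main(void)
    {
    	struct segment segs[2] = { { "abcd", 4 }, { "efgh", 4 } };
    	unsigned int saved_len[2];	/* like bmd->iovecs[i].bv_len */
    	char bounce[9] = "";
    	unsigned int i, off = 0;

    	/* Record the lengths at mapping time... */
    	for (i = 0; i < 2; i++)
    		saved_len[i] = segs[i].live_len;

    	/* ...because by "completion" the live values may have changed. */
    	segs[1].live_len = 0;

    	/* Copy back using the saved lengths, as bio_copy_kern_endio() does. */
    	for (i = 0; i < 2; i++) {
    		memcpy(bounce + off, segs[i].data, saved_len[i]);
    		off += saved_len[i];
    	}
    	printf("%s\n", bounce);	/* abcdefgh */
    	return 0;
    }
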
diff --git a/fs/buffer.c b/fs/buffer.c
index 38653e36e225..ac78d4c19b3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
 
-	if (buffer_ordered(bh) && (rw == WRITE))
-		rw = WRITE_BARRIER;
+	/*
+	 * Mask in barrier bit for a write (could be either a WRITE or a
+	 * WRITE_SYNC
+	 */
+	if (buffer_ordered(bh) && (rw & WRITE))
+		rw |= WRITE_BARRIER;
 
 	/*
-	 * Only clear out a write error when rewriting, should this
-	 * include WRITE_SYNC as well?
+	 * Only clear out a write error when rewriting
 	 */
-	if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
+	if (test_set_buffer_req(bh) && (rw & WRITE))
 		clear_buffer_write_io_error(bh);
 
 	/*
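
The submit_bh() hunk switches from equality tests on rw to bit tests: a synchronous write carries the WRITE bit plus extra flag bits, so rw == WRITE silently missed it, while OR-ing in WRITE_BARRIER preserves those extra bits. A toy demonstration of the difference, with made-up flag values standing in for the real block-layer definitions:

    #include <stdio.h>

    /* Illustrative values only; the real flags live in the block headers. */
    #define TOY_WRITE	0x01
    #define TOY_WRITE_SYNC	(TOY_WRITE | 0x02)	/* a write plus a sync bit */
    #define TOY_BARRIER	0x04

    int main(void)
    {
    	int rw = TOY_WRITE_SYNC;

    	/* equality misses the sync variant, a mask catches any write */
    	printf("rw == WRITE: %d   rw & WRITE: %d\n",
    	       rw == TOY_WRITE, !!(rw & TOY_WRITE));

    	/* OR-ing the barrier bit in preserves the sync bit */
    	rw |= TOY_BARRIER;
    	printf("sync bit still set: %d\n", !!(rw & 0x02));
    	return 0;
    }
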
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f5d0083e09fa..f9e4ad97a79e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,12 @@ Fix premature write failure on congested networks (we would give up
 on EAGAIN from the socket too quickly on large writes).
 Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
 Fix endian problems in acl (mode from/to cifs acl) on bigendian
-architectures.
+architectures. Fix problems with preserving timestamps on copying open
+files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit
+on parent directory when server supports Unix Extensions but not POSIX
+create. Update cifs.upcall version to handle new Kerberos sec flags
+(this requires update of cifs.upcall program from Samba). Fix memory leak
+on dns_upcall (resolving DFS referralls).
 
 Version 1.53
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 2bd6fe556f88..68b5c1169d9d 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -642,8 +642,30 @@ The statistics for the number of total SMBs and oplock breaks are different in
 that they represent all for that share, not just those for which the server
 returned success.
 
 Also note that "cat /proc/fs/cifs/DebugData" will display information about
 the active sessions and the shares that are mounted.
-Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is
-on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and
-LANMAN support do not require this helper.
+
+Enabling Kerberos (extended security) works but requires version 1.2 or later
+of the helper program cifs.upcall to be present and to be configured in the
+/etc/request-key.conf file. The cifs.upcall helper program is from the Samba
+project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
+require this helper. Note that NTLMv2 security (which does not require the
+cifs.upcall helper program), instead of using Kerberos, is sufficient for
+some use cases.
+
+Enabling DFS support (used to access shares transparently in an MS-DFS
+global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In
+addition, DFS support for target shares which are specified as UNC
+names which begin with host names (rather than IP addresses) requires
+a user space helper (such as cifs.upcall) to be present in order to
+translate host names to ip address, and the user space helper must also
+be configured in the file /etc/request-key.conf
+
+To use cifs Kerberos and DFS support, the Linux keyutils package should be
+installed and something like the following lines should be added to the
+/etc/request-key.conf file:
+
+create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
+create dns_resolver * * /usr/local/sbin/cifs.upcall %k
+
+
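
With those two request-key.conf lines installed (and the keyutils package present), a Kerberos-protected share can then be mounted in the usual way, for example (placeholder server and share names):

    mount -t cifs //server.example.com/share /mnt -o sec=krb5

The kernel requests a cifs.spnego key, the keyring infrastructure upcalls into cifs.upcall, and the helper hands back the SPNEGO blob used for session setup.
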
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 5fabd2caf93c..1b09f1670061 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	unsigned int cls, con, tag, oidlen, rc;
 	bool use_ntlmssp = false;
 	bool use_kerberos = false;
+	bool use_mskerberos = false;
 
 	*secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
 
@@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 				  *(oid + 1), *(oid + 2), *(oid + 3)));
 
 			if (compare_oid(oid, oidlen, MSKRB5_OID,
-					MSKRB5_OID_LEN))
-				use_kerberos = true;
+					MSKRB5_OID_LEN) &&
+			    !use_kerberos)
+				use_mskerberos = true;
 			else if (compare_oid(oid, oidlen, KRB5_OID,
-					     KRB5_OID_LEN))
+					     KRB5_OID_LEN) &&
+				 !use_mskerberos)
 				use_kerberos = true;
 			else if (compare_oid(oid, oidlen, NTLMSSP_OID,
 					     NTLMSSP_OID_LEN))
@@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 
 	if (use_kerberos)
 		*secType = Kerberos;
+	else if (use_mskerberos)
+		*secType = MSKerberos;
 	else if (use_ntlmssp)
 		*secType = NTLMSSP;
 
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 2434ab0e8791..117ef4bba68e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -114,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 
 	dp = description + strlen(description);
 
-	/* for now, only sec=krb5 is valid */
+	/* for now, only sec=krb5 and sec=mskrb5 are valid */
 	if (server->secType == Kerberos)
 		sprintf(dp, ";sec=krb5");
+	else if (server->secType == MSKerberos)
+		sprintf(dp, ";sec=mskrb5");
 	else
 		goto out;
 
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index 05a34b17a1ab..e4041ec4d712 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -23,7 +23,7 @@
 #ifndef _CIFS_SPNEGO_H
 #define _CIFS_SPNEGO_H
 
-#define CIFS_SPNEGO_UPCALL_VERSION 1
+#define CIFS_SPNEGO_UPCALL_VERSION 2
 
 /*
  * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION.
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e8da4ee761b5..25ecbd5b0404 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -175,6 +175,8 @@ out_no_root:
 	if (inode)
 		iput(inode);
 
+	cifs_umount(sb, cifs_sb);
+
 out_mount_failed:
 	if (cifs_sb) {
 #ifdef CONFIG_CIFS_DFS_UPCALL
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7e1cf262effe..8dfd6f24d488 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -80,7 +80,8 @@ enum securityEnum {
 	NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP, /* NTLMSSP without SPNEGO */
 	NTLMSSP, /* NTLMSSP via SPNEGO */
-	Kerberos /* Kerberos via SPNEGO */
+	Kerberos, /* Kerberos via SPNEGO */
+	MSKerberos, /* MS Kerberos via SPNEGO */
 };
 
 enum protocolEnum {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0711db65afe8..4c13bcdb92a5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 	char ntlm_session_key[CIFS_SESS_KEY_SIZE];
 	bool ntlmv2_flag = false;
 	int first_time = 0;
+	struct TCP_Server_Info *server = pSesInfo->server;
 
 	/* what if server changes its buffer size after dropping the session? */
-	if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ {
+	if (server->maxBuf == 0) /* no need to send on reconnect */ {
 		rc = CIFSSMBNegotiate(xid, pSesInfo);
-		if (rc == -EAGAIN) /* retry only once on 1st time connection */ {
+		if (rc == -EAGAIN) {
+			/* retry only once on 1st time connection */
 			rc = CIFSSMBNegotiate(xid, pSesInfo);
 			if (rc == -EAGAIN)
 				rc = -EHOSTDOWN;
 		}
 		if (rc == 0) {
 			spin_lock(&GlobalMid_Lock);
-			if (pSesInfo->server->tcpStatus != CifsExiting)
-				pSesInfo->server->tcpStatus = CifsGood;
+			if (server->tcpStatus != CifsExiting)
+				server->tcpStatus = CifsGood;
 			else
 				rc = -EHOSTDOWN;
 			spin_unlock(&GlobalMid_Lock);
@@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 			goto ss_err_exit;
 
 	pSesInfo->flags = 0;
-	pSesInfo->capabilities = pSesInfo->server->capabilities;
+	pSesInfo->capabilities = server->capabilities;
 	if (linuxExtEnabled == 0)
 		pSesInfo->capabilities &= (~CAP_UNIX);
 	/*	pSesInfo->sequence_number = 0;*/
 	cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
-		pSesInfo->server->secMode,
-		pSesInfo->server->capabilities,
-		pSesInfo->server->timeAdj));
+		 server->secMode, server->capabilities, server->timeAdj));
+
 	if (experimEnabled < 2)
 		rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
 	else if (extended_security
 			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-			&& (pSesInfo->server->secType == NTLMSSP)) {
+			&& (server->secType == NTLMSSP)) {
 		rc = -EOPNOTSUPP;
 	} else if (extended_security
 			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-			&& (pSesInfo->server->secType == RawNTLMSSP)) {
+			&& (server->secType == RawNTLMSSP)) {
 		cFYI(1, ("NTLMSSP sesssetup"));
 		rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
 						   nls_info);
@@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 
 		} else {
 			SMBNTencrypt(pSesInfo->password,
-					pSesInfo->server->cryptKey,
+					server->cryptKey,
 					ntlm_session_key);
 
 			if (first_time)
 				cifs_calculate_mac_key(
-					&pSesInfo->server->mac_signing_key,
+					&server->mac_signing_key,
 					ntlm_session_key,
 					pSesInfo->password);
 		}
@@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 						nls_info);
 		}
 	} else { /* old style NTLM 0.12 session setup */
-		SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
+		SMBNTencrypt(pSesInfo->password, server->cryptKey,
 			ntlm_session_key);
 
 		if (first_time)
-			cifs_calculate_mac_key(
-				&pSesInfo->server->mac_signing_key,
-				ntlm_session_key, pSesInfo->password);
+			cifs_calculate_mac_key(&server->mac_signing_key,
+					ntlm_session_key,
+					pSesInfo->password);
 
 		rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
 	}
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index f730ef35499e..a2e0673e1b08 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data,
 		return rc;
 }
 
+static void
+dns_resolver_destroy(struct key *key)
+{
+	kfree(key->payload.data);
+}
+
 struct key_type key_type_dns_resolver = {
 	.name = "dns_resolver",
 	.def_datalen = sizeof(struct in_addr),
 	.describe = user_describe,
 	.instantiate = dns_resolver_instantiate,
+	.destroy = dns_resolver_destroy,
 	.match = user_match,
 };
 
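
dns_resolver_instantiate() duplicates the upcall result into key->payload.data, so without a destructor every dns_resolver key leaked its payload; the new .destroy hook is the fix the CHANGES entry ("Fix memory leak on dns_upcall") refers to. The general instantiate/destroy pairing, reduced to a self-contained C sketch with toy types:

    #include <stdlib.h>
    #include <string.h>

    /* Toy analogue of a key whose payload the instantiate hook allocates. */
    struct toy_key {
    	void *payload;
    };

    static int toy_instantiate(struct toy_key *key, const void *data, size_t len)
    {
    	char *ip = malloc(len + 1);

    	if (!ip)
    		return -1;
    	memcpy(ip, data, len);
    	ip[len] = '\0';
    	key->payload = ip;
    	return 0;
    }

    /* Without this hook, every instantiated key would leak its payload. */
    static void toy_destroy(struct toy_key *key)
    {
    	free(key->payload);
    	key->payload = NULL;
    }

    int main(void)
    {
    	struct toy_key key = { 0 };

    	if (toy_instantiate(&key, "192.0.2.1", 9) == 0)
    		toy_destroy(&key);
    	return 0;
    }
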
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 28a22092d450..9c548f110102 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode,
 		if ((inode->i_mode & S_IWUGO) == 0 &&
 		    (attr & ATTR_READONLY) == 0)
 			inode->i_mode |= (S_IWUGO & default_mode);
-		inode->i_mode &= ~S_IFMT;
+
+		inode->i_mode &= ~S_IFMT;
 	}
 	/* clear write bits if ATTR_READONLY is set */
 	if (attr & ATTR_READONLY)
@@ -649,6 +650,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
 		inode->i_fop = &simple_dir_operations;
 		inode->i_uid = cifs_sb->mnt_uid;
 		inode->i_gid = cifs_sb->mnt_gid;
+	} else if (rc) {
 		_FreeXid(xid);
 		iget_failed(inode);
 		return ERR_PTR(rc);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index ed150efbe27c..b537fad3bf50 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -505,7 +505,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 			unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
 		} else
 			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
-	} else if (type == Kerberos) {
+	} else if (type == Kerberos || type == MSKerberos) {
 #ifdef CONFIG_CIFS_UPCALL
 		struct cifs_spnego_msg *msg;
 		spnego_key = cifs_get_spnego_key(ses);
@@ -516,6 +516,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 		}
 
 		msg = spnego_key->payload.data;
+		/* check version field to make sure that cifs.upcall is
+		   sending us a response in an expected form */
+		if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+			cERROR(1, ("incorrect version of cifs.upcall (expected"
+				   " %d but got %d)",
+				   CIFS_SPNEGO_UPCALL_VERSION, msg->version));
+			rc = -EKEYREJECTED;
+			goto ssetup_exit;
+		}
 		/* bail out if key is too long */
 		if (msg->sesskey_len >
 		    sizeof(ses->server->mac_signing_key.data.krb5)) {
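
Together with the CIFS_SPNEGO_UPCALL_VERSION bump to 2 in cifs_spnego.h above, this check makes the kernel reject replies from an older cifs.upcall instead of misreading them. The same version-gate pattern in a self-contained sketch (hypothetical message layout):

    #include <stdio.h>

    #define TOY_UPCALL_VERSION 2u	/* mirrors CIFS_SPNEGO_UPCALL_VERSION */

    /* Hypothetical reply layout: the version always comes first. */
    struct toy_upcall_msg {
    	unsigned int version;
    	unsigned int sesskey_len;
    };

    static int toy_check(const struct toy_upcall_msg *msg)
    {
    	if (msg->version != TOY_UPCALL_VERSION) {
    		fprintf(stderr,
    			"incorrect upcall version (expected %u but got %u)\n",
    			TOY_UPCALL_VERSION, msg->version);
    		return -1;	/* the kernel returns -EKEYREJECTED here */
    	}
    	return 0;
    }

    int main(void)
    {
    	struct toy_upcall_msg stale = { 1, 16 }, fresh = { 2, 16 };

    	printf("stale: %d  fresh: %d\n", toy_check(&stale), toy_check(&fresh));
    	return 0;
    }
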
diff --git a/fs/compat.c b/fs/compat.c
index c9d1472e65c5..075d0509970d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen,
 	if (buf->result)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->result = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	buf->result++;
 	dirent = buf->dirent;
 	if (!access_ok(VERIFY_WRITE, dirent,
@@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
 	if (reclen > buf->count)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->error = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	dirent = buf->previous;
 	if (dirent) {
 		if (__put_user(offset, &dirent->d_off))
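
In both hunks the overflow is now recorded in the buffer the callback was handed as well as returned, because the readdir machinery that drives these filldir callbacks reports back to the caller from the buffer's error/result fields, not from the callback's return value alone (the native fs/readdir.c callbacks get the same treatment in this merge). A condensed sketch of that convention (toy types and error value):

    #include <stdio.h>

    #define TOY_EOVERFLOW 75	/* illustrative; the kernel uses EOVERFLOW */

    struct toy_buf {
    	int count;
    	int error;	/* what the caller ultimately looks at */
    };

    /* Report failure both ways, as the patched compat_filldir() does. */
    static int toy_fill(struct toy_buf *buf, unsigned long long ino)
    {
    	unsigned int d_ino = ino;

    	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
    		buf->error = -TOY_EOVERFLOW;	/* seen by the caller */
    		return -TOY_EOVERFLOW;		/* stops the iteration */
    	}
    	buf->count++;
    	return 0;
    }

    int main(void)
    {
    	struct toy_buf buf = { 0, 0 };

    	toy_fill(&buf, 1ULL << 40);	/* inode too wide for 32-bit d_ino */
    	printf("count=%d error=%d\n", buf.count, buf.error);
    	return 0;
    }
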
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7a8db78a91d2..8e93341f3e82 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	 * Ensure that no racing symlink() will make detach_prep() fail while
 	 * the new link is temporarily attached
 	 */
-	mutex_lock(&configfs_symlink_mutex);
-	spin_lock(&configfs_dirent_lock);
 	do {
 		struct mutex *wait_mutex;
 
+		mutex_lock(&configfs_symlink_mutex);
+		spin_lock(&configfs_dirent_lock);
 		ret = configfs_detach_prep(dentry, &wait_mutex);
-		if (ret) {
+		if (ret)
 			configfs_detach_rollback(dentry);
 		spin_unlock(&configfs_dirent_lock);
 		mutex_unlock(&configfs_symlink_mutex);
+
+		if (ret) {
 			if (ret != -EAGAIN) {
 				config_item_put(parent_item);
 				return ret;
@@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 			/* Wait until the racing operation terminates */
 			mutex_lock(wait_mutex);
 			mutex_unlock(wait_mutex);
-
-			mutex_lock(&configfs_symlink_mutex);
-			spin_lock(&configfs_dirent_lock);
 		}
 	} while (ret == -EAGAIN);
-	spin_unlock(&configfs_dirent_lock);
-	mutex_unlock(&configfs_symlink_mutex);
 
 	/* Get a working ref for the duration of this function */
 	item = configfs_get_config_item(dentry);
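
The rmdir() fix moves both lock acquisitions inside the retry loop and drops them on every pass, success or retry, so configfs_symlink_mutex and configfs_dirent_lock are never held while the task blocks on wait_mutex; with the old placement they also had to be retaken at the bottom of the loop and released again after it. The resulting take-try-drop-wait shape, reduced to a runnable pthreads sketch (stubbed try-operation, hypothetical lock names; build with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* cf. configfs_symlink_mutex */
    static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* cf. configfs_dirent_lock */
    static pthread_mutex_t wait_mutex = PTHREAD_MUTEX_INITIALIZER;

    static int attempts;

    /* Stub: pretend a racing operation makes the first pass fail. */
    static int try_detach(void)
    {
    	return attempts++ == 0 ? -1 : 0;
    }

    int main(void)
    {
    	int ret;

    	do {
    		pthread_mutex_lock(&lock_a);	/* taken fresh on every pass */
    		pthread_mutex_lock(&lock_b);
    		ret = try_detach();
    		pthread_mutex_unlock(&lock_b);	/* always dropped before waiting */
    		pthread_mutex_unlock(&lock_a);

    		if (ret) {
    			/* wait for the racing operation, holding neither lock */
    			pthread_mutex_lock(&wait_mutex);
    			pthread_mutex_unlock(&wait_mutex);
    		}
    	} while (ret);

    	printf("detached after %d attempt(s)\n", attempts);
    	return 0;
    }
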
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 0c3b618c15b3..f40423eb1a14 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex);
 static int cramfs_iget5_test(struct inode *inode, void *opaque)
 {
 	struct cramfs_inode *cramfs_inode = opaque;
-
-	if (inode->i_ino != CRAMINO(cramfs_inode))
-		return 0; /* does not match */
-
-	if (inode->i_ino != 1)
-		return 1;
-
-	/* all empty directories, char, block, pipe, and sock, share inode #1 */
-
-	if ((inode->i_mode != cramfs_inode->mode) ||
-	    (inode->i_gid != cramfs_inode->gid) ||
-	    (inode->i_uid != cramfs_inode->uid))
-		return 0; /* does not match */
-
-	if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
-	    (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
-		return 0; /* does not match */
-
-	return 1; /* matches */
+	return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
 }
 
 static int cramfs_iget5_set(struct inode *inode, void *opaque)
 {
-	static struct timespec zerotime;
 	struct cramfs_inode *cramfs_inode = opaque;
-	inode->i_mode = cramfs_inode->mode;
-	inode->i_uid = cramfs_inode->uid;
-	inode->i_size = cramfs_inode->size;
-	inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
-	inode->i_gid = cramfs_inode->gid;
-	/* Struct copy intentional */
-	inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
 	inode->i_ino = CRAMINO(cramfs_inode);
-	/* inode->i_nlink is left 1 - arguably wrong for directories,
-	   but it's the best we can do without reading the directory
-	   contents. 1 yields the right result in GNU find, even
-	   without -noleaf option. */
-	if (S_ISREG(inode->i_mode)) {
-		inode->i_fop = &generic_ro_fops;
-		inode->i_data.a_ops = &cramfs_aops;
-	} else if (S_ISDIR(inode->i_mode)) {
-		inode->i_op = &cramfs_dir_inode_operations;
-		inode->i_fop = &cramfs_directory_operations;
-	} else if (S_ISLNK(inode->i_mode)) {
-		inode->i_op = &page_symlink_inode_operations;
-		inode->i_data.a_ops = &cramfs_aops;
-	} else {
-		inode->i_size = 0;
-		inode->i_blocks = 0;
-		init_special_inode(inode, inode->i_mode,
-			old_decode_dev(cramfs_inode->size));
-	}
 	return 0;
 }
 
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 	struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
 					    cramfs_iget5_test, cramfs_iget5_set,
 					    cramfs_inode);
+	static struct timespec zerotime;
+
 	if (inode && (inode->i_state & I_NEW)) {
+		inode->i_mode = cramfs_inode->mode;
+		inode->i_uid = cramfs_inode->uid;
+		inode->i_size = cramfs_inode->size;
+		inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+		inode->i_gid = cramfs_inode->gid;
+		/* Struct copy intentional */
+		inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+		/* inode->i_nlink is left 1 - arguably wrong for directories,
+		   but it's the best we can do without reading the directory
+		   contents. 1 yields the right result in GNU find, even
+		   without -noleaf option. */
+		if (S_ISREG(inode->i_mode)) {
+			inode->i_fop = &generic_ro_fops;
+			inode->i_data.a_ops = &cramfs_aops;
+		} else if (S_ISDIR(inode->i_mode)) {
+			inode->i_op = &cramfs_dir_inode_operations;
+			inode->i_fop = &cramfs_directory_operations;
+		} else if (S_ISLNK(inode->i_mode)) {
+			inode->i_op = &page_symlink_inode_operations;
+			inode->i_data.a_ops = &cramfs_aops;
+		} else {
+			inode->i_size = 0;
+			inode->i_blocks = 0;
+			init_special_inode(inode, inode->i_mode,
+				old_decode_dev(cramfs_inode->size));
+		}
 		unlock_new_inode(inode);
 	}
 	return inode;
 }
 
+static void cramfs_drop_inode(struct inode *inode)
+{
+	if (inode->i_ino == 1)
+		generic_delete_inode(inode);
+	else
+		generic_drop_inode(inode);
+}
+
 /*
  * We have our own block cache: don't fill up the buffer cache
  * with the rom-image, because the way the filesystem is set
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = {
 	.put_super = cramfs_put_super,
 	.remount_fs = cramfs_remount,
 	.statfs = cramfs_statfs,
+	.drop_inode = cramfs_drop_inode,
 };
 
 static int cramfs_get_sb(struct file_system_type *fs_type,
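
All empty directories and special files in a cramfs image share inode number 1, which is why cramfs_iget5_test() now refuses to match ino 1 and why cramfs_drop_inode() deletes such inodes instead of caching them: a cached ino-1 inode could otherwise be handed back for a different device node. The policy, restated as a tiny self-contained C program (toy inode type):

    #include <stdio.h>

    /* Toy inode: cramfs gives every empty dir/char/block/pipe/sock ino 1. */
    struct toy_inode {
    	unsigned long ino;
    };

    /* cf. cramfs_drop_inode(): never keep the shared ino-1 inode cached */
    static const char *drop_policy(const struct toy_inode *inode)
    {
    	return inode->ino == 1 ? "delete" : "cache";
    }

    int main(void)
    {
    	struct toy_inode fifo = { 1 }, regular = { 42 };

    	printf("ino 1: %s   ino 42: %s\n",
    	       drop_policy(&fifo), drop_policy(&regular));
    	return 0;
    }
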
diff --git a/fs/dcache.c b/fs/dcache.c
index 101663d15e9f..80e93956aced 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * If no entry exists with the exact case name, allocate new dentry with
  * the exact case, and return the spliced entry.
  */
-struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
+struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 			struct qstr *name)
 {
 	int error;
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index c4e7d721bd8d..89d2fb7b991a 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 **
 ** This copyrighted material is made available to anyone wishing to use,
 ** modify, copy, or redistribute it subject to the terms and conditions
@@ -30,16 +30,16 @@
 
 static struct config_group *space_list;
 static struct config_group *comm_list;
-static struct comm *local_comm;
+static struct dlm_comm *local_comm;
 
-struct clusters;
-struct cluster;
-struct spaces;
-struct space;
-struct comms;
-struct comm;
-struct nodes;
-struct node;
+struct dlm_clusters;
+struct dlm_cluster;
+struct dlm_spaces;
+struct dlm_space;
+struct dlm_comms;
+struct dlm_comm;
+struct dlm_nodes;
+struct dlm_node;
 
 static struct config_group *make_cluster(struct config_group *, const char *);
 static void drop_cluster(struct config_group *, struct config_item *);
@@ -68,17 +68,22 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
 static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
 			  const char *buf, size_t len);
 
-static ssize_t comm_nodeid_read(struct comm *cm, char *buf);
-static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len);
-static ssize_t comm_local_read(struct comm *cm, char *buf);
-static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len);
-static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len);
-static ssize_t node_nodeid_read(struct node *nd, char *buf);
-static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
-static ssize_t node_weight_read(struct node *nd, char *buf);
-static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
-
-struct cluster {
+static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf);
+static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf,
+				size_t len);
+static ssize_t comm_local_read(struct dlm_comm *cm, char *buf);
+static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
+				size_t len);
+static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
+				size_t len);
+static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
+static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
+				size_t len);
+static ssize_t node_weight_read(struct dlm_node *nd, char *buf);
+static ssize_t node_weight_write(struct dlm_node *nd, const char *buf,
+				size_t len);
+
+struct dlm_cluster {
 	struct config_group group;
 	unsigned int cl_tcp_port;
 	unsigned int cl_buffer_size;
@@ -109,11 +114,11 @@ enum {
 
 struct cluster_attribute {
 	struct configfs_attribute attr;
-	ssize_t (*show)(struct cluster *, char *);
-	ssize_t (*store)(struct cluster *, const char *, size_t);
+	ssize_t (*show)(struct dlm_cluster *, char *);
+	ssize_t (*store)(struct dlm_cluster *, const char *, size_t);
 };
 
-static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
+static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
 			   int *info_field, int check_zero,
 			   const char *buf, size_t len)
 {
@@ -134,12 +139,12 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
 }
 
 #define CLUSTER_ATTR(name, check_zero) \
-static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
+static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \
 { \
 	return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
 			   check_zero, buf, len); \
 } \
-static ssize_t name##_read(struct cluster *cl, char *buf) \
+static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \
 { \
 	return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
 } \
@@ -181,8 +186,8 @@ enum {
 
 struct comm_attribute {
 	struct configfs_attribute attr;
-	ssize_t (*show)(struct comm *, char *);
-	ssize_t (*store)(struct comm *, const char *, size_t);
+	ssize_t (*show)(struct dlm_comm *, char *);
+	ssize_t (*store)(struct dlm_comm *, const char *, size_t);
 };
 
 static struct comm_attribute comm_attr_nodeid = {
@@ -222,8 +227,8 @@ enum {
 
 struct node_attribute {
 	struct configfs_attribute attr;
-	ssize_t (*show)(struct node *, char *);
-	ssize_t (*store)(struct node *, const char *, size_t);
+	ssize_t (*show)(struct dlm_node *, char *);
+	ssize_t (*store)(struct dlm_node *, const char *, size_t);
 };
 
 static struct node_attribute node_attr_nodeid = {
@@ -248,26 +253,26 @@ static struct configfs_attribute *node_attrs[] = {
 	NULL,
 };
 
-struct clusters {
+struct dlm_clusters {
 	struct configfs_subsystem subsys;
 };
 
-struct spaces {
+struct dlm_spaces {
 	struct config_group ss_group;
 };
 
-struct space {
+struct dlm_space {
 	struct config_group group;
 	struct list_head members;
 	struct mutex members_lock;
 	int members_count;
 };
 
-struct comms {
+struct dlm_comms {
 	struct config_group cs_group;
 };
 
-struct comm {
+struct dlm_comm {
 	struct config_item item;
 	int nodeid;
 	int local;
@@ -275,11 +280,11 @@ struct comm {
 	struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
 };
 
-struct nodes {
+struct dlm_nodes {
 	struct config_group ns_group;
 };
 
-struct node {
+struct dlm_node {
 	struct config_item item;
 	struct list_head list; /* space->members */
 	int nodeid;
@@ -372,38 +377,40 @@ static struct config_item_type node_type = {
 	.ct_owner = THIS_MODULE,
 };
 
-static struct cluster *to_cluster(struct config_item *i)
+static struct dlm_cluster *to_cluster(struct config_item *i)
 {
-	return i ? container_of(to_config_group(i), struct cluster, group):NULL;
+	return i ? container_of(to_config_group(i), struct dlm_cluster, group) :
+		   NULL;
 }
 
-static struct space *to_space(struct config_item *i)
+static struct dlm_space *to_space(struct config_item *i)
 {
-	return i ? container_of(to_config_group(i), struct space, group) : NULL;
+	return i ? container_of(to_config_group(i), struct dlm_space, group) :
+		   NULL;
 }
 
-static struct comm *to_comm(struct config_item *i)
+static struct dlm_comm *to_comm(struct config_item *i)
 {
-	return i ? container_of(i, struct comm, item) : NULL;
+	return i ? container_of(i, struct dlm_comm, item) : NULL;
 }
 
-static struct node *to_node(struct config_item *i)
+static struct dlm_node *to_node(struct config_item *i)
 {
-	return i ? container_of(i, struct node, item) : NULL;
+	return i ? container_of(i, struct dlm_node, item) : NULL;
 }
 
 static struct config_group *make_cluster(struct config_group *g,
 					 const char *name)
 {
-	struct cluster *cl = NULL;
-	struct spaces *sps = NULL;
-	struct comms *cms = NULL;
+	struct dlm_cluster *cl = NULL;
+	struct dlm_spaces *sps = NULL;
+	struct dlm_comms *cms = NULL;
 	void *gps = NULL;
 
-	cl = kzalloc(sizeof(struct cluster), GFP_KERNEL);
+	cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL);
 	gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
-	sps = kzalloc(sizeof(struct spaces), GFP_KERNEL);
-	cms = kzalloc(sizeof(struct comms), GFP_KERNEL);
+	sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL);
+	cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL);
 
 	if (!cl || !gps || !sps || !cms)
 		goto fail;
@@ -443,7 +450,7 @@ static struct config_group *make_cluster(struct config_group *g,
 
 static void drop_cluster(struct config_group *g, struct config_item *i)
 {
-	struct cluster *cl = to_cluster(i);
+	struct dlm_cluster *cl = to_cluster(i);
 	struct config_item *tmp;
 	int j;
 
@@ -461,20 +468,20 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
 
 static void release_cluster(struct config_item *i)
 {
-	struct cluster *cl = to_cluster(i);
+	struct dlm_cluster *cl = to_cluster(i);
 	kfree(cl->group.default_groups);
 	kfree(cl);
 }
 
 static struct config_group *make_space(struct config_group *g, const char *name)
 {
-	struct space *sp = NULL;
-	struct nodes *nds = NULL;
+	struct dlm_space *sp = NULL;
+	struct dlm_nodes *nds = NULL;
 	void *gps = NULL;
 
-	sp = kzalloc(sizeof(struct space), GFP_KERNEL);
+	sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL);
 	gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL);
-	nds = kzalloc(sizeof(struct nodes), GFP_KERNEL);
+	nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL);
 
 	if (!sp || !gps || !nds)
 		goto fail;
@@ -500,7 +507,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
 
 static void drop_space(struct config_group *g, struct config_item *i)
 {
-	struct space *sp = to_space(i);
+	struct dlm_space *sp = to_space(i);
 	struct config_item *tmp;
 	int j;
 
@@ -517,16 +524,16 @@ static void drop_space(struct config_group *g, struct config_item *i)
 
 static void release_space(struct config_item *i)
 {
-	struct space *sp = to_space(i);
+	struct dlm_space *sp = to_space(i);
 	kfree(sp->group.default_groups);
 	kfree(sp);
 }
 
 static struct config_item *make_comm(struct config_group *g, const char *name)
 {
-	struct comm *cm;
+	struct dlm_comm *cm;
 
-	cm = kzalloc(sizeof(struct comm), GFP_KERNEL);
+	cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL);
 	if (!cm)
 		return ERR_PTR(-ENOMEM);
 
@@ -539,7 +546,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
 
 static void drop_comm(struct config_group *g, struct config_item *i)
 {
-	struct comm *cm = to_comm(i);
+	struct dlm_comm *cm = to_comm(i);
 	if (local_comm == cm)
 		local_comm = NULL;
 	dlm_lowcomms_close(cm->nodeid);
@@ -550,16 +557,16 @@ static void drop_comm(struct config_group *g, struct config_item *i)
 
 static void release_comm(struct config_item *i)
 {
-	struct comm *cm = to_comm(i);
+	struct dlm_comm *cm = to_comm(i);
 	kfree(cm);
 }
 
 static struct config_item *make_node(struct config_group *g, const char *name)
 {
-	struct space *sp = to_space(g->cg_item.ci_parent);
-	struct node *nd;
+	struct dlm_space *sp = to_space(g->cg_item.ci_parent);
+	struct dlm_node *nd;
 
-	nd = kzalloc(sizeof(struct node), GFP_KERNEL);
+	nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL);
 	if (!nd)
 		return ERR_PTR(-ENOMEM);
 
@@ -578,8 +585,8 @@ static struct config_item *make_node(struct config_group *g, const char *name)
578 585
579static void drop_node(struct config_group *g, struct config_item *i) 586static void drop_node(struct config_group *g, struct config_item *i)
580{ 587{
581 struct space *sp = to_space(g->cg_item.ci_parent); 588 struct dlm_space *sp = to_space(g->cg_item.ci_parent);
582 struct node *nd = to_node(i); 589 struct dlm_node *nd = to_node(i);
583 590
584 mutex_lock(&sp->members_lock); 591 mutex_lock(&sp->members_lock);
585 list_del(&nd->list); 592 list_del(&nd->list);
@@ -591,11 +598,11 @@ static void drop_node(struct config_group *g, struct config_item *i)
591 598
592static void release_node(struct config_item *i) 599static void release_node(struct config_item *i)
593{ 600{
594 struct node *nd = to_node(i); 601 struct dlm_node *nd = to_node(i);
595 kfree(nd); 602 kfree(nd);
596} 603}
597 604
598static struct clusters clusters_root = { 605static struct dlm_clusters clusters_root = {
599 .subsys = { 606 .subsys = {
600 .su_group = { 607 .su_group = {
601 .cg_item = { 608 .cg_item = {
@@ -625,7 +632,7 @@ void dlm_config_exit(void)
625static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, 632static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
626 char *buf) 633 char *buf)
627{ 634{
628 struct cluster *cl = to_cluster(i); 635 struct dlm_cluster *cl = to_cluster(i);
629 struct cluster_attribute *cla = 636 struct cluster_attribute *cla =
630 container_of(a, struct cluster_attribute, attr); 637 container_of(a, struct cluster_attribute, attr);
631 return cla->show ? cla->show(cl, buf) : 0; 638 return cla->show ? cla->show(cl, buf) : 0;
@@ -635,7 +642,7 @@ static ssize_t store_cluster(struct config_item *i,
635 struct configfs_attribute *a, 642 struct configfs_attribute *a,
636 const char *buf, size_t len) 643 const char *buf, size_t len)
637{ 644{
638 struct cluster *cl = to_cluster(i); 645 struct dlm_cluster *cl = to_cluster(i);
639 struct cluster_attribute *cla = 646 struct cluster_attribute *cla =
640 container_of(a, struct cluster_attribute, attr); 647 container_of(a, struct cluster_attribute, attr);
641 return cla->store ? cla->store(cl, buf, len) : -EINVAL; 648 return cla->store ? cla->store(cl, buf, len) : -EINVAL;
@@ -644,7 +651,7 @@ static ssize_t store_cluster(struct config_item *i,
644static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, 651static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
645 char *buf) 652 char *buf)
646{ 653{
647 struct comm *cm = to_comm(i); 654 struct dlm_comm *cm = to_comm(i);
648 struct comm_attribute *cma = 655 struct comm_attribute *cma =
649 container_of(a, struct comm_attribute, attr); 656 container_of(a, struct comm_attribute, attr);
650 return cma->show ? cma->show(cm, buf) : 0; 657 return cma->show ? cma->show(cm, buf) : 0;
@@ -653,29 +660,31 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
653static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, 660static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
654 const char *buf, size_t len) 661 const char *buf, size_t len)
655{ 662{
656 struct comm *cm = to_comm(i); 663 struct dlm_comm *cm = to_comm(i);
657 struct comm_attribute *cma = 664 struct comm_attribute *cma =
658 container_of(a, struct comm_attribute, attr); 665 container_of(a, struct comm_attribute, attr);
659 return cma->store ? cma->store(cm, buf, len) : -EINVAL; 666 return cma->store ? cma->store(cm, buf, len) : -EINVAL;
660} 667}
661 668
662static ssize_t comm_nodeid_read(struct comm *cm, char *buf) 669static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf)
663{ 670{
664 return sprintf(buf, "%d\n", cm->nodeid); 671 return sprintf(buf, "%d\n", cm->nodeid);
665} 672}
666 673
667static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len) 674static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf,
675 size_t len)
668{ 676{
669 cm->nodeid = simple_strtol(buf, NULL, 0); 677 cm->nodeid = simple_strtol(buf, NULL, 0);
670 return len; 678 return len;
671} 679}
672 680
673static ssize_t comm_local_read(struct comm *cm, char *buf) 681static ssize_t comm_local_read(struct dlm_comm *cm, char *buf)
674{ 682{
675 return sprintf(buf, "%d\n", cm->local); 683 return sprintf(buf, "%d\n", cm->local);
676} 684}
677 685
678static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) 686static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
687 size_t len)
679{ 688{
680 cm->local = simple_strtol(buf, NULL, 0); 689 cm->local = simple_strtol(buf, NULL, 0);
681 if (cm->local && !local_comm) 690 if (cm->local && !local_comm)
@@ -683,7 +692,7 @@ static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len)
683 return len; 692 return len;
684} 693}
685 694
686static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) 695static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
687{ 696{
688 struct sockaddr_storage *addr; 697 struct sockaddr_storage *addr;
689 698
@@ -705,7 +714,7 @@ static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len)
705static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, 714static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
706 char *buf) 715 char *buf)
707{ 716{
708 struct node *nd = to_node(i); 717 struct dlm_node *nd = to_node(i);
709 struct node_attribute *nda = 718 struct node_attribute *nda =
710 container_of(a, struct node_attribute, attr); 719 container_of(a, struct node_attribute, attr);
711 return nda->show ? nda->show(nd, buf) : 0; 720 return nda->show ? nda->show(nd, buf) : 0;
@@ -714,29 +723,31 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
714static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, 723static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
715 const char *buf, size_t len) 724 const char *buf, size_t len)
716{ 725{
717 struct node *nd = to_node(i); 726 struct dlm_node *nd = to_node(i);
718 struct node_attribute *nda = 727 struct node_attribute *nda =
719 container_of(a, struct node_attribute, attr); 728 container_of(a, struct node_attribute, attr);
720 return nda->store ? nda->store(nd, buf, len) : -EINVAL; 729 return nda->store ? nda->store(nd, buf, len) : -EINVAL;
721} 730}
722 731
723static ssize_t node_nodeid_read(struct node *nd, char *buf) 732static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf)
724{ 733{
725 return sprintf(buf, "%d\n", nd->nodeid); 734 return sprintf(buf, "%d\n", nd->nodeid);
726} 735}
727 736
728static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len) 737static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
738 size_t len)
729{ 739{
730 nd->nodeid = simple_strtol(buf, NULL, 0); 740 nd->nodeid = simple_strtol(buf, NULL, 0);
731 return len; 741 return len;
732} 742}
733 743
734static ssize_t node_weight_read(struct node *nd, char *buf) 744static ssize_t node_weight_read(struct dlm_node *nd, char *buf)
735{ 745{
736 return sprintf(buf, "%d\n", nd->weight); 746 return sprintf(buf, "%d\n", nd->weight);
737} 747}
738 748
739static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) 749static ssize_t node_weight_write(struct dlm_node *nd, const char *buf,
750 size_t len)
740{ 751{
741 nd->weight = simple_strtol(buf, NULL, 0); 752 nd->weight = simple_strtol(buf, NULL, 0);
742 return len; 753 return len;
@@ -746,7 +757,7 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
746 * Functions for the dlm to get the info that's been configured 757 * Functions for the dlm to get the info that's been configured
747 */ 758 */
748 759
749static struct space *get_space(char *name) 760static struct dlm_space *get_space(char *name)
750{ 761{
751 struct config_item *i; 762 struct config_item *i;
752 763
@@ -760,15 +771,15 @@ static struct space *get_space(char *name)
760 return to_space(i); 771 return to_space(i);
761} 772}
762 773
763static void put_space(struct space *sp) 774static void put_space(struct dlm_space *sp)
764{ 775{
765 config_item_put(&sp->group.cg_item); 776 config_item_put(&sp->group.cg_item);
766} 777}
767 778
768static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) 779static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
769{ 780{
770 struct config_item *i; 781 struct config_item *i;
771 struct comm *cm = NULL; 782 struct dlm_comm *cm = NULL;
772 int found = 0; 783 int found = 0;
773 784
774 if (!comm_list) 785 if (!comm_list)
@@ -801,7 +812,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
801 return cm; 812 return cm;
802} 813}
803 814
804static void put_comm(struct comm *cm) 815static void put_comm(struct dlm_comm *cm)
805{ 816{
806 config_item_put(&cm->item); 817 config_item_put(&cm->item);
807} 818}
@@ -810,8 +821,8 @@ static void put_comm(struct comm *cm)
810int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, 821int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
811 int **new_out, int *new_count_out) 822 int **new_out, int *new_count_out)
812{ 823{
813 struct space *sp; 824 struct dlm_space *sp;
814 struct node *nd; 825 struct dlm_node *nd;
815 int i = 0, rv = 0, ids_count = 0, new_count = 0; 826 int i = 0, rv = 0, ids_count = 0, new_count = 0;
816 int *ids, *new; 827 int *ids, *new;
817 828
@@ -874,8 +885,8 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
874 885
875int dlm_node_weight(char *lsname, int nodeid) 886int dlm_node_weight(char *lsname, int nodeid)
876{ 887{
877 struct space *sp; 888 struct dlm_space *sp;
878 struct node *nd; 889 struct dlm_node *nd;
879 int w = -EEXIST; 890 int w = -EEXIST;
880 891
881 sp = get_space(lsname); 892 sp = get_space(lsname);
@@ -897,7 +908,7 @@ int dlm_node_weight(char *lsname, int nodeid)
897 908
898int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) 909int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
899{ 910{
900 struct comm *cm = get_comm(nodeid, NULL); 911 struct dlm_comm *cm = get_comm(nodeid, NULL);
901 if (!cm) 912 if (!cm)
902 return -EEXIST; 913 return -EEXIST;
903 if (!cm->addr_count) 914 if (!cm->addr_count)
@@ -909,7 +920,7 @@ int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
909 920
910int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) 921int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
911{ 922{
912 struct comm *cm = get_comm(0, addr); 923 struct dlm_comm *cm = get_comm(0, addr);
913 if (!cm) 924 if (!cm)
914 return -EEXIST; 925 return -EEXIST;
915 *nodeid = cm->nodeid; 926 *nodeid = cm->nodeid;
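The dlm/config.c hunks above rename dlm's configfs structs (comm becomes dlm_comm, node becomes dlm_node, and so on) so they no longer clash with generic kernel names; the to_comm()/to_node() accessors keep working unchanged because they are plain container_of() wrappers. A minimal userspace sketch of that pattern, with struct contents assumed for illustration:

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for the kernel types; fields assumed for illustration. */
struct config_item { const char *name; };
struct dlm_comm {
        struct config_item item;        /* embedded, as in configfs objects */
        int nodeid;
};

/* container_of(): recover the enclosing object from a member pointer. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static struct dlm_comm *to_comm(struct config_item *i)
{
        return container_of(i, struct dlm_comm, item);
}

int main(void)
{
        struct dlm_comm cm = { .item = { "comm1" }, .nodeid = 7 };

        printf("%s -> nodeid %d\n", cm.item.name, to_comm(&cm.item)->nodeid);
        return 0;
}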
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 929e48ae7591..34f14a14fb4e 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -527,8 +527,10 @@ static ssize_t device_write(struct file *file, const char __user *buf,
527 k32buf = (struct dlm_write_request32 *)kbuf; 527 k32buf = (struct dlm_write_request32 *)kbuf;
528 kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - 528 kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) -
529 sizeof(struct dlm_write_request32)), GFP_KERNEL); 529 sizeof(struct dlm_write_request32)), GFP_KERNEL);
530 if (!kbuf) 530 if (!kbuf) {
531 kfree(k32buf);
531 return -ENOMEM; 532 return -ENOMEM;
533 }
532 534
533 if (proc) 535 if (proc)
534 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); 536 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
@@ -539,8 +541,10 @@ static ssize_t device_write(struct file *file, const char __user *buf,
539 541
540 /* do we really need this? can a write happen after a close? */ 542 /* do we really need this? can a write happen after a close? */
541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && 543 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
542 (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) 544 (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) {
543 return -EINVAL; 545 error = -EINVAL;
546 goto out_free;
547 }
544 548
545 sigfillset(&allsigs); 549 sigfillset(&allsigs);
546 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); 550 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
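The two dlm/user.c hunks above close a memory leak and an early-return bug on the compat path: the first frees k32buf when the enlarged buffer cannot be allocated, the second routes the post-close check through a cleanup label instead of returning with the buffers still allocated. A minimal userspace sketch of the resulting error-path shape (buffer sizes, locals and the work itself are assumed and simplified, not the real fs/dlm/user.c code):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

static ssize_t device_write_shape(int compat, int closing)
{
        void *k32buf = NULL;
        void *kbuf = malloc(64);
        ssize_t error = 0;

        if (!kbuf)
                return -ENOMEM;

        if (compat) {
                k32buf = kbuf;            /* keep the 32-bit request */
                kbuf = malloc(64 + 16);   /* enlarged 64-bit copy */
                if (!kbuf) {
                        free(k32buf);     /* leak fixed by the first hunk */
                        return -ENOMEM;
                }
        }

        if (closing) {
                error = -EINVAL;          /* second hunk: no bare return */
                goto out_free;
        }

        /* ... convert and process the request ... */

out_free:
        free(kbuf);
        free(k32buf);
        return error;
}

int main(void)
{
        printf("%zd\n", device_write_shape(1, 1));   /* prints -22 (EINVAL) */
        return 0;
}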
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 3a404e7fad53..291abb11e20e 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
74 } 74 }
75 unlock_kernel(); 75 unlock_kernel();
76 76
77 d_add(dentry, inode); 77 return d_splice_alias(inode, dentry);
78 return NULL;
79} 78}
80 79
81static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, 80static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino,
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0c87474f7917..7cc0eb756b55 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1041,10 +1041,7 @@ retry:
1041} 1041}
1042 1042
1043/* 1043/*
1044 * It opens an eventpoll file descriptor. The "size" parameter is there 1044 * Open an eventpoll file descriptor.
1045 * for historical reasons, when epoll was using an hash instead of an
1046 * RB tree. With the current implementation, the "size" parameter is ignored
1047 * (besides sanity checks).
1048 */ 1045 */
1049asmlinkage long sys_epoll_create1(int flags) 1046asmlinkage long sys_epoll_create1(int flags)
1050{ 1047{
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1ae5004e93fc..e9fa960ba6da 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1626 free_blocks = 1626 free_blocks =
1627 percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); 1627 percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
1628#endif 1628#endif
1629 if (free_blocks <= root_blocks)
1630 /* we don't have free space */
1631 return 0;
1629 if (free_blocks - root_blocks < nblocks) 1632 if (free_blocks - root_blocks < nblocks)
1630 return free_blocks - root_blocks; 1633 return free_blocks - root_blocks;
1631 return nblocks; 1634 return nblocks;
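The added guard matters because the block counts are unsigned: if free_blocks were smaller than root_blocks, the subtraction below would wrap around to a huge value and the function would report plenty of space. A small standalone demonstration (types and values assumed):

#include <stdio.h>

typedef unsigned long long ext4_fsblk_t;

/* Shape of ext4_has_free_blocks() after the fix (simplified). */
static ext4_fsblk_t has_free_blocks(ext4_fsblk_t free_blocks,
                                    ext4_fsblk_t root_blocks,
                                    ext4_fsblk_t nblocks)
{
        if (free_blocks <= root_blocks)        /* the added guard */
                return 0;
        if (free_blocks - root_blocks < nblocks)
                return free_blocks - root_blocks;
        return nblocks;
}

int main(void)
{
        /* Without the guard, 100ULL - 200ULL wraps to a huge value, so
         * the "< nblocks" test fails and 50 blocks would be granted. */
        printf("%llu\n", has_free_blocks(100, 200, 50));   /* prints 0 */
        return 0;
}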
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d3d23d73c08b..ec8e33b45219 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent,
411 get_dtype(sb, fname->file_type)); 411 get_dtype(sb, fname->file_type));
412 if (error) { 412 if (error) {
413 filp->f_pos = curr_pos; 413 filp->f_pos = curr_pos;
414 info->extra_fname = fname->next; 414 info->extra_fname = fname;
415 return error; 415 return error;
416 } 416 }
417 fname = fname->next; 417 fname = fname->next;
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp,
450 * If there are any leftover names on the hash collision 450 * If there are any leftover names on the hash collision
451 * chain, return them first. 451 * chain, return them first.
452 */ 452 */
453 if (info->extra_fname && 453 if (info->extra_fname) {
454 call_filldir(filp, dirent, filldir, info->extra_fname)) 454 if (call_filldir(filp, dirent, filldir, info->extra_fname))
455 goto finished; 455 goto finished;
456 456
457 if (!info->curr_node) 457 info->extra_fname = NULL;
458 info->curr_node = rb_next(info->curr_node);
459 if (!info->curr_node) {
460 if (info->next_hash == ~0) {
461 filp->f_pos = EXT4_HTREE_EOF;
462 goto finished;
463 }
464 info->curr_hash = info->next_hash;
465 info->curr_minor_hash = 0;
466 }
467 } else if (!info->curr_node)
458 info->curr_node = rb_first(&info->root); 468 info->curr_node = rb_first(&info->root);
459 469
460 while (1) { 470 while (1) {
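The one-line call_filldir() change above makes the saved resume point the entry that failed rather than its successor, so no directory entry on the hash-collision chain is skipped on retry. A toy model of the resume logic (names are illustrative, not the ext4 structures):

#include <stdio.h>

struct fname { int hash; struct fname *next; };

/* Loop shape of call_filldir() after the fix: on a filldir error,
 * remember the entry that failed (f), not f->next, so the retry
 * re-emits it instead of silently dropping it. */
static int emit_chain(struct fname *f, struct fname **extra_fname,
                      int (*filldir)(struct fname *))
{
        while (f) {
                if (filldir(f)) {
                        *extra_fname = f;      /* was: f->next (the bug) */
                        return 1;
                }
                f = f->next;
        }
        *extra_fname = NULL;
        return 0;
}

/* Fails on the second entry, e.g. because the user buffer filled up. */
static int fail_on_second(struct fname *f)
{
        static int calls;
        (void)f;
        return ++calls == 2;
}

int main(void)
{
        struct fname c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct fname *resume;

        emit_chain(&a, &resume, fail_on_second);
        printf("retry from hash %d\n", resume->hash);   /* 2, not 3 */
        return 0;
}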
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6c7924d9e358..295003241d3d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1072,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *);
1072extern void ext4_get_inode_flags(struct ext4_inode_info *); 1072extern void ext4_get_inode_flags(struct ext4_inode_info *);
1073extern void ext4_set_aops(struct inode *inode); 1073extern void ext4_set_aops(struct inode *inode);
1074extern int ext4_writepage_trans_blocks(struct inode *); 1074extern int ext4_writepage_trans_blocks(struct inode *);
1075extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1076extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1075extern int ext4_block_truncate_page(handle_t *handle, 1077extern int ext4_block_truncate_page(handle_t *handle,
1076 struct address_space *mapping, loff_t from); 1078 struct address_space *mapping, loff_t from);
1077extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); 1079extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@@ -1227,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
1227/* extents.c */ 1229/* extents.c */
1228extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 1230extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1229extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 1231extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1232extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
1233 int chunk);
1230extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1234extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1231 ext4_lblk_t iblock, 1235 ext4_lblk_t iblock,
1232 unsigned long max_blocks, struct buffer_head *bh_result, 1236 unsigned long max_blocks, struct buffer_head *bh_result,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 6c166c0a54b7..d33dc56d6986 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
216extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 216extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
217extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 217extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
218extern int ext4_extent_tree_init(handle_t *, struct inode *); 218extern int ext4_extent_tree_init(handle_t *, struct inode *);
219extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); 219extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
220 int num,
221 struct ext4_ext_path *path);
220extern int ext4_ext_try_to_merge(struct inode *inode, 222extern int ext4_ext_try_to_merge(struct inode *inode,
221 struct ext4_ext_path *path, 223 struct ext4_ext_path *path,
222 struct ext4_extent *); 224 struct ext4_extent *);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index eb8bc3afe6e9..b455c685a98b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -51,6 +51,14 @@
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \ 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53 53
54/*
 55 * Define the number of metadata blocks we need to account for when we modify data.
 56 *
 57 * This includes the super block, inode block, quota blocks and xattr blocks
58 */
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
61
54/* Delete operations potentially hit one directory's namespace plus an 62/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */ 64 * generous. We can grow the delete transaction later if necessary. */
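As a rough feel for the new macro's arithmetic, here is a standalone transcription with placeholder constants; the real EXT4_XATTR_TRANS_BLOCKS and EXT4_QUOTA_TRANS_BLOCKS values depend on the kernel version and on whether quota is configured for the superblock:

#include <stdio.h>

#define XATTR_TRANS_BLOCKS  6                  /* assumed */
#define QUOTA_TRANS_BLOCKS  8                  /* assumed; 0 without quota */

/* Same shape as EXT4_META_TRANS_BLOCKS(sb) above. */
#define META_TRANS_BLOCKS  (XATTR_TRANS_BLOCKS + 2 * QUOTA_TRANS_BLOCKS)

int main(void)
{
        printf("metadata credits per modification: %d\n", META_TRANS_BLOCKS);
        return 0;
}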
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 612c3d2c3824..b24d3c53f20c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1747} 1747}
1748 1748
1749/* 1749/*
1750 * ext4_ext_calc_credits_for_insert: 1750 * ext4_ext_calc_credits_for_single_extent:
1751 * This routine returns max. credits that the extent tree can consume. 1751 * This routine returns max. credits needed to insert an extent
1752 * It should be OK for low-performance paths like ->writepage() 1752 * to the extent tree.
1753 * To allow many writing processes to fit into a single transaction, 1753 * When passing the actual path, the caller should calculate credits
1754 * the caller should calculate credits under i_data_sem and 1754 * under i_data_sem.
1755 * pass the actual path.
1756 */ 1755 */
1757int ext4_ext_calc_credits_for_insert(struct inode *inode, 1756int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
1758 struct ext4_ext_path *path) 1757 struct ext4_ext_path *path)
1759{ 1758{
1760 int depth, needed;
1761
1762 if (path) { 1759 if (path) {
1760 int depth = ext_depth(inode);
1761 int ret = 0;
1762
1763 /* probably there is space in leaf? */ 1763 /* probably there is space in leaf? */
1764 depth = ext_depth(inode);
1765 if (le16_to_cpu(path[depth].p_hdr->eh_entries) 1764 if (le16_to_cpu(path[depth].p_hdr->eh_entries)
1766 < le16_to_cpu(path[depth].p_hdr->eh_max)) 1765 < le16_to_cpu(path[depth].p_hdr->eh_max)) {
1767 return 1;
1768 }
1769 1766
1770 /* 1767 /*
1771 * given 32-bit logical block (4294967296 blocks), max. tree 1768 * There is some space in the leaf tree, no
1772 * can be 4 levels in depth -- 4 * 340^4 == 53453440000. 1769 * need to account for leaf block credit
1773 * Let's also add one more level for imbalance. 1770 *
1774 */ 1771 * bitmaps and block group descriptor blocks
1775 depth = 5; 1772 * and other metadata blocks still need to be
1776 1773 * accounted.
1777 /* allocation of new data block(s) */ 1774 */
1778 needed = 2; 1775 /* 1 bitmap, 1 block group descriptor */
1776 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
1777 }
1778 }
1779 1779
1780 /* 1780 return ext4_chunk_trans_blocks(inode, nrblocks);
1781 * tree can be full, so it would need to grow in depth: 1781}
1782 * we need one credit to modify old root, credits for
1783 * new root will be added in split accounting
1784 */
1785 needed += 1;
1786 1782
1787 /* 1783/*
1788 * Index split can happen, we would need: 1784 * How many index/leaf blocks need to be changed/allocated to modify nrblocks?
1789 * allocate intermediate indexes (bitmap + group) 1785 *
1790 * + change two blocks at each level, but root (already included) 1786 * if nrblocks fit in a single extent (chunk flag is 1), then
1791 */ 1787 * in the worst case, each tree level index/leaf needs to be changed;
1792 needed += (depth * 2) + (depth * 2); 1788 * if the tree splits due to inserting a new extent, then the old tree
1789 * index/leaf needs to be updated too
1790 *
1791 * If the nrblocks are discontiguous, they could cause
1792 * the whole tree split more than once, but this is really rare.
1793 */
1794int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
1795{
1796 int index;
1797 int depth = ext_depth(inode);
1793 1798
1794 /* any allocation modifies superblock */ 1799 if (chunk)
1795 needed += 1; 1800 index = depth * 2;
1801 else
1802 index = depth * 3;
1796 1803
1797 return needed; 1804 return index;
1798} 1805}
1799 1806
1800static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 1807static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1921 correct_index = 1; 1928 correct_index = 1;
1922 credits += (ext_depth(inode)) + 1; 1929 credits += (ext_depth(inode)) + 1;
1923 } 1930 }
1924#ifdef CONFIG_QUOTA
1925 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 1931 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
1926#endif
1927 1932
1928 err = ext4_ext_journal_restart(handle, credits); 1933 err = ext4_ext_journal_restart(handle, credits);
1929 if (err) 1934 if (err)
@@ -2805,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode)
2805 /* 2810 /*
2806 * probably first extent we're gonna free will be last in block 2811 * probably first extent we're gonna free will be last in block
2807 */ 2812 */
2808 err = ext4_writepage_trans_blocks(inode) + 3; 2813 err = ext4_writepage_trans_blocks(inode);
2809 handle = ext4_journal_start(inode, err); 2814 handle = ext4_journal_start(inode, err);
2810 if (IS_ERR(handle)) 2815 if (IS_ERR(handle))
2811 return; 2816 return;
@@ -2819,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode)
2819 down_write(&EXT4_I(inode)->i_data_sem); 2824 down_write(&EXT4_I(inode)->i_data_sem);
2820 ext4_ext_invalidate_cache(inode); 2825 ext4_ext_invalidate_cache(inode);
2821 2826
2822 ext4_mb_discard_inode_preallocations(inode); 2827 ext4_discard_reservation(inode);
2823 2828
2824 /* 2829 /*
2825 * TODO: optimization is possible here. 2830 * TODO: optimization is possible here.
@@ -2858,27 +2863,6 @@ out_stop:
2858 ext4_journal_stop(handle); 2863 ext4_journal_stop(handle);
2859} 2864}
2860 2865
2861/*
2862 * ext4_ext_writepage_trans_blocks:
2863 * calculate max number of blocks we could modify
2864 * in order to allocate new block for an inode
2865 */
2866int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2867{
2868 int needed;
2869
2870 needed = ext4_ext_calc_credits_for_insert(inode, NULL);
2871
2872 /* caller wants to allocate num blocks, but note it includes sb */
2873 needed = needed * num - (num - 1);
2874
2875#ifdef CONFIG_QUOTA
2876 needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
2877#endif
2878
2879 return needed;
2880}
2881
2882static void ext4_falloc_update_inode(struct inode *inode, 2866static void ext4_falloc_update_inode(struct inode *inode,
2883 int mode, loff_t new_size, int update_ctime) 2867 int mode, loff_t new_size, int update_ctime)
2884{ 2868{
@@ -2939,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2939 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2923 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2940 - block; 2924 - block;
2941 /* 2925 /*
2942 * credits to insert 1 extent into extent tree + buffers to be able to 2926 * credits to insert 1 extent into extent tree
2943 * modify 1 super block, 1 block bitmap and 1 group descriptor.
2944 */ 2927 */
2945 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; 2928 credits = ext4_chunk_trans_blocks(inode, max_blocks);
2946 mutex_lock(&inode->i_mutex); 2929 mutex_lock(&inode->i_mutex);
2947retry: 2930retry:
2948 while (ret >= 0 && ret < max_blocks) { 2931 while (ret >= 0 && ret < max_blocks) {
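A compact sketch of the new worst-case index accounting from ext4_ext_index_trans_blocks() above; the depth value is an assumed example:

#include <stdio.h>

/* Mirrors ext4_ext_index_trans_blocks(): worst-case index/leaf blocks
 * touched for an extent tree of the given depth. */
static int ext_index_trans_blocks(int depth, int chunk)
{
        /* contiguous chunk: each level changed once plus the blocks a
         * split touches; scattered blocks: one more per level */
        return chunk ? depth * 2 : depth * 3;
}

int main(void)
{
        int depth = 2;   /* assumed: root, one index level, leaves */

        printf("chunk: %d, scattered: %d\n",
               ext_index_trans_blocks(depth, 1),
               ext_index_trans_blocks(depth, 0));
        return 0;
}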
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 655e760212b8..f344834bbf58 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -351,7 +351,7 @@ find_close_to_parent:
351 goto found_flexbg; 351 goto found_flexbg;
352 } 352 }
353 353
354 if (best_flex < 0 || 354 if (flex_group[best_flex].free_inodes == 0 ||
355 (flex_group[i].free_blocks > 355 (flex_group[i].free_blocks >
356 flex_group[best_flex].free_blocks && 356 flex_group[best_flex].free_blocks &&
357 flex_group[i].free_inodes)) 357 flex_group[i].free_inodes))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 59fbbe899acc..7e91913e325b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
41#include "acl.h" 41#include "acl.h"
42#include "ext4_extents.h" 42#include "ext4_extents.h"
43 43
44#define MPAGE_DA_EXTENT_TAIL 0x01
45
44static inline int ext4_begin_ordered_truncate(struct inode *inode, 46static inline int ext4_begin_ordered_truncate(struct inode *inode,
45 loff_t new_size) 47 loff_t new_size)
46{ 48{
@@ -1005,6 +1007,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1005 */ 1007 */
1006static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1008static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1007{ 1009{
1010 if (!blocks)
1011 return 0;
1012
1008 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1013 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1009 return ext4_ext_calc_metadata_amount(inode, blocks); 1014 return ext4_ext_calc_metadata_amount(inode, blocks);
1010 1015
@@ -1041,18 +1046,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1041 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1046 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1042} 1047}
1043 1048
1044/* Maximum number of blocks we map for direct IO at once. */
1045#define DIO_MAX_BLOCKS 4096
1046/*
1047 * Number of credits we need for writing DIO_MAX_BLOCKS:
1048 * We need sb + group descriptor + bitmap + inode -> 4
1049 * For B blocks with A block pointers per block we need:
1050 * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
1051 * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
1052 */
1053#define DIO_CREDITS 25
1054
1055
1056/* 1049/*
1057 * The ext4_get_blocks_wrap() function try to look up the requested blocks, 1050 * The ext4_get_blocks_wrap() function try to look up the requested blocks,
1058 * and returns if the blocks are already mapped. 1051 * and returns if the blocks are already mapped.
@@ -1164,19 +1157,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1164 return retval; 1157 return retval;
1165} 1158}
1166 1159
1160/* Maximum number of blocks we map for direct IO at once. */
1161#define DIO_MAX_BLOCKS 4096
1162
1167static int ext4_get_block(struct inode *inode, sector_t iblock, 1163static int ext4_get_block(struct inode *inode, sector_t iblock,
1168 struct buffer_head *bh_result, int create) 1164 struct buffer_head *bh_result, int create)
1169{ 1165{
1170 handle_t *handle = ext4_journal_current_handle(); 1166 handle_t *handle = ext4_journal_current_handle();
1171 int ret = 0, started = 0; 1167 int ret = 0, started = 0;
1172 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 1168 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1169 int dio_credits;
1173 1170
1174 if (create && !handle) { 1171 if (create && !handle) {
1175 /* Direct IO write... */ 1172 /* Direct IO write... */
1176 if (max_blocks > DIO_MAX_BLOCKS) 1173 if (max_blocks > DIO_MAX_BLOCKS)
1177 max_blocks = DIO_MAX_BLOCKS; 1174 max_blocks = DIO_MAX_BLOCKS;
1178 handle = ext4_journal_start(inode, DIO_CREDITS + 1175 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
1179 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); 1176 handle = ext4_journal_start(inode, dio_credits);
1180 if (IS_ERR(handle)) { 1177 if (IS_ERR(handle)) {
1181 ret = PTR_ERR(handle); 1178 ret = PTR_ERR(handle);
1182 goto out; 1179 goto out;
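The direct-IO path now sizes the transaction from the actual mapping request instead of the removed fixed DIO_CREDITS constant. A hedged sketch of the control flow, with ext4_chunk_trans_blocks() replaced by a stub of assumed shape:

#include <stdio.h>

#define DIO_MAX_BLOCKS 4096

/* Stand-in for ext4_chunk_trans_blocks(); the real function derives
 * the credits from the inode's extent/indirect geometry. */
static int chunk_trans_blocks_stub(unsigned int max_blocks)
{
        return 4 + max_blocks / 256;           /* assumed shape only */
}

int main(void)
{
        unsigned int max_blocks = 10000;       /* request before capping */

        /* Direct IO write path after the patch: cap the mapping size,
         * then reserve exactly the credits that size needs, instead of
         * the old fixed DIO_CREDITS + quota constant. */
        if (max_blocks > DIO_MAX_BLOCKS)
                max_blocks = DIO_MAX_BLOCKS;
        printf("ext4_journal_start with %d credits\n",
               chunk_trans_blocks_stub(max_blocks));
        return 0;
}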
@@ -1559,7 +1556,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1559 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1556 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1560 int total, mdb, mdb_free, release; 1557 int total, mdb, mdb_free, release;
1561 1558
1559 if (!to_free)
1560 return; /* Nothing to release, exit */
1561
1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1563
1564 if (!EXT4_I(inode)->i_reserved_data_blocks) {
1565 /*
1566 * if there are no reserved blocks, but we try to free some
1567 * then the counter is messed up somewhere.
1568 * but since this function is called from invalidate
1569 * page, it's harmless to return without any action
1570 */
1571 printk(KERN_INFO "ext4 delalloc: tried to release %d reserved "
1572 "blocks for inode %lu, but there are no reserved "
1573 "data blocks\n", to_free, inode->i_ino);
1574 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1575 return;
1576 }
1577
1563 /* recalculate the number of metablocks still need to be reserved */ 1578 /* recalculate the number of metablocks still need to be reserved */
1564 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1579 total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
1565 mdb = ext4_calc_metadata_amount(inode, total); 1580 mdb = ext4_calc_metadata_amount(inode, total);
@@ -1613,11 +1628,13 @@ struct mpage_da_data {
1613 unsigned long first_page, next_page; /* extent of pages */ 1628 unsigned long first_page, next_page; /* extent of pages */
1614 get_block_t *get_block; 1629 get_block_t *get_block;
1615 struct writeback_control *wbc; 1630 struct writeback_control *wbc;
1631 int io_done;
1632 long pages_written;
1616}; 1633};
1617 1634
1618/* 1635/*
1619 * mpage_da_submit_io - walks through extent of pages and try to write 1636 * mpage_da_submit_io - walks through extent of pages and try to write
1620 * them with __mpage_writepage() 1637 * them with writepage() call back
1621 * 1638 *
1622 * @mpd->inode: inode 1639 * @mpd->inode: inode
1623 * @mpd->first_page: first page of the extent 1640 * @mpd->first_page: first page of the extent
@@ -1632,18 +1649,11 @@ struct mpage_da_data {
1632static int mpage_da_submit_io(struct mpage_da_data *mpd) 1649static int mpage_da_submit_io(struct mpage_da_data *mpd)
1633{ 1650{
1634 struct address_space *mapping = mpd->inode->i_mapping; 1651 struct address_space *mapping = mpd->inode->i_mapping;
1635 struct mpage_data mpd_pp = {
1636 .bio = NULL,
1637 .last_block_in_bio = 0,
1638 .get_block = mpd->get_block,
1639 .use_writepage = 1,
1640 };
1641 int ret = 0, err, nr_pages, i; 1652 int ret = 0, err, nr_pages, i;
1642 unsigned long index, end; 1653 unsigned long index, end;
1643 struct pagevec pvec; 1654 struct pagevec pvec;
1644 1655
1645 BUG_ON(mpd->next_page <= mpd->first_page); 1656 BUG_ON(mpd->next_page <= mpd->first_page);
1646
1647 pagevec_init(&pvec, 0); 1657 pagevec_init(&pvec, 0);
1648 index = mpd->first_page; 1658 index = mpd->first_page;
1649 end = mpd->next_page - 1; 1659 end = mpd->next_page - 1;
@@ -1661,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1661 break; 1671 break;
1662 index++; 1672 index++;
1663 1673
1664 err = __mpage_writepage(page, mpd->wbc, &mpd_pp); 1674 err = mapping->a_ops->writepage(page, mpd->wbc);
1665 1675 if (!err)
1676 mpd->pages_written++;
1666 /* 1677 /*
1667 * In error case, we have to continue because 1678 * In error case, we have to continue because
1668 * remaining pages are still locked 1679 * remaining pages are still locked
@@ -1673,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1673 } 1684 }
1674 pagevec_release(&pvec); 1685 pagevec_release(&pvec);
1675 } 1686 }
1676 if (mpd_pp.bio)
1677 mpage_bio_submit(WRITE, mpd_pp.bio);
1678
1679 return ret; 1687 return ret;
1680} 1688}
1681 1689
@@ -1698,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1698 int blocks = exbh->b_size >> inode->i_blkbits; 1706 int blocks = exbh->b_size >> inode->i_blkbits;
1699 sector_t pblock = exbh->b_blocknr, cur_logical; 1707 sector_t pblock = exbh->b_blocknr, cur_logical;
1700 struct buffer_head *head, *bh; 1708 struct buffer_head *head, *bh;
1701 unsigned long index, end; 1709 pgoff_t index, end;
1702 struct pagevec pvec; 1710 struct pagevec pvec;
1703 int nr_pages, i; 1711 int nr_pages, i;
1704 1712
@@ -1741,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1741 if (buffer_delay(bh)) { 1749 if (buffer_delay(bh)) {
1742 bh->b_blocknr = pblock; 1750 bh->b_blocknr = pblock;
1743 clear_buffer_delay(bh); 1751 clear_buffer_delay(bh);
1752 bh->b_bdev = inode->i_sb->s_bdev;
1753 } else if (buffer_unwritten(bh)) {
1754 bh->b_blocknr = pblock;
1755 clear_buffer_unwritten(bh);
1756 set_buffer_mapped(bh);
1757 set_buffer_new(bh);
1758 bh->b_bdev = inode->i_sb->s_bdev;
1744 } else if (buffer_mapped(bh)) 1759 } else if (buffer_mapped(bh))
1745 BUG_ON(bh->b_blocknr != pblock); 1760 BUG_ON(bh->b_blocknr != pblock);
1746 1761
@@ -1776,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1776 * 1791 *
1777 * The function skips space we know is already mapped to disk blocks. 1792 * The function skips space we know is already mapped to disk blocks.
1778 * 1793 *
1779 * The function ignores errors ->get_block() returns, thus real
1780 * error handling is postponed to __mpage_writepage()
1781 */ 1794 */
1782static void mpage_da_map_blocks(struct mpage_da_data *mpd) 1795static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1783{ 1796{
1797 int err = 0;
1784 struct buffer_head *lbh = &mpd->lbh; 1798 struct buffer_head *lbh = &mpd->lbh;
1785 int err = 0, remain = lbh->b_size;
1786 sector_t next = lbh->b_blocknr; 1799 sector_t next = lbh->b_blocknr;
1787 struct buffer_head new; 1800 struct buffer_head new;
1788 1801
@@ -1792,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1792 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 1805 if (buffer_mapped(lbh) && !buffer_delay(lbh))
1793 return; 1806 return;
1794 1807
1795 while (remain) { 1808 new.b_state = lbh->b_state;
1796 new.b_state = lbh->b_state; 1809 new.b_blocknr = 0;
1797 new.b_blocknr = 0; 1810 new.b_size = lbh->b_size;
1798 new.b_size = remain;
1799 err = mpd->get_block(mpd->inode, next, &new, 1);
1800 if (err) {
1801 /*
1802 * Rather than implement own error handling
1803 * here, we just leave remaining blocks
1804 * unallocated and try again with ->writepage()
1805 */
1806 break;
1807 }
1808 BUG_ON(new.b_size == 0);
1809 1811
1810 if (buffer_new(&new)) 1812 /*
1811 __unmap_underlying_blocks(mpd->inode, &new); 1813 * If we didn't accumulate anything
1814 * to write, simply return
1815 */
1816 if (!new.b_size)
1817 return;
1818 err = mpd->get_block(mpd->inode, next, &new, 1);
1819 if (err)
1820 return;
1821 BUG_ON(new.b_size == 0);
1812 1822
1813 /* 1823 if (buffer_new(&new))
1814 * If blocks are delayed marked, we need to 1824 __unmap_underlying_blocks(mpd->inode, &new);
1815 * put actual blocknr and drop delayed bit
1816 */
1817 if (buffer_delay(lbh))
1818 mpage_put_bnr_to_bhs(mpd, next, &new);
1819 1825
1820 /* go for the remaining blocks */ 1826 /*
1821 next += new.b_size >> mpd->inode->i_blkbits; 1827 * If blocks are delayed marked, we need to
1822 remain -= new.b_size; 1828 * put actual blocknr and drop delayed bit
1823 } 1829 */
1830 if (buffer_delay(lbh) || buffer_unwritten(lbh))
1831 mpage_put_bnr_to_bhs(mpd, next, &new);
1832
1833 return;
1824} 1834}
1825 1835
1826#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) 1836#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
1837 (1 << BH_Delay) | (1 << BH_Unwritten))
1827 1838
1828/* 1839/*
1829 * mpage_add_bh_to_extent - try to add one more block to extent of blocks 1840 * mpage_add_bh_to_extent - try to add one more block to extent of blocks
@@ -1837,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1837static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 1848static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1838 sector_t logical, struct buffer_head *bh) 1849 sector_t logical, struct buffer_head *bh)
1839{ 1850{
1840 struct buffer_head *lbh = &mpd->lbh;
1841 sector_t next; 1851 sector_t next;
1852 size_t b_size = bh->b_size;
1853 struct buffer_head *lbh = &mpd->lbh;
1854 int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
1842 1855
1843 next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); 1856 /* check if the reserved journal credits might overflow */
1844 1857 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
1858 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1859 /*
1860 * With non-extent format we are limited by the journal
1861 * credits available. The total credits needed to insert
1862 * nrblocks contiguous blocks depend on the
1863 * nrblocks. So limit nrblocks.
1864 */
1865 goto flush_it;
1866 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1867 EXT4_MAX_TRANS_DATA) {
1868 /*
1869 * Adding the new buffer_head would make it cross the
1870 * allowed limit for which we have journal credits
1871 * reserved. So limit the new bh->b_size
1872 */
1873 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1874 mpd->inode->i_blkbits;
1875 /* we will do mpage_da_submit_io in the next loop */
1876 }
1877 }
1845 /* 1878 /*
1846 * First block in the extent 1879 * First block in the extent
1847 */ 1880 */
1848 if (lbh->b_size == 0) { 1881 if (lbh->b_size == 0) {
1849 lbh->b_blocknr = logical; 1882 lbh->b_blocknr = logical;
1850 lbh->b_size = bh->b_size; 1883 lbh->b_size = b_size;
1851 lbh->b_state = bh->b_state & BH_FLAGS; 1884 lbh->b_state = bh->b_state & BH_FLAGS;
1852 return; 1885 return;
1853 } 1886 }
1854 1887
1888 next = lbh->b_blocknr + nrblocks;
1855 /* 1889 /*
1856 * Can we merge the block to our big extent? 1890 * Can we merge the block to our big extent?
1857 */ 1891 */
1858 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { 1892 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
1859 lbh->b_size += bh->b_size; 1893 lbh->b_size += b_size;
1860 return; 1894 return;
1861 } 1895 }
1862 1896
1897flush_it:
1863 /* 1898 /*
1864 * We couldn't merge the block to our extent, so we 1899 * We couldn't merge the block to our extent, so we
1865 * need to flush current extent and start new one 1900 * need to flush current extent and start new one
1866 */ 1901 */
1867 mpage_da_map_blocks(mpd); 1902 mpage_da_map_blocks(mpd);
1868 1903 mpage_da_submit_io(mpd);
1869 /* 1904 mpd->io_done = 1;
1870 * Now start a new extent 1905 return;
1871 */
1872 lbh->b_size = bh->b_size;
1873 lbh->b_state = bh->b_state & BH_FLAGS;
1874 lbh->b_blocknr = logical;
1875} 1906}
1876 1907
1877/* 1908/*
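For non-extent inodes, the checks added above keep the accumulated extent within EXT4_MAX_TRANS_DATA so the reserved journal credits can never be exceeded. A standalone illustration of the trimming arithmetic (the limit and block size are assumed):

#include <stdio.h>

#define MAX_TRANS_DATA 64          /* stand-in for EXT4_MAX_TRANS_DATA */

int main(void)
{
        int blkbits = 12;                      /* assumed 4K blocks */
        long lbh_size = 60L << blkbits;        /* extent gathered so far */
        long bh_size = 16L << blkbits;         /* candidate buffer_head */
        long nrblocks = lbh_size >> blkbits;

        if (nrblocks >= MAX_TRANS_DATA) {
                printf("flush the extent first\n");
        } else if (nrblocks + (bh_size >> blkbits) > MAX_TRANS_DATA) {
                /* trim the merge so the reserved credits still cover it */
                bh_size = (MAX_TRANS_DATA - nrblocks) << blkbits;
                printf("merge only %ld blocks, then flush\n",
                       bh_size >> blkbits);    /* prints 4 */
        }
        return 0;
}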
@@ -1891,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page,
1891 struct buffer_head *bh, *head, fake; 1922 struct buffer_head *bh, *head, fake;
1892 sector_t logical; 1923 sector_t logical;
1893 1924
1925 if (mpd->io_done) {
1926 /*
1927 * Redirty the rest of the pages in the page_vec
1928 * and skip them. We will
1929 * try to write them again after
1930 * starting a new transaction
1931 */
1932 redirty_page_for_writepage(wbc, page);
1933 unlock_page(page);
1934 return MPAGE_DA_EXTENT_TAIL;
1935 }
1894 /* 1936 /*
1895 * Can we merge this page to current extent? 1937 * Can we merge this page to current extent?
1896 */ 1938 */
1897 if (mpd->next_page != page->index) { 1939 if (mpd->next_page != page->index) {
1898 /* 1940 /*
1899 * Nope, we can't. So, we map non-allocated blocks 1941 * Nope, we can't. So, we map non-allocated blocks
1900 * and start IO on them using __mpage_writepage() 1942 * and start IO on them using writepage()
1901 */ 1943 */
1902 if (mpd->next_page != mpd->first_page) { 1944 if (mpd->next_page != mpd->first_page) {
1903 mpage_da_map_blocks(mpd); 1945 mpage_da_map_blocks(mpd);
1904 mpage_da_submit_io(mpd); 1946 mpage_da_submit_io(mpd);
1947 /*
1948 * skip the rest of the pages in the page_vec
1949 */
1950 mpd->io_done = 1;
1951 redirty_page_for_writepage(wbc, page);
1952 unlock_page(page);
1953 return MPAGE_DA_EXTENT_TAIL;
1905 } 1954 }
1906 1955
1907 /* 1956 /*
@@ -1932,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page,
1932 set_buffer_dirty(bh); 1981 set_buffer_dirty(bh);
1933 set_buffer_uptodate(bh); 1982 set_buffer_uptodate(bh);
1934 mpage_add_bh_to_extent(mpd, logical, bh); 1983 mpage_add_bh_to_extent(mpd, logical, bh);
1984 if (mpd->io_done)
1985 return MPAGE_DA_EXTENT_TAIL;
1935 } else { 1986 } else {
1936 /* 1987 /*
1937 * Page with regular buffer heads, just add all dirty ones 1988 * Page with regular buffer heads, just add all dirty ones
@@ -1940,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page,
1940 bh = head; 1991 bh = head;
1941 do { 1992 do {
1942 BUG_ON(buffer_locked(bh)); 1993 BUG_ON(buffer_locked(bh));
1943 if (buffer_dirty(bh)) 1994 if (buffer_dirty(bh) &&
1995 (!buffer_mapped(bh) || buffer_delay(bh))) {
1944 mpage_add_bh_to_extent(mpd, logical, bh); 1996 mpage_add_bh_to_extent(mpd, logical, bh);
1997 if (mpd->io_done)
1998 return MPAGE_DA_EXTENT_TAIL;
1999 }
1945 logical++; 2000 logical++;
1946 } while ((bh = bh->b_this_page) != head); 2001 } while ((bh = bh->b_this_page) != head);
1947 } 2002 }
@@ -1960,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page,
1960 * 2015 *
1961 * This is a library function, which implements the writepages() 2016 * This is a library function, which implements the writepages()
1962 * address_space_operation. 2017 * address_space_operation.
1963 *
1964 * In order to avoid duplication of logic that deals with partial pages,
1965 * multiple bio per page, etc, we find non-allocated blocks, allocate
1966 * them with minimal calls to ->get_block() and re-use __mpage_writepage()
1967 *
1968 * It's important that we call __mpage_writepage() only once for each
1969 * involved page, otherwise we'd have to implement more complicated logic
1970 * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
1971 *
1972 * See comments to mpage_writepages()
1973 */ 2018 */
1974static int mpage_da_writepages(struct address_space *mapping, 2019static int mpage_da_writepages(struct address_space *mapping,
1975 struct writeback_control *wbc, 2020 struct writeback_control *wbc,
1976 get_block_t get_block) 2021 get_block_t get_block)
1977{ 2022{
1978 struct mpage_da_data mpd; 2023 struct mpage_da_data mpd;
2024 long to_write;
1979 int ret; 2025 int ret;
1980 2026
1981 if (!get_block) 2027 if (!get_block)
@@ -1989,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping,
1989 mpd.first_page = 0; 2035 mpd.first_page = 0;
1990 mpd.next_page = 0; 2036 mpd.next_page = 0;
1991 mpd.get_block = get_block; 2037 mpd.get_block = get_block;
2038 mpd.io_done = 0;
2039 mpd.pages_written = 0;
2040
2041 to_write = wbc->nr_to_write;
1992 2042
1993 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); 2043 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
1994 2044
1995 /* 2045 /*
1996 * Handle last extent of pages 2046 * Handle last extent of pages
1997 */ 2047 */
1998 if (mpd.next_page != mpd.first_page) { 2048 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
1999 mpage_da_map_blocks(&mpd); 2049 mpage_da_map_blocks(&mpd);
2000 mpage_da_submit_io(&mpd); 2050 mpage_da_submit_io(&mpd);
2001 } 2051 }
2002 2052
2053 wbc->nr_to_write = to_write - mpd.pages_written;
2003 return ret; 2054 return ret;
2004} 2055}
2005 2056
@@ -2204,63 +2255,95 @@ static int ext4_da_writepage(struct page *page,
2204} 2255}
2205 2256
2206/* 2257/*
2207 * For now just follow the DIO way to estimate the max credits 2258 * This is called via ext4_da_writepages() to
2208 * needed to write out EXT4_MAX_WRITEBACK_PAGES. 2259 * calculate the total number of credits to reserve to fit
2209 * todo: need to calculate the max credits need for 2260 * a single extent allocation into a single transaction,
2210 * extent based files, currently the DIO credits is based on 2261 * ext4_da_writepages() will loop calling this before
2211 * indirect-blocks mapping way. 2262 * the block allocation.
2212 *
2213 * Probably should have a generic way to calculate credits
2214 * for DIO, writepages, and truncate
2215 */ 2263 */
2216#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS 2264
2217#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS 2265static int ext4_da_writepages_trans_blocks(struct inode *inode)
2266{
2267 int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
2268
2269 /*
2270 * With non-extent format the journal credits needed to
2271 * insert nrblocks contiguous blocks depend on the
2272 * number of contiguous blocks. So we will limit the
2273 * number of contiguous blocks to a sane value
2274 */
2275 if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
2276 (max_blocks > EXT4_MAX_TRANS_DATA))
2277 max_blocks = EXT4_MAX_TRANS_DATA;
2278
2279 return ext4_chunk_trans_blocks(inode, max_blocks);
2280}
2218 2281
2219static int ext4_da_writepages(struct address_space *mapping, 2282static int ext4_da_writepages(struct address_space *mapping,
2220 struct writeback_control *wbc) 2283 struct writeback_control *wbc)
2221{ 2284{
2222 struct inode *inode = mapping->host;
2223 handle_t *handle = NULL; 2285 handle_t *handle = NULL;
2224 int needed_blocks;
2225 int ret = 0;
2226 long to_write;
2227 loff_t range_start = 0; 2286 loff_t range_start = 0;
2287 struct inode *inode = mapping->host;
2288 int needed_blocks, ret = 0, nr_to_writebump = 0;
2289 long to_write, pages_skipped = 0;
2290 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2228 2291
2229 /* 2292 /*
2230 * No pages to write? This is mainly a kludge to avoid starting 2293 * No pages to write? This is mainly a kludge to avoid starting
2231 * a transaction for special inodes like journal inode on last iput() 2294 * a transaction for special inodes like journal inode on last iput()
2232 * because that could violate lock ordering on umount 2295 * because that could violate lock ordering on umount
2233 */ 2296 */
2234 if (!mapping->nrpages) 2297 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2235 return 0; 2298 return 0;
2236
2237 /* 2299 /*
2238 * Estimate the worst case needed credits to write out 2300 * Make sure nr_to_write is >= sbi->s_mb_stream_request
2239 * EXT4_MAX_BUF_BLOCKS pages 2301 * This makes sure small files' blocks are allocated in a
2302 * single attempt. This ensures that small files
2303 * get less fragmented.
2240 */ 2304 */
2241 needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; 2305 if (wbc->nr_to_write < sbi->s_mb_stream_request) {
2306 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
2307 wbc->nr_to_write = sbi->s_mb_stream_request;
2308 }
2242 2309
2243 to_write = wbc->nr_to_write; 2310 if (!wbc->range_cyclic)
2244 if (!wbc->range_cyclic) {
2245 /* 2311 /*
2246 * If range_cyclic is not set force range_cont 2312 * If range_cyclic is not set force range_cont
2247 * and save the old writeback_index 2313 * and save the old writeback_index
2248 */ 2314 */
2249 wbc->range_cont = 1; 2315 wbc->range_cont = 1;
2250 range_start = wbc->range_start;
2251 }
2252 2316
2253 while (!ret && to_write) { 2317 range_start = wbc->range_start;
2318 pages_skipped = wbc->pages_skipped;
2319
2320restart_loop:
2321 to_write = wbc->nr_to_write;
2322 while (!ret && to_write > 0) {
2323
2324 /*
2325 * we insert one extent at a time. So we need
2326 * the credits for a single extent allocation.
2327 * journalled mode is currently not supported
2328 * by delalloc
2329 */
2330 BUG_ON(ext4_should_journal_data(inode));
2331 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2332
2254 /* start a new transaction*/ 2333 /* start a new transaction*/
2255 handle = ext4_journal_start(inode, needed_blocks); 2334 handle = ext4_journal_start(inode, needed_blocks);
2256 if (IS_ERR(handle)) { 2335 if (IS_ERR(handle)) {
2257 ret = PTR_ERR(handle); 2336 ret = PTR_ERR(handle);
2337 printk(KERN_EMERG "%s: jbd2_start: "
2338 "%ld pages, ino %lu; err %d\n", __func__,
2339 wbc->nr_to_write, inode->i_ino, ret);
2340 dump_stack();
2258 goto out_writepages; 2341 goto out_writepages;
2259 } 2342 }
2260 if (ext4_should_order_data(inode)) { 2343 if (ext4_should_order_data(inode)) {
2261 /* 2344 /*
2262 * With ordered mode we need to add 2345 * With ordered mode we need to add
2263 * the inode to the journal handle 2346 * the inode to the journal handle
2264 * when we do block allocation. 2347 * when we do block allocation.
2265 */ 2348 */
2266 ret = ext4_jbd2_file_inode(handle, inode); 2349 ret = ext4_jbd2_file_inode(handle, inode);
@@ -2268,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping,
2268 ext4_journal_stop(handle); 2351 ext4_journal_stop(handle);
2269 goto out_writepages; 2352 goto out_writepages;
2270 } 2353 }
2271
2272 } 2354 }
2273 /*
2274 * set the max dirty pages could be write at a time
2275 * to fit into the reserved transaction credits
2276 */
2277 if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
2278 wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
2279 2355
2280 to_write -= wbc->nr_to_write; 2356 to_write -= wbc->nr_to_write;
2281 ret = mpage_da_writepages(mapping, wbc, 2357 ret = mpage_da_writepages(mapping, wbc,
2282 ext4_da_get_block_write); 2358 ext4_da_get_block_write);
2283 ext4_journal_stop(handle); 2359 ext4_journal_stop(handle);
2284 if (wbc->nr_to_write) { 2360 if (ret == MPAGE_DA_EXTENT_TAIL) {
2361 /*
2362 * got one extent now try with
2363 * rest of the pages
2364 */
2365 to_write += wbc->nr_to_write;
2366 ret = 0;
2367 } else if (wbc->nr_to_write) {
2285 /* 2368 /*
2286 * There is no more writeout needed 2369 * There is no more writeout needed
2287 * or we requested for a noblocking writeout 2370 * or we requested for a noblocking writeout
@@ -2293,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping,
2293 wbc->nr_to_write = to_write; 2376 wbc->nr_to_write = to_write;
2294 } 2377 }
2295 2378
2296out_writepages: 2379 if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
2297 wbc->nr_to_write = to_write; 2380 /* We skipped pages in this loop */
2298 if (range_start)
2299 wbc->range_start = range_start; 2381 wbc->range_start = range_start;
2382 wbc->nr_to_write = to_write +
2383 wbc->pages_skipped - pages_skipped;
2384 wbc->pages_skipped = pages_skipped;
2385 goto restart_loop;
2386 }
2387
2388out_writepages:
2389 wbc->nr_to_write = to_write - nr_to_writebump;
2390 wbc->range_start = range_start;
2300 return ret; 2391 return ret;
2301} 2392}
2302 2393
@@ -3486,6 +3577,9 @@ void ext4_truncate(struct inode *inode)
3486 * modify the block allocation tree. 3577 * modify the block allocation tree.
3487 */ 3578 */
3488 down_write(&ei->i_data_sem); 3579 down_write(&ei->i_data_sem);
3580
3581 ext4_discard_reservation(inode);
3582
3489 /* 3583 /*
3490 * The orphan list entry will now protect us from any crash which 3584 * The orphan list entry will now protect us from any crash which
3491 * occurs before the truncate completes, so it is now safe to propagate 3585 * occurs before the truncate completes, so it is now safe to propagate
@@ -3555,8 +3649,6 @@ do_indirects:
3555 ; 3649 ;
3556 } 3650 }
3557 3651
3558 ext4_discard_reservation(inode);
3559
3560 up_write(&ei->i_data_sem); 3652 up_write(&ei->i_data_sem);
3561 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3653 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3562 ext4_mark_inode_dirty(handle, inode); 3654 ext4_mark_inode_dirty(handle, inode);
@@ -4324,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4324 return 0; 4416 return 0;
4325} 4417}
4326 4418
4419static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
4420 int chunk)
4421{
4422 int indirects;
4423
4424 /* if nrblocks are contiguous */
4425 if (chunk) {
4426 /*
4427 * With N contiguous data blocks, we need at most
4428 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
4429 * 2 dindirect blocks
4430 * 1 tindirect block
4431 */
4432 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
4433 return indirects + 3;
4434 }
4435 /*
4436 * if nrblocks are not contiguous, worst case, each block touches
4437 * an indirect block, and each indirect block touches a double indirect
4438 * block, plus a triple indirect block
4439 */
4440 indirects = nrblocks * 2 + 1;
4441 return indirects;
4442}
4443
4444static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4445{
4446 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
4447 return ext4_indirect_trans_blocks(inode, nrblocks, 0);
4448 return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
4449}
4327/* 4450/*
4328 * How many blocks doth make a writepage()? 4451 * Account for index blocks, block group bitmaps and block group
4329 * 4452 * descriptor blocks if modifying data blocks and index blocks;
4330 * With N blocks per page, it may be: 4453 * worst case, the index blocks spread over different block groups
4331 * N data blocks
4332 * 2 indirect block
4333 * 2 dindirect
4334 * 1 tindirect
4335 * N+5 bitmap blocks (from the above)
4336 * N+5 group descriptor summary blocks
4337 * 1 inode block
4338 * 1 superblock.
4339 * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
4340 * 4454 *
4341 * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS 4455 * If data blocks are discontiguous, they can spread over
4456 * different block groups too. Even if they are contiguous, with flexbg
4457 * they could still cross a block group boundary.
4342 * 4458 *
4343 * With ordered or writeback data it's the same, less the N data blocks. 4459 * Also account for superblock, inode, quota and xattr blocks
4460 */
4461int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4462{
4463 int groups, gdpblocks;
4464 int idxblocks;
4465 int ret = 0;
4466
4467 /*
4468 * How many index blocks do we need to touch to modify nrblocks?
4469 * The "chunk" flag indicates whether nrblocks is
4470 * physically contiguous on disk.
4471 *
4472 * Direct IO and fallocate call get_block to allocate
4473 * a single extent at a time, so they can set the "chunk" flag.
4474 */
4475 idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
4476
4477 ret = idxblocks;
4478
4479 /*
4480 * Now let's see how many group bitmaps and group descriptors need
4481 * to be accounted for
4482 */
4483 groups = idxblocks;
4484 if (chunk)
4485 groups += 1;
4486 else
4487 groups += nrblocks;
4488
4489 gdpblocks = groups;
4490 if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
4491 groups = EXT4_SB(inode->i_sb)->s_groups_count;
4492 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4493 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4494
4495 /* bitmaps and block group descriptor blocks */
4496 ret += groups + gdpblocks;
4497
4498 /* Blocks for super block, inode, quota and xattr blocks */
4499 ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
4500
4501 return ret;
4502}
4503
4504/*
4505 * Calculate the total number of credits to reserve to fit
4506 * the modification of a single page into a single transaction,
4507 * which may include multiple chunks of block allocations.
4344 * 4508 *
4345 * If the inode's direct blocks can hold an integral number of pages then a 4509 * This could be called via ext4_write_begin()
4346 * page cannot straddle two indirect blocks, and we can only touch one indirect
4347 * and dindirect block, and the "5" above becomes "3".
4348 * 4510 *
4349 * This still overestimates under most circumstances. If we were to pass the 4511 * We need to consider the worst case, when
4350 * start and end offsets in here as well we could do block_to_path() on each 4512 * we allocate one new block per extent.
4351 * block and work out the exact number of indirects which are touched. Pah.
4352 */ 4513 */
4353
4354int ext4_writepage_trans_blocks(struct inode *inode) 4514int ext4_writepage_trans_blocks(struct inode *inode)
4355{ 4515{
4356 int bpp = ext4_journal_blocks_per_page(inode); 4516 int bpp = ext4_journal_blocks_per_page(inode);
4357 int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
4358 int ret; 4517 int ret;
4359 4518
4360 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 4519 ret = ext4_meta_trans_blocks(inode, bpp, 0);
4361 return ext4_ext_writepage_trans_blocks(inode, bpp);
4362 4520
4521 /* Account for data blocks for journalled mode */
4363 if (ext4_should_journal_data(inode)) 4522 if (ext4_should_journal_data(inode))
4364 ret = 3 * (bpp + indirects) + 2; 4523 ret += bpp;
4365 else
4366 ret = 2 * (bpp + indirects) + 2;
4367
4368#ifdef CONFIG_QUOTA
4369 /* We know that structure was already allocated during DQUOT_INIT so
4370 * we will be updating only the data blocks + inodes */
4371 ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
4372#endif
4373
4374 return ret; 4524 return ret;
4375} 4525}
4376 4526
4377/* 4527/*
4528 * Calculate the journal credits for a chunk of data modification.
4529 *
4530 * This is called from DIO, fallocate, or anything else that calls
4531 * ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks.
4532 *
4533 * Journal buffers for data blocks are not included here, as DIO
4534 * and fallocate do not need to journal data buffers.
4535 */
4536int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
4537{
4538 return ext4_meta_trans_blocks(inode, nrblocks, 1);
4539}
4540
4541/*
4378 * The caller must have previously called ext4_reserve_inode_write(). 4542 * The caller must have previously called ext4_reserve_inode_write().
4379 * Given this, we know that the caller already has write access to iloc->bh. 4543 * Given this, we know that the caller already has write access to iloc->bh.
4380 */ 4544 */
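
As a rough cross-check of the credit arithmetic introduced above, here is a minimal user-space sketch of ext4_indirect_trans_blocks() and ext4_meta_trans_blocks(). The concrete numbers (1024 addresses per indirect block, 128 groups, 2 group descriptor blocks, 6 blocks standing in for EXT4_META_TRANS_BLOCKS) are illustrative assumptions, not values taken from this patch.

#include <stdio.h>

/* Mirrors ext4_indirect_trans_blocks(): index blocks touched for nrblocks. */
static int indirect_trans_blocks(int nrblocks, int chunk, int addr_per_block)
{
	if (chunk)
		/* contiguous: N/addr_per_block indirects + 2 dindirect + 1 tindirect */
		return nrblocks / addr_per_block + 3;
	/* discontiguous worst case: an indirect and a dindirect per block, plus one tindirect */
	return nrblocks * 2 + 1;
}

/* Mirrors ext4_meta_trans_blocks() for a non-extent inode. */
static int meta_trans_blocks(int nrblocks, int chunk, int addr_per_block,
			     int groups_count, int gdb_count, int meta_blocks)
{
	int idxblocks = indirect_trans_blocks(nrblocks, chunk, addr_per_block);
	int groups = idxblocks + (chunk ? 1 : nrblocks);
	int gdpblocks = groups;

	if (groups > groups_count)
		groups = groups_count;
	if (groups > gdb_count)
		gdpblocks = gdb_count;

	/* index blocks + bitmaps + group descriptors + sb/inode/quota/xattr */
	return idxblocks + groups + gdpblocks + meta_blocks;
}

int main(void)
{
	/* one contiguous 4-block chunk on an assumed 4K-block filesystem */
	printf("credits: %d\n", meta_trans_blocks(4, 1, 1024, 128, 2, 6));
	return 0;
}

For a single contiguous 4-block chunk this gives 3 index blocks, 4 bitmap blocks, 2 descriptor blocks (capped by gdb_count) and 6 fixed metadata blocks: 15 credits.
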
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 865e9ddb44d4..e0e3a5eb1ddb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3282} 3282}
3283 3283
3284/* 3284/*
3285 * Return the prealloc space that has the minimal distance
3286 * from the goal block. @cpa is the prealloc
3287 * space with the currently known minimal distance
3288 * from the goal block.
3289 */
3290static struct ext4_prealloc_space *
3291ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3292 struct ext4_prealloc_space *pa,
3293 struct ext4_prealloc_space *cpa)
3294{
3295 ext4_fsblk_t cur_distance, new_distance;
3296
3297 if (cpa == NULL) {
3298 atomic_inc(&pa->pa_count);
3299 return pa;
3300 }
3301 cur_distance = abs(goal_block - cpa->pa_pstart);
3302 new_distance = abs(goal_block - pa->pa_pstart);
3303
3304 if (cur_distance < new_distance)
3305 return cpa;
3306
3307 /* drop the previous reference */
3308 atomic_dec(&cpa->pa_count);
3309 atomic_inc(&pa->pa_count);
3310 return pa;
3311}
3312
3313/*
3285 * search goal blocks in preallocated space 3314 * search goal blocks in preallocated space
3286 */ 3315 */
3287static noinline_for_stack int 3316static noinline_for_stack int
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3290 int order, i; 3319 int order, i;
3291 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3320 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3292 struct ext4_locality_group *lg; 3321 struct ext4_locality_group *lg;
3293 struct ext4_prealloc_space *pa; 3322 struct ext4_prealloc_space *pa, *cpa = NULL;
3323 ext4_fsblk_t goal_block;
3294 3324
3295 /* only data can be preallocated */ 3325 /* only data can be preallocated */
3296 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3326 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3333 /* The max size of hash table is PREALLOC_TB_SIZE */ 3363 /* The max size of hash table is PREALLOC_TB_SIZE */
3334 order = PREALLOC_TB_SIZE - 1; 3364 order = PREALLOC_TB_SIZE - 1;
3335 3365
3366 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
3367 ac->ac_g_ex.fe_start +
3368 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3369 /*
3370 * search for the prealloc space that has the
3371 * minimal distance from the goal block.
3372 */
3336 for (i = order; i < PREALLOC_TB_SIZE; i++) { 3373 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3337 rcu_read_lock(); 3374 rcu_read_lock();
3338 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], 3375 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3340 spin_lock(&pa->pa_lock); 3377 spin_lock(&pa->pa_lock);
3341 if (pa->pa_deleted == 0 && 3378 if (pa->pa_deleted == 0 &&
3342 pa->pa_free >= ac->ac_o_ex.fe_len) { 3379 pa->pa_free >= ac->ac_o_ex.fe_len) {
3343 atomic_inc(&pa->pa_count); 3380
3344 ext4_mb_use_group_pa(ac, pa); 3381 cpa = ext4_mb_check_group_pa(goal_block,
3345 spin_unlock(&pa->pa_lock); 3382 pa, cpa);
3346 ac->ac_criteria = 20;
3347 rcu_read_unlock();
3348 return 1;
3349 } 3383 }
3350 spin_unlock(&pa->pa_lock); 3384 spin_unlock(&pa->pa_lock);
3351 } 3385 }
3352 rcu_read_unlock(); 3386 rcu_read_unlock();
3353 } 3387 }
3388 if (cpa) {
3389 ext4_mb_use_group_pa(ac, cpa);
3390 ac->ac_criteria = 20;
3391 return 1;
3392 }
3354 return 0; 3393 return 0;
3355} 3394}
3356 3395
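
The mballoc change above replaces "use the first fitting preallocation" with "use the fitting preallocation closest to the goal block". A minimal user-space model of the comparison; the struct and the reference counting are stripped down, and only the distance test mirrors ext4_mb_check_group_pa():

#include <stdio.h>
#include <stdlib.h>

struct pa { long long pstart; };	/* stand-in for ext4_prealloc_space */

static struct pa *check_group_pa(long long goal, struct pa *pa, struct pa *cpa)
{
	if (cpa == NULL)
		return pa;	/* first usable candidate is kept unconditionally */
	/* keep whichever preallocation starts closer to the goal block */
	if (llabs(goal - cpa->pstart) < llabs(goal - pa->pstart))
		return cpa;
	return pa;
}

int main(void)
{
	struct pa a = { .pstart = 1000 }, b = { .pstart = 1090 };
	struct pa *best = check_group_pa(1100, &a, NULL);

	best = check_group_pa(1100, &b, best);
	printf("chose pa starting at %lld\n", best->pstart);	/* prints 1090 */
	return 0;
}
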
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b9e077ba07e9..46fc0b5b12ba 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
53 * credit. But below we try not to accumulate too much 53 * credit. But below we try not to accumulate too much
54 * of them by restarting the journal. 54 * of them by restarting the journal.
55 */ 55 */
56 needed = ext4_ext_calc_credits_for_insert(inode, path); 56 needed = ext4_ext_calc_credits_for_single_extent(inode,
57 lb->last_block - lb->first_block + 1, path);
57 58
58 /* 59 /*
59 * Make sure the credit we accumulated is not really high 60 * Make sure the credit we accumulated is not really high
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 0a9265164265..b3d35604ea18 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
773 773
774 if (reserved_gdb || gdb_off == 0) { 774 if (reserved_gdb || gdb_off == 0) {
775 if (!EXT4_HAS_COMPAT_FEATURE(sb, 775 if (!EXT4_HAS_COMPAT_FEATURE(sb,
776 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 776 EXT4_FEATURE_COMPAT_RESIZE_INODE)
777 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
777 ext4_warning(sb, __func__, 778 ext4_warning(sb, __func__,
778 "No reserved GDT blocks, can't resize"); 779 "No reserved GDT blocks, can't resize");
779 return -EPERM; 780 return -EPERM;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d5d77958b861..566344b926b7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
568#endif 568#endif
569 ei->i_block_alloc_info = NULL; 569 ei->i_block_alloc_info = NULL;
570 ei->vfs_inode.i_version = 1; 570 ei->vfs_inode.i_version = 1;
571 ei->vfs_inode.i_data.writeback_index = 0;
571 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 572 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
572 INIT_LIST_HEAD(&ei->i_prealloc_list); 573 INIT_LIST_HEAD(&ei->i_prealloc_list);
573 spin_lock_init(&ei->i_prealloc_lock); 574 spin_lock_init(&ei->i_prealloc_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6d266d793e2c..80ff3381fa21 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait)
562 struct buffer_head *bh; 562 struct buffer_head *bh;
563 struct msdos_dir_entry *raw_entry; 563 struct msdos_dir_entry *raw_entry;
564 loff_t i_pos; 564 loff_t i_pos;
565 int err = 0; 565 int err;
566 566
567retry: 567retry:
568 i_pos = MSDOS_I(inode)->i_pos; 568 i_pos = MSDOS_I(inode)->i_pos;
569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) 569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
570 return 0; 570 return 0;
571 571
572 lock_super(sb);
573 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 572 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
574 if (!bh) { 573 if (!bh) {
575 printk(KERN_ERR "FAT: unable to read inode block " 574 printk(KERN_ERR "FAT: unable to read inode block "
576 "for updating (i_pos %lld)\n", i_pos); 575 "for updating (i_pos %lld)\n", i_pos);
577 err = -EIO; 576 return -EIO;
578 goto out;
579 } 577 }
580 spin_lock(&sbi->inode_hash_lock); 578 spin_lock(&sbi->inode_hash_lock);
581 if (i_pos != MSDOS_I(inode)->i_pos) { 579 if (i_pos != MSDOS_I(inode)->i_pos) {
582 spin_unlock(&sbi->inode_hash_lock); 580 spin_unlock(&sbi->inode_hash_lock);
583 brelse(bh); 581 brelse(bh);
584 unlock_super(sb);
585 goto retry; 582 goto retry;
586 } 583 }
587 584
@@ -607,11 +604,10 @@ retry:
607 } 604 }
608 spin_unlock(&sbi->inode_hash_lock); 605 spin_unlock(&sbi->inode_hash_lock);
609 mark_buffer_dirty(bh); 606 mark_buffer_dirty(bh);
607 err = 0;
610 if (wait) 608 if (wait)
611 err = sync_dirty_buffer(bh); 609 err = sync_dirty_buffer(bh);
612 brelse(bh); 610 brelse(bh);
613out:
614 unlock_super(sb);
615 return err; 611 return err;
616} 612}
617 613
diff --git a/fs/inode.c b/fs/inode.c
index b6726f644530..0487ddba1397 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -166,6 +166,7 @@ static struct inode *alloc_inode(struct super_block *sb)
166 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); 166 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
167 mapping->assoc_mapping = NULL; 167 mapping->assoc_mapping = NULL;
168 mapping->backing_dev_info = &default_backing_dev_info; 168 mapping->backing_dev_info = &default_backing_dev_info;
169 mapping->writeback_index = 0;
169 170
170 /* 171 /*
171 * If the block_device provides a backing_dev_info for client 172 * If the block_device provides a backing_dev_info for client
diff --git a/fs/ioprio.c b/fs/ioprio.c
index c4a1c3c65aac..da3cc460d4df 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
115 pgrp = task_pgrp(current); 115 pgrp = task_pgrp(current);
116 else 116 else
117 pgrp = find_vpid(who); 117 pgrp = find_vpid(who);
118 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 118 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
119 ret = set_task_ioprio(p, ioprio); 119 ret = set_task_ioprio(p, ioprio);
120 if (ret) 120 if (ret)
121 break; 121 break;
122 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 122 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
123 break; 123 break;
124 case IOPRIO_WHO_USER: 124 case IOPRIO_WHO_USER:
125 if (!who) 125 if (!who)
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
204 pgrp = task_pgrp(current); 204 pgrp = task_pgrp(current);
205 else 205 else
206 pgrp = find_vpid(who); 206 pgrp = find_vpid(who);
207 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 207 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
208 tmpio = get_task_ioprio(p); 208 tmpio = get_task_ioprio(p);
209 if (tmpio < 0) 209 if (tmpio < 0)
210 continue; 210 continue;
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
212 ret = tmpio; 212 ret = tmpio;
213 else 213 else
214 ret = ioprio_best(ret, tmpio); 214 ret = ioprio_best(ret, tmpio);
215 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 215 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
216 break; 216 break;
217 case IOPRIO_WHO_USER: 217 case IOPRIO_WHO_USER:
218 if (!who) 218 if (!who)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 8dee32007500..0540ca27a446 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
291 goto out; 291 goto out;
292 } 292 }
293 293
294 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); 294 lock_map_acquire(&handle->h_lockdep_map);
295 295
296out: 296out:
297 return handle; 297 return handle;
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle)
1448 spin_unlock(&journal->j_state_lock); 1448 spin_unlock(&journal->j_state_lock);
1449 } 1449 }
1450 1450
1451 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); 1451 lock_map_release(&handle->h_lockdep_map);
1452 1452
1453 jbd_free_handle(handle); 1453 jbd_free_handle(handle);
1454 return err; 1454 return err;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f7cadbb19fa..e5d540588fa9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
301 goto out; 301 goto out;
302 } 302 }
303 303
304 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); 304 lock_map_acquire(&handle->h_lockdep_map);
305out: 305out:
306 return handle; 306 return handle;
307} 307}
@@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle)
1279 spin_unlock(&journal->j_state_lock); 1279 spin_unlock(&journal->j_state_lock);
1280 } 1280 }
1281 1281
1282 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); 1282 lock_map_release(&handle->h_lockdep_map);
1283 1283
1284 jbd2_free_handle(handle); 1284 jbd2_free_handle(handle);
1285 return err; 1285 return err;
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 31559f45fdde..4c41db91eaa4 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -12,7 +12,6 @@
12#ifndef _JFFS2_FS_I 12#ifndef _JFFS2_FS_I
13#define _JFFS2_FS_I 13#define _JFFS2_FS_I
14 14
15#include <linux/version.h>
16#include <linux/rbtree.h> 15#include <linux/rbtree.h>
17#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
18#include <linux/mutex.h> 17#include <linux/mutex.h>
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 399444639337..4a714f64515b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
83{ 83{
84 struct nlm_host *host; 84 struct nlm_host *host;
85 struct nlm_file *file; 85 struct nlm_file *file;
86 int rc = rpc_success; 86 __be32 rc = rpc_success;
87 87
88 dprintk("lockd: TEST4 called\n"); 88 dprintk("lockd: TEST4 called\n");
89 resp->cookie = argp->cookie; 89 resp->cookie = argp->cookie;
@@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
116{ 116{
117 struct nlm_host *host; 117 struct nlm_host *host;
118 struct nlm_file *file; 118 struct nlm_file *file;
119 int rc = rpc_success; 119 __be32 rc = rpc_success;
120 120
121 dprintk("lockd: LOCK called\n"); 121 dprintk("lockd: LOCK called\n");
122 122
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 76019d2ff72d..76262c1986f2 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
112{ 112{
113 struct nlm_host *host; 113 struct nlm_host *host;
114 struct nlm_file *file; 114 struct nlm_file *file;
115 int rc = rpc_success; 115 __be32 rc = rpc_success;
116 116
117 dprintk("lockd: TEST called\n"); 117 dprintk("lockd: TEST called\n");
118 resp->cookie = argp->cookie; 118 resp->cookie = argp->cookie;
@@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
146{ 146{
147 struct nlm_host *host; 147 struct nlm_host *host;
148 struct nlm_file *file; 148 struct nlm_file *file;
149 int rc = rpc_success; 149 __be32 rc = rpc_success;
150 150
151 dprintk("lockd: LOCK called\n"); 151 dprintk("lockd: LOCK called\n");
152 152
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 33bfcf09db46..9dc036f18356 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp)
1023 /* Look up the dentry */ 1023 /* Look up the dentry */
1024 err = path_lookup(nxp->ex_path, 0, &nd); 1024 err = path_lookup(nxp->ex_path, 0, &nd);
1025 if (err) 1025 if (err)
1026 goto out_unlock; 1026 goto out_put_clp;
1027 err = -EINVAL; 1027 err = -EINVAL;
1028 1028
1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); 1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL);
@@ -1090,9 +1090,9 @@ finish:
1090 exp_put(exp); 1090 exp_put(exp);
1091 if (fsid_key && !IS_ERR(fsid_key)) 1091 if (fsid_key && !IS_ERR(fsid_key))
1092 cache_put(&fsid_key->h, &svc_expkey_cache); 1092 cache_put(&fsid_key->h, &svc_expkey_cache);
1093 if (clp)
1094 auth_domain_put(clp);
1095 path_put(&nd.path); 1093 path_put(&nd.path);
1094out_put_clp:
1095 auth_domain_put(clp);
1096out_unlock: 1096out_unlock:
1097 exp_writeunlock(); 1097 exp_writeunlock();
1098out: 1098out:
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index e1781c8b1650..9e8a95be7a1e 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
174 // TODO: Consider moving this lot to a separate function! (AIA) 174 // TODO: Consider moving this lot to a separate function! (AIA)
175handle_name: 175handle_name:
176 { 176 {
177 struct dentry *real_dent, *new_dent;
178 MFT_RECORD *m; 177 MFT_RECORD *m;
179 ntfs_attr_search_ctx *ctx; 178 ntfs_attr_search_ctx *ctx;
180 ntfs_inode *ni = NTFS_I(dent_inode); 179 ntfs_inode *ni = NTFS_I(dent_inode);
@@ -255,93 +254,9 @@ handle_name:
255 } 254 }
256 nls_name.hash = full_name_hash(nls_name.name, nls_name.len); 255 nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
257 256
258 /* 257 dent = d_add_ci(dent, dent_inode, &nls_name);
259 * Note: No need for dent->d_lock lock as i_mutex is held on the
260 * parent inode.
261 */
262
263 /* Does a dentry matching the nls_name exist already? */
264 real_dent = d_lookup(dent->d_parent, &nls_name);
265 /* If not, create it now. */
266 if (!real_dent) {
267 real_dent = d_alloc(dent->d_parent, &nls_name);
268 kfree(nls_name.name);
269 if (!real_dent) {
270 err = -ENOMEM;
271 goto err_out;
272 }
273 new_dent = d_splice_alias(dent_inode, real_dent);
274 if (new_dent)
275 dput(real_dent);
276 else
277 new_dent = real_dent;
278 ntfs_debug("Done. (Created new dentry.)");
279 return new_dent;
280 }
281 kfree(nls_name.name); 258 kfree(nls_name.name);
282 /* Matching dentry exists, check if it is negative. */ 259 return dent;
283 if (real_dent->d_inode) {
284 if (unlikely(real_dent->d_inode != dent_inode)) {
285 /* This can happen because bad inodes are unhashed. */
286 BUG_ON(!is_bad_inode(dent_inode));
287 BUG_ON(!is_bad_inode(real_dent->d_inode));
288 }
289 /*
290 * Already have the inode and the dentry attached, decrement
291 * the reference count to balance the ntfs_iget() we did
292 * earlier on. We found the dentry using d_lookup() so it
293 * cannot be disconnected and thus we do not need to worry
294 * about any NFS/disconnectedness issues here.
295 */
296 iput(dent_inode);
297 ntfs_debug("Done. (Already had inode and dentry.)");
298 return real_dent;
299 }
300 /*
301 * Negative dentry: instantiate it unless the inode is a directory and
302 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
303 * in which case d_move() that in place of the found dentry.
304 */
305 if (!S_ISDIR(dent_inode->i_mode)) {
306 /* Not a directory; everything is easy. */
307 d_instantiate(real_dent, dent_inode);
308 ntfs_debug("Done. (Already had negative file dentry.)");
309 return real_dent;
310 }
311 spin_lock(&dcache_lock);
312 if (list_empty(&dent_inode->i_dentry)) {
313 /*
314 * Directory without a 'disconnected' dentry; we need to do
315 * d_instantiate() by hand because it takes dcache_lock which
316 * we already hold.
317 */
318 list_add(&real_dent->d_alias, &dent_inode->i_dentry);
319 real_dent->d_inode = dent_inode;
320 spin_unlock(&dcache_lock);
321 security_d_instantiate(real_dent, dent_inode);
322 ntfs_debug("Done. (Already had negative directory dentry.)");
323 return real_dent;
324 }
325 /*
326 * Directory with a 'disconnected' dentry; get a reference to the
327 * 'disconnected' dentry.
328 */
329 new_dent = list_entry(dent_inode->i_dentry.next, struct dentry,
330 d_alias);
331 dget_locked(new_dent);
332 spin_unlock(&dcache_lock);
333 /* Do security voodoo. */
334 security_d_instantiate(real_dent, dent_inode);
335 /* Move new_dent in place of real_dent. */
336 d_move(new_dent, real_dent);
337 /* Balance the ntfs_iget() we did above. */
338 iput(dent_inode);
339 /* Throw away real_dent. */
340 dput(real_dent);
341 /* Use new_dent as the actual dentry. */
342 ntfs_debug("Done. (Already had negative, disconnected directory "
343 "dentry.)");
344 return new_dent;
345 260
346eio_err_out: 261eio_err_out:
347 ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); 262 ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
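
The ntfs conversion above leans on the new VFS helper d_add_ci(), which encapsulates what the removed block open-coded: look up or allocate a dentry for the case-corrected name, splice or instantiate it, and handle disconnected directory aliases. A hypothetical tail for a lookup in some other case-insensitive filesystem would follow the same pattern; note that, as in the ntfs diff, the caller no longer drops the inode reference itself:

#include <linux/dcache.h>
#include <linux/fs.h>

/* after resolving "dent" case-insensitively to "inode" and building the
 * on-disk spelling in "ci_name", hand everything to the VFS helper */
static struct dentry *my_lookup_tail(struct dentry *dent, struct inode *inode,
				     struct qstr *ci_name)
{
	return d_add_ci(dent, inode, ci_name);
}
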
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index d8bfa0eb41b2..52276c02f710 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v)
138 " message id: %d\n" 138 " message id: %d\n"
139 " message type: %u\n" 139 " message type: %u\n"
140 " message key: 0x%08x\n" 140 " message key: 0x%08x\n"
141 " sock acquiry: %lu.%lu\n" 141 " sock acquiry: %lu.%ld\n"
142 " send start: %lu.%lu\n" 142 " send start: %lu.%ld\n"
143 " wait start: %lu.%lu\n", 143 " wait start: %lu.%ld\n",
144 nst, (unsigned long)nst->st_task->pid, 144 nst, (unsigned long)nst->st_task->pid,
145 (unsigned long)nst->st_task->tgid, 145 (unsigned long)nst->st_task->tgid,
146 nst->st_task->comm, nst->st_node, 146 nst->st_task->comm, nst->st_node,
147 nst->st_sc, nst->st_id, nst->st_msg_type, 147 nst->st_sc, nst->st_id, nst->st_msg_type,
148 nst->st_msg_key, 148 nst->st_msg_key,
149 nst->st_sock_time.tv_sec, 149 nst->st_sock_time.tv_sec,
150 (unsigned long)nst->st_sock_time.tv_usec, 150 (long)nst->st_sock_time.tv_usec,
151 nst->st_send_time.tv_sec, 151 nst->st_send_time.tv_sec,
152 (unsigned long)nst->st_send_time.tv_usec, 152 (long)nst->st_send_time.tv_usec,
153 nst->st_status_time.tv_sec, 153 nst->st_status_time.tv_sec,
154 nst->st_status_time.tv_usec); 154 (long)nst->st_status_time.tv_usec);
155 } 155 }
156 156
157 spin_unlock(&o2net_debug_lock); 157 spin_unlock(&o2net_debug_lock);
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
276 return sc; /* unused, just needs to be null when done */ 276 return sc; /* unused, just needs to be null when done */
277} 277}
278 278
279#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec 279#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
280 280
281static int sc_seq_show(struct seq_file *seq, void *v) 281static int sc_seq_show(struct seq_file *seq, void *v)
282{ 282{
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v)
309 " remote node: %s\n" 309 " remote node: %s\n"
310 " page off: %zu\n" 310 " page off: %zu\n"
311 " handshake ok: %u\n" 311 " handshake ok: %u\n"
312 " timer: %lu.%lu\n" 312 " timer: %lu.%ld\n"
313 " data ready: %lu.%lu\n" 313 " data ready: %lu.%ld\n"
314 " advance start: %lu.%lu\n" 314 " advance start: %lu.%ld\n"
315 " advance stop: %lu.%lu\n" 315 " advance stop: %lu.%ld\n"
316 " func start: %lu.%lu\n" 316 " func start: %lu.%ld\n"
317 " func stop: %lu.%lu\n" 317 " func stop: %lu.%ld\n"
318 " func key: %u\n" 318 " func key: %u\n"
319 " func type: %u\n", 319 " func type: %u\n",
320 sc, 320 sc,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a27d61581bd6..2bcf706d9dd3 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
144 144
145#ifdef CONFIG_DEBUG_FS 145#ifdef CONFIG_DEBUG_FS
146void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, 146static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
147 u32 msgkey, struct task_struct *task, u8 node) 147 u32 msgkey, struct task_struct *task, u8 node)
148{ 148{
149 INIT_LIST_HEAD(&nst->st_net_debug_item); 149 INIT_LIST_HEAD(&nst->st_net_debug_item);
150 nst->st_task = task; 150 nst->st_task = task;
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
153 nst->st_node = node; 153 nst->st_node = node;
154} 154}
155 155
156void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 156static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
157{ 157{
158 do_gettimeofday(&nst->st_sock_time); 158 do_gettimeofday(&nst->st_sock_time);
159} 159}
160 160
161void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 161static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
162{ 162{
163 do_gettimeofday(&nst->st_send_time); 163 do_gettimeofday(&nst->st_send_time);
164} 164}
165 165
166void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 166static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
167{ 167{
168 do_gettimeofday(&nst->st_status_time); 168 do_gettimeofday(&nst->st_status_time);
169} 169}
170 170
171void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 171static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
172 struct o2net_sock_container *sc) 172 struct o2net_sock_container *sc)
173{ 173{
174 nst->st_sc = sc; 174 nst->st_sc = sc;
175} 175}
176 176
177void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) 177static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
178{ 178{
179 nst->st_id = msg_id; 179 nst->st_id = msg_id;
180} 180}
181
182#else /* CONFIG_DEBUG_FS */
183
184static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
185 u32 msgkey, struct task_struct *task, u8 node)
186{
187}
188
189static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
190{
191}
192
193static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
194{
195}
196
197static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
198{
199}
200
201static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
202 struct o2net_sock_container *sc)
203{
204}
205
206static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
207 u32 msg_id)
208{
209}
210
181#endif /* CONFIG_DEBUG_FS */ 211#endif /* CONFIG_DEBUG_FS */
182 212
183static inline int o2net_reconnect_delay(void) 213static inline int o2net_reconnect_delay(void)
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 18307ff81b77..8d58cfe410b1 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -224,42 +224,10 @@ struct o2net_send_tracking {
224 struct timeval st_send_time; 224 struct timeval st_send_time;
225 struct timeval st_status_time; 225 struct timeval st_status_time;
226}; 226};
227
228void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
229 u32 msgkey, struct task_struct *task, u8 node);
230void o2net_set_nst_sock_time(struct o2net_send_tracking *nst);
231void o2net_set_nst_send_time(struct o2net_send_tracking *nst);
232void o2net_set_nst_status_time(struct o2net_send_tracking *nst);
233void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
234 struct o2net_sock_container *sc);
235void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id);
236
237#else 227#else
238struct o2net_send_tracking { 228struct o2net_send_tracking {
239 u32 dummy; 229 u32 dummy;
240}; 230};
241
242static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
243 u32 msgkey, struct task_struct *task, u8 node)
244{
245}
246static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
247{
248}
249static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
250{
251}
252static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
253{
254}
255static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
256 struct o2net_sock_container *sc)
257{
258}
259static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
260 u32 msg_id)
261{
262}
263#endif /* CONFIG_DEBUG_FS */ 231#endif /* CONFIG_DEBUG_FS */
264 232
265#endif /* O2CLUSTER_TCP_INTERNAL_H */ 233#endif /* O2CLUSTER_TCP_INTERNAL_H */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8a1875848080..9cce563fd627 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1300 di->i_size = cpu_to_le64(sb->s_blocksize); 1300 di->i_size = cpu_to_le64(sb->s_blocksize);
1301 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); 1301 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
1302 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); 1302 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
1303 dir->i_blocks = ocfs2_inode_sector_count(dir);
1304 1303
1305 /* 1304 /*
1306 * This should never fail as our extent list is empty and all 1305 * This should never fail as our extent list is empty and all
@@ -1310,9 +1309,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1310 NULL); 1309 NULL);
1311 if (ret) { 1310 if (ret) {
1312 mlog_errno(ret); 1311 mlog_errno(ret);
1313 goto out; 1312 goto out_commit;
1314 } 1313 }
1315 1314
1315 /*
1316 * Set i_blocks after the extent insert for the most up to
1317 * date ip_clusters value.
1318 */
1319 dir->i_blocks = ocfs2_inode_sector_count(dir);
1320
1316 ret = ocfs2_journal_dirty(handle, di_bh); 1321 ret = ocfs2_journal_dirty(handle, di_bh);
1317 if (ret) { 1322 if (ret) {
1318 mlog_errno(ret); 1323 mlog_errno(ret);
@@ -1336,7 +1341,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1336 len, 0, NULL); 1341 len, 0, NULL);
1337 if (ret) { 1342 if (ret) {
1338 mlog_errno(ret); 1343 mlog_errno(ret);
1339 goto out; 1344 goto out_commit;
1340 } 1345 }
1341 } 1346 }
1342 1347
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 7a37240f7a31..c47bc2a809c2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1418,13 +1418,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1418{ 1418{
1419 unsigned int node_num; 1419 unsigned int node_num;
1420 int status, i; 1420 int status, i;
1421 u32 gen;
1421 struct buffer_head *bh = NULL; 1422 struct buffer_head *bh = NULL;
1422 struct ocfs2_dinode *di; 1423 struct ocfs2_dinode *di;
1423 1424
1424 /* This is called with the super block cluster lock, so we 1425 /* This is called with the super block cluster lock, so we
1425 * know that the slot map can't change underneath us. */ 1426 * know that the slot map can't change underneath us. */
1426 1427
1427 spin_lock(&osb->osb_lock);
1428 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1429 /* Read journal inode to get the recovery generation */ 1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL); 1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
@@ -1433,23 +1433,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1433 goto bail; 1433 goto bail;
1434 } 1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data; 1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 osb->slot_recovery_generations[i] = 1436 gen = ocfs2_get_recovery_generation(di);
1437 ocfs2_get_recovery_generation(di);
1438 brelse(bh); 1437 brelse(bh);
1439 bh = NULL; 1438 bh = NULL;
1440 1439
1440 spin_lock(&osb->osb_lock);
1441 osb->slot_recovery_generations[i] = gen;
1442
1441 mlog(0, "Slot %u recovery generation is %u\n", i, 1443 mlog(0, "Slot %u recovery generation is %u\n", i,
1442 osb->slot_recovery_generations[i]); 1444 osb->slot_recovery_generations[i]);
1443 1445
1444 if (i == osb->slot_num) 1446 if (i == osb->slot_num) {
1447 spin_unlock(&osb->osb_lock);
1445 continue; 1448 continue;
1449 }
1446 1450
1447 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); 1451 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1448 if (status == -ENOENT) 1452 if (status == -ENOENT) {
1453 spin_unlock(&osb->osb_lock);
1449 continue; 1454 continue;
1455 }
1450 1456
1451 if (__ocfs2_recovery_map_test(osb, node_num)) 1457 if (__ocfs2_recovery_map_test(osb, node_num)) {
1458 spin_unlock(&osb->osb_lock);
1452 continue; 1459 continue;
1460 }
1453 spin_unlock(&osb->osb_lock); 1461 spin_unlock(&osb->osb_lock);
1454 1462
1455 /* Ok, we have a slot occupied by another node which 1463 /* Ok, we have a slot occupied by another node which
@@ -1465,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1465 mlog_errno(status); 1473 mlog_errno(status);
1466 goto bail; 1474 goto bail;
1467 } 1475 }
1468
1469 spin_lock(&osb->osb_lock);
1470 } 1476 }
1471 spin_unlock(&osb->osb_lock);
1472 1477
1473 status = 0; 1478 status = 0;
1474bail: 1479bail:
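
The ocfs2_mark_dead_nodes() rework above is a lock-scope fix: ocfs2_read_journal_inode() can sleep, so osb_lock is now taken fresh on each iteration and held only around the shared-state reads and updates, never across the blocking call. The same shape in a stand-alone sketch, where a pthread spinlock and a dummy read stand in for the ocfs2 pieces (compile with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t lock;
static unsigned gens[8];

/* stands in for ocfs2_read_journal_inode(): may block on disk I/O */
static unsigned read_slot_generation(int slot)
{
	return 42u + slot;
}

int main(void)
{
	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);

	for (int i = 0; i < 8; i++) {
		unsigned gen = read_slot_generation(i);	/* blocking work, lock not held */

		pthread_spin_lock(&lock);	/* lock covers only the shared update */
		gens[i] = gen;
		pthread_spin_unlock(&lock);
	}
	printf("slot 0 generation: %u\n", gens[0]);
	return 0;
}
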
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 10e149ae5e3a..07f348b8d721 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name,
97 goto out; 97 goto out;
98 } 98 }
99 99
100 /* Ok, the stack is pinned */
101 p->sp_count++;
102 active_stack = p; 100 active_stack = p;
103
104 rc = 0; 101 rc = 0;
105 102
106out: 103out:
104 /* If we found it, pin it */
105 if (!rc)
106 active_stack->sp_count++;
107
107 spin_unlock(&ocfs2_stack_lock); 108 spin_unlock(&ocfs2_stack_lock);
108 return rc; 109 return rc;
109} 110}
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index 697663b01bae..e1c0ec0ae989 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -92,7 +92,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
92 struct buffer_head *bh; 92 struct buffer_head *bh;
93 struct omfs_sb_info *sbi = OMFS_SB(sb); 93 struct omfs_sb_info *sbi = OMFS_SB(sb);
94 int bits_per_entry = 8 * sb->s_blocksize; 94 int bits_per_entry = 8 * sb->s_blocksize;
95 int map, bit; 95 unsigned int map, bit;
96 int ret = 0; 96 int ret = 0;
97 u64 tmp; 97 u64 tmp;
98 98
@@ -176,7 +176,8 @@ int omfs_clear_range(struct super_block *sb, u64 block, int count)
176 struct omfs_sb_info *sbi = OMFS_SB(sb); 176 struct omfs_sb_info *sbi = OMFS_SB(sb);
177 int bits_per_entry = 8 * sb->s_blocksize; 177 int bits_per_entry = 8 * sb->s_blocksize;
178 u64 tmp; 178 u64 tmp;
179 int map, bit, ret; 179 unsigned int map, bit;
180 int ret;
180 181
181 tmp = block; 182 tmp = block;
182 bit = do_div(tmp, bits_per_entry); 183 bit = do_div(tmp, bits_per_entry);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 7e2499053e4d..834b2331f6b3 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -26,6 +26,13 @@ static int omfs_sync_file(struct file *file, struct dentry *dentry,
26 return err ? -EIO : 0; 26 return err ? -EIO : 0;
27} 27}
28 28
29static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
30{
31 return (sbi->s_sys_blocksize - offset -
32 sizeof(struct omfs_extent)) /
33 sizeof(struct omfs_extent_entry) + 1;
34}
35
29void omfs_make_empty_table(struct buffer_head *bh, int offset) 36void omfs_make_empty_table(struct buffer_head *bh, int offset)
30{ 37{
31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; 38 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
@@ -45,6 +52,7 @@ int omfs_shrink_inode(struct inode *inode)
45 struct buffer_head *bh; 52 struct buffer_head *bh;
46 u64 next, last; 53 u64 next, last;
47 u32 extent_count; 54 u32 extent_count;
55 u32 max_extents;
48 int ret; 56 int ret;
49 57
50 /* traverse extent table, freeing each entry that is greater 58 /* traverse extent table, freeing each entry that is greater
@@ -62,15 +70,18 @@ int omfs_shrink_inode(struct inode *inode)
62 goto out; 70 goto out;
63 71
64 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); 72 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
73 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
65 74
66 for (;;) { 75 for (;;) {
67 76
68 if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) { 77 if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
69 brelse(bh); 78 goto out_brelse;
70 goto out;
71 }
72 79
73 extent_count = be32_to_cpu(oe->e_extent_count); 80 extent_count = be32_to_cpu(oe->e_extent_count);
81
82 if (extent_count > max_extents)
83 goto out_brelse;
84
74 last = next; 85 last = next;
75 next = be64_to_cpu(oe->e_next); 86 next = be64_to_cpu(oe->e_next);
76 entry = &oe->e_entry; 87 entry = &oe->e_entry;
@@ -98,10 +109,14 @@ int omfs_shrink_inode(struct inode *inode)
98 if (!bh) 109 if (!bh)
99 goto out; 110 goto out;
100 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); 111 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
112 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
101 } 113 }
102 ret = 0; 114 ret = 0;
103out: 115out:
104 return ret; 116 return ret;
117out_brelse:
118 brelse(bh);
119 return ret;
105} 120}
106 121
107static void omfs_truncate(struct inode *inode) 122static void omfs_truncate(struct inode *inode)
@@ -154,9 +169,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
154 goto out; 169 goto out;
155 } 170 }
156 } 171 }
157 max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START - 172 max_count = omfs_max_extents(sbi, OMFS_EXTENT_START);
158 sizeof(struct omfs_extent)) /
159 sizeof(struct omfs_extent_entry) + 1;
160 173
161 /* TODO: add a continuation block here */ 174 /* TODO: add a continuation block here */
162 if (be32_to_cpu(oe->e_extent_count) > max_count-1) 175 if (be32_to_cpu(oe->e_extent_count) > max_count-1)
@@ -225,6 +238,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
225 sector_t next, offset; 238 sector_t next, offset;
226 int ret; 239 int ret;
227 u64 new_block; 240 u64 new_block;
241 u32 max_extents;
228 int extent_count; 242 int extent_count;
229 struct omfs_extent *oe; 243 struct omfs_extent *oe;
230 struct omfs_extent_entry *entry; 244 struct omfs_extent_entry *entry;
@@ -238,6 +252,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
238 goto out; 252 goto out;
239 253
240 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); 254 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
255 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
241 next = inode->i_ino; 256 next = inode->i_ino;
242 257
243 for (;;) { 258 for (;;) {
@@ -249,6 +264,9 @@ static int omfs_get_block(struct inode *inode, sector_t block,
249 next = be64_to_cpu(oe->e_next); 264 next = be64_to_cpu(oe->e_next);
250 entry = &oe->e_entry; 265 entry = &oe->e_entry;
251 266
267 if (extent_count > max_extents)
268 goto out_brelse;
269
252 offset = find_block(inode, entry, block, extent_count, &remain); 270 offset = find_block(inode, entry, block, extent_count, &remain);
253 if (offset > 0) { 271 if (offset > 0) {
254 ret = 0; 272 ret = 0;
@@ -266,6 +284,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
266 if (!bh) 284 if (!bh)
267 goto out; 285 goto out;
268 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); 286 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
287 max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
269 } 288 }
270 if (create) { 289 if (create) {
271 ret = omfs_grow_extent(inode, oe, &new_block); 290 ret = omfs_grow_extent(inode, oe, &new_block);
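
The helper factored out above, omfs_max_extents(), is just the bound on how many extent entries fit in a system block: the space left after the extent header, divided by the entry size, plus the one entry embedded in struct omfs_extent itself; the new checks then refuse any on-disk extent count exceeding that bound. A sketch with assumed sizes (2K system blocks, 16-byte header and entries, illustrative only):

#include <stdio.h>

static unsigned max_extents(unsigned sys_blocksize, unsigned offset,
			    unsigned hdr_size, unsigned entry_size)
{
	/* whole entries after the header, plus the entry inside the header */
	return (sys_blocksize - offset - hdr_size) / entry_size + 1;
}

int main(void)
{
	printf("max extents: %u\n", max_extents(2048, 64, 16, 16));	/* 124 */
	return 0;
}
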
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index a95fe5984f4b..d29047b1b9b0 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -232,8 +232,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
232 inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask); 232 inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
233 inode->i_op = &omfs_dir_inops; 233 inode->i_op = &omfs_dir_inops;
234 inode->i_fop = &omfs_dir_operations; 234 inode->i_fop = &omfs_dir_operations;
235 inode->i_size = be32_to_cpu(oi->i_head.h_body_size) + 235 inode->i_size = sbi->s_sys_blocksize;
236 sizeof(struct omfs_header);
237 inc_nlink(inode); 236 inc_nlink(inode);
238 break; 237 break;
239 case OMFS_FILE: 238 case OMFS_FILE:
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 4fb81e9c94e3..bca0f81eb687 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -330,6 +330,7 @@ retry:
330 spin_lock(&proc_inum_lock); 330 spin_lock(&proc_inum_lock);
331 ida_remove(&proc_inum_ida, i); 331 ida_remove(&proc_inum_ida, i);
332 spin_unlock(&proc_inum_lock); 332 spin_unlock(&proc_inum_lock);
333 return 0;
333 } 334 }
334 return PROC_DYNAMIC_FIRST + i; 335 return PROC_DYNAMIC_FIRST + i;
335} 336}
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 79ecd281d2cb..3f87d2632947 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
52 } 52 }
53 53
54 seq_printf(m, 54 seq_printf(m,
55 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56 vma->vm_start, 56 vma->vm_start,
57 vma->vm_end, 57 vma->vm_end,
58 flags & VM_READ ? 'r' : '-', 58 flags & VM_READ ? 'r' : '-',
59 flags & VM_WRITE ? 'w' : '-', 59 flags & VM_WRITE ? 'w' : '-',
60 flags & VM_EXEC ? 'x' : '-', 60 flags & VM_EXEC ? 'x' : '-',
61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62 vma->vm_pgoff << PAGE_SHIFT, 62 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
63 MAJOR(dev), MINOR(dev), ino, &len); 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64
65 if (file) { 65 if (file) {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7546a918f790..73d1891ee625 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v)
219 ino = inode->i_ino; 219 ino = inode->i_ino;
220 } 220 }
221 221
222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
223 vma->vm_start, 223 vma->vm_start,
224 vma->vm_end, 224 vma->vm_end,
225 flags & VM_READ ? 'r' : '-', 225 flags & VM_READ ? 'r' : '-',
226 flags & VM_WRITE ? 'w' : '-', 226 flags & VM_WRITE ? 'w' : '-',
227 flags & VM_EXEC ? 'x' : '-', 227 flags & VM_EXEC ? 'x' : '-',
228 flags & VM_MAYSHARE ? 's' : 'p', 228 flags & VM_MAYSHARE ? 's' : 'p',
229 vma->vm_pgoff << PAGE_SHIFT, 229 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
230 MAJOR(dev), MINOR(dev), ino, &len); 230 MAJOR(dev), MINOR(dev), ino, &len);
231 231
232 /* 232 /*
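
Both /proc fixes above address the same 32-bit truncation: vm_pgoff is an unsigned long, so vm_pgoff << PAGE_SHIFT wraps on 32-bit kernels once the file offset needs more than 32 bits; the added cast widens to loff_t before shifting. A hypothetical demonstration of the difference:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t vm_pgoff = 0x00150000;	/* page offset ~5.25GB into a file */
	unsigned page_shift = 12;	/* PAGE_SHIFT on most configurations */

	/* shifting in 32 bits first: the high bits are silently lost */
	printf("truncated: 0x%08x\n", vm_pgoff << page_shift);
	/* widening to 64 bits before the shift preserves the full offset */
	printf("correct:   0x%016llx\n",
	       (unsigned long long)vm_pgoff << page_shift);
	return 0;
}
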
diff --git a/fs/readdir.c b/fs/readdir.c
index 4e026e5407fb..93a7559bbfd8 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset
80 if (buf->result) 80 if (buf->result)
81 return -EINVAL; 81 return -EINVAL;
82 d_ino = ino; 82 d_ino = ino;
83 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 83 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
84 buf->result = -EOVERFLOW;
84 return -EOVERFLOW; 85 return -EOVERFLOW;
86 }
85 buf->result++; 87 buf->result++;
86 dirent = buf->dirent; 88 dirent = buf->dirent;
87 if (!access_ok(VERIFY_WRITE, dirent, 89 if (!access_ok(VERIFY_WRITE, dirent,
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
155 if (reclen > buf->count) 157 if (reclen > buf->count)
156 return -EINVAL; 158 return -EINVAL;
157 d_ino = ino; 159 d_ino = ino;
158 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 160 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
161 buf->error = -EOVERFLOW;
159 return -EOVERFLOW; 162 return -EOVERFLOW;
163 }
160 dirent = buf->previous; 164 dirent = buf->previous;
161 if (dirent) { 165 if (dirent) {
162 if (__put_user(offset, &dirent->d_off)) 166 if (__put_user(offset, &dirent->d_off))
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 282a13596c70..d318c7e663fa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -27,7 +27,6 @@
27#include <linux/mnt_namespace.h> 27#include <linux/mnt_namespace.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/quotaops.h>
31 30
32struct file_system_type reiserfs_fs_type; 31struct file_system_type reiserfs_fs_type;
33 32
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3f54dbd6c49b..bd20f7f5a933 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
108 goto Done; 108 goto Done;
109 } 109 }
110 /* we need at least one record in buffer */ 110 /* we need at least one record in buffer */
111 pos = m->index;
112 p = m->op->start(m, &pos);
111 while (1) { 113 while (1) {
112 pos = m->index;
113 p = m->op->start(m, &pos);
114 err = PTR_ERR(p); 114 err = PTR_ERR(p);
115 if (!p || IS_ERR(p)) 115 if (!p || IS_ERR(p))
116 break; 116 break;
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
119 break; 119 break;
120 if (unlikely(err)) 120 if (unlikely(err))
121 m->count = 0; 121 m->count = 0;
122 if (unlikely(!m->count)) {
123 p = m->op->next(m, p, &pos);
124 m->index = pos;
125 continue;
126 }
122 if (m->count < m->size) 127 if (m->count < m->size)
123 goto Fill; 128 goto Fill;
124 m->op->stop(m, p); 129 m->op->stop(m, p);
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
128 goto Enomem; 133 goto Enomem;
129 m->count = 0; 134 m->count = 0;
130 m->version = 0; 135 m->version = 0;
136 pos = m->index;
137 p = m->op->start(m, &pos);
131 } 138 }
132 m->op->stop(m, p); 139 m->op->stop(m, p);
133 m->count = 0; 140 m->count = 0;
@@ -443,6 +450,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
443 return -1; 450 return -1;
444} 451}
445 452
453int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
454{
455 size_t len = bitmap_scnprintf_len(nr_bits);
456
457 if (m->count + len < m->size) {
458 bitmap_scnprintf(m->buf + m->count, m->size - m->count,
459 bits, nr_bits);
460 m->count += len;
461 return 0;
462 }
463 m->count = m->size;
464 return -1;
465}
466
446static void *single_start(struct seq_file *p, loff_t *pos) 467static void *single_start(struct seq_file *p, loff_t *pos)
447{ 468{
448 return NULL + (*pos == 0); 469 return NULL + (*pos == 0);
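
The new seq_bitmap() helper follows the usual seq_file contract: either the whole formatted bitmap fits, or the buffer is marked full so seq_read() reallocates a larger one and retries. A hypothetical show method using it; the mask and its meaning are invented for illustration, and only the seq_bitmap() call reflects the interface added above:

#include <linux/seq_file.h>
#include <linux/bitmap.h>

static DECLARE_BITMAP(my_mask, 64);	/* hypothetical 64-bit state mask */

static int my_seq_show(struct seq_file *m, void *v)
{
	if (seq_bitmap(m, my_mask, 64) < 0)
		return -1;	/* buffer full; seq_read() grows it and retries */
	seq_putc(m, '\n');
	return 0;
}
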
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index d81fb9ed2b8e..154098157473 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -263,8 +263,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
263 263
264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; 264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
265 265
266 /* And make sure we have twice the index size of space reserved */ 266 /* And make sure we have thrice the index size of space reserved */
267 idx_size <<= 1; 267 idx_size = idx_size + (idx_size << 1);
268 268
269 /* 269 /*
270 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' 270 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
@@ -388,11 +388,11 @@ static int can_use_rp(struct ubifs_info *c)
388 * This function makes sure UBIFS has enough free eraseblocks for index growth 388 * This function makes sure UBIFS has enough free eraseblocks for index growth
389 * and data. 389 * and data.
390 * 390 *
391 * When budgeting index space, UBIFS reserves twice as more LEBs as the index 391 * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
392 * would take if it was consolidated and written to the flash. This guarantees 392 * would take if it was consolidated and written to the flash. This guarantees
393 * that the "in-the-gaps" commit method always succeeds and UBIFS will always 393 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
394 * be able to commit the dirty index. So this function basically adds the amount of 394 * be able to commit the dirty index. So this function basically adds the amount of
395 * budgeted index space to the size of the current index, multiplies this by 2, 395 * budgeted index space to the size of the current index, multiplies this by 3,
396 * and makes sure this does not exceed the amount of free eraseblocks. 396 * and makes sure this does not exceed the amount of free eraseblocks.
397 * 397 *
398 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: 398 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
@@ -543,8 +543,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
543 int err, idx_growth, data_growth, dd_growth; 543 int err, idx_growth, data_growth, dd_growth;
544 struct retries_info ri; 544 struct retries_info ri;
545 545
546 ubifs_assert(req->new_page <= 1);
547 ubifs_assert(req->dirtied_page <= 1);
548 ubifs_assert(req->new_dent <= 1);
549 ubifs_assert(req->mod_dent <= 1);
550 ubifs_assert(req->new_ino <= 1);
551 ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
546 ubifs_assert(req->dirtied_ino <= 4); 552 ubifs_assert(req->dirtied_ino <= 4);
547 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); 553 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
554 ubifs_assert(!(req->new_ino_d & 7));
555 ubifs_assert(!(req->dirtied_ino_d & 7));
548 556
549 data_growth = calc_data_growth(c, req); 557 data_growth = calc_data_growth(c, req);
550 dd_growth = calc_dd_growth(c, req); 558 dd_growth = calc_dd_growth(c, req);
@@ -618,8 +626,16 @@ again:
618 */ 626 */
619void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) 627void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
620{ 628{
629 ubifs_assert(req->new_page <= 1);
630 ubifs_assert(req->dirtied_page <= 1);
631 ubifs_assert(req->new_dent <= 1);
632 ubifs_assert(req->mod_dent <= 1);
633 ubifs_assert(req->new_ino <= 1);
634 ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
621 ubifs_assert(req->dirtied_ino <= 4); 635 ubifs_assert(req->dirtied_ino <= 4);
622 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); 636 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
637 ubifs_assert(!(req->new_ino_d & 7));
638 ubifs_assert(!(req->dirtied_ino_d & 7));
623 if (!req->recalculate) { 639 if (!req->recalculate) {
624 ubifs_assert(req->idx_growth >= 0); 640 ubifs_assert(req->idx_growth >= 0);
625 ubifs_assert(req->data_growth >= 0); 641 ubifs_assert(req->data_growth >= 0);
@@ -647,7 +663,11 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
647 663
648 ubifs_assert(c->budg_idx_growth >= 0); 664 ubifs_assert(c->budg_idx_growth >= 0);
649 ubifs_assert(c->budg_data_growth >= 0); 665 ubifs_assert(c->budg_data_growth >= 0);
666 ubifs_assert(c->budg_dd_growth >= 0);
650 ubifs_assert(c->min_idx_lebs < c->main_lebs); 667 ubifs_assert(c->min_idx_lebs < c->main_lebs);
668 ubifs_assert(!(c->budg_idx_growth & 7));
669 ubifs_assert(!(c->budg_data_growth & 7));
670 ubifs_assert(!(c->budg_dd_growth & 7));
651 spin_unlock(&c->space_lock); 671 spin_unlock(&c->space_lock);
652} 672}
653 673
@@ -686,9 +706,10 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
686void ubifs_release_dirty_inode_budget(struct ubifs_info *c, 706void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
687 struct ubifs_inode *ui) 707 struct ubifs_inode *ui)
688{ 708{
689 struct ubifs_budget_req req = {.dd_growth = c->inode_budget, 709 struct ubifs_budget_req req;
690 .dirtied_ino_d = ui->data_len};
691 710
711 memset(&req, 0, sizeof(struct ubifs_budget_req));
712 req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
692 ubifs_release_budget(c, &req); 713 ubifs_release_budget(c, &req);
693} 714}
694 715
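
The ubifs_calc_min_idx_lebs() change is plain arithmetic dressed in shifts: the reservation grows from two to three times the consolidated index size. A trivial check of the two expressions:

#include <stdio.h>

int main(void)
{
	long long idx_size = 1000;

	long long old_reserve = idx_size << 1;			/* 2 * idx_size */
	long long new_reserve = idx_size + (idx_size << 1);	/* 3 * idx_size */

	printf("old %lld, new %lld\n", old_reserve, new_reserve);
	return 0;
}
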
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 3b516316c9b3..0a6aa2cc78f0 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -74,6 +74,7 @@ static int do_commit(struct ubifs_info *c)
74 goto out_up; 74 goto out_up;
75 } 75 }
76 76
77 c->cmt_no += 1;
77 err = ubifs_gc_start_commit(c); 78 err = ubifs_gc_start_commit(c);
78 if (err) 79 if (err)
79 goto out_up; 80 goto out_up;
@@ -115,7 +116,7 @@ static int do_commit(struct ubifs_info *c)
115 goto out; 116 goto out;
116 117
117 mutex_lock(&c->mst_mutex); 118 mutex_lock(&c->mst_mutex);
118 c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); 119 c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
119 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); 120 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
120 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); 121 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
121 c->mst_node->root_offs = cpu_to_le32(zroot.offs); 122 c->mst_node->root_offs = cpu_to_le32(zroot.offs);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4e3aaeba4eca..b9cb77473758 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -568,8 +568,8 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req)
568void dbg_dump_lstats(const struct ubifs_lp_stats *lst) 568void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
569{ 569{
570 spin_lock(&dbg_lock); 570 spin_lock(&dbg_lock);
571 printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n", 571 printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, "
572 lst->empty_lebs, lst->idx_lebs); 572 "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
573 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " 573 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
574 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, 574 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
575 lst->total_dirty); 575 lst->total_dirty);
@@ -587,8 +587,8 @@ void dbg_dump_budg(struct ubifs_info *c)
587 struct ubifs_gced_idx_leb *idx_gc; 587 struct ubifs_gced_idx_leb *idx_gc;
588 588
589 spin_lock(&dbg_lock); 589 spin_lock(&dbg_lock);
590 printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, " 590 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
591 "budg_dd_growth %lld, budg_idx_growth %lld\n", 591 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
592 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); 592 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
593 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " 593 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
594 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, 594 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
@@ -634,7 +634,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
634 struct ubifs_lprops lp; 634 struct ubifs_lprops lp;
635 struct ubifs_lp_stats lst; 635 struct ubifs_lp_stats lst;
636 636
637 printk(KERN_DEBUG "Dumping LEB properties\n"); 637 printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid);
638 ubifs_get_lp_stats(c, &lst); 638 ubifs_get_lp_stats(c, &lst);
639 dbg_dump_lstats(&lst); 639 dbg_dump_lstats(&lst);
640 640
@@ -655,7 +655,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
655 if (dbg_failure_mode) 655 if (dbg_failure_mode)
656 return; 656 return;
657 657
658 printk(KERN_DEBUG "Dumping LEB %d\n", lnum); 658 printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum);
659 659
660 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); 660 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
661 if (IS_ERR(sleb)) { 661 if (IS_ERR(sleb)) {
@@ -720,8 +720,8 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
720{ 720{
721 int i; 721 int i;
722 722
723 printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n", 723 printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n",
724 cat, heap->cnt); 724 current->pid, cat, heap->cnt);
725 for (i = 0; i < heap->cnt; i++) { 725 for (i = 0; i < heap->cnt; i++) {
726 struct ubifs_lprops *lprops = heap->arr[i]; 726 struct ubifs_lprops *lprops = heap->arr[i];
727 727
@@ -736,7 +736,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
736{ 736{
737 int i; 737 int i;
738 738
739 printk(KERN_DEBUG "Dumping pnode:\n"); 739 printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid);
740 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", 740 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
741 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); 741 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
742 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", 742 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
@@ -755,7 +755,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
755 int level; 755 int level;
756 756
757 printk(KERN_DEBUG "\n"); 757 printk(KERN_DEBUG "\n");
758 printk(KERN_DEBUG "Dumping the TNC tree\n"); 758 printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid);
759 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 759 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
760 level = znode->level; 760 level = znode->level;
761 printk(KERN_DEBUG "== Level %d ==\n", level); 761 printk(KERN_DEBUG "== Level %d ==\n", level);
@@ -2208,16 +2208,17 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
2208int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, 2208int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2209 int offset, int len, int dtype) 2209 int offset, int len, int dtype)
2210{ 2210{
2211 int err; 2211 int err, failing;
2212 2212
2213 if (in_failure_mode(desc)) 2213 if (in_failure_mode(desc))
2214 return -EIO; 2214 return -EIO;
2215 if (do_fail(desc, lnum, 1)) 2215 failing = do_fail(desc, lnum, 1);
2216 if (failing)
2216 cut_data(buf, len); 2217 cut_data(buf, len);
2217 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); 2218 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
2218 if (err) 2219 if (err)
2219 return err; 2220 return err;
2220 if (in_failure_mode(desc)) 2221 if (failing)
2221 return -EIO; 2222 return -EIO;
2222 return 0; 2223 return 0;
2223} 2224}
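
The dbg_leb_write() change captures the fault decision once, before the write, so -EIO is returned only for the write that was actually corrupted; previously a later in_failure_mode() check could fail an untouched write. A sketch of the pattern, where leb_write_backend() is a hypothetical stand-in for the real ubi_leb_write() call:

	int failing = do_fail(desc, lnum, 1);  /* decide before acting        */
	if (failing)
		cut_data(buf, len);            /* corrupt exactly this write  */
	err = leb_write_backend(desc, lnum, buf, offset, len, dtype);
	if (err)
		return err;
	return failing ? -EIO : 0;             /* fail only what we corrupted */
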
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 3c4f1e93c9e0..50315fc57185 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -27,7 +27,7 @@
27 27
28#define UBIFS_DBG(op) op 28#define UBIFS_DBG(op) op
29 29
30#define ubifs_assert(expr) do { \ 30#define ubifs_assert(expr) do { \
31 if (unlikely(!(expr))) { \ 31 if (unlikely(!(expr))) { \
32 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ 32 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
33 __func__, __LINE__, current->pid); \ 33 __func__, __LINE__, current->pid); \
@@ -73,50 +73,50 @@ const char *dbg_key_str1(const struct ubifs_info *c,
73 const union ubifs_key *key); 73 const union ubifs_key *key);
74 74
75/* 75/*
76 * DBGKEY macros require dbg_lock to be held, which it is in the dbg message 76 * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message
77 * macros. 77 * macros.
78 */ 78 */
79#define DBGKEY(key) dbg_key_str0(c, (key)) 79#define DBGKEY(key) dbg_key_str0(c, (key))
80#define DBGKEY1(key) dbg_key_str1(c, (key)) 80#define DBGKEY1(key) dbg_key_str1(c, (key))
81 81
82/* General messages */ 82/* General messages */
83#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) 83#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
84 84
85/* Additional journal messages */ 85/* Additional journal messages */
86#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) 86#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
87 87
88/* Additional TNC messages */ 88/* Additional TNC messages */
89#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) 89#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
90 90
91/* Additional lprops messages */ 91/* Additional lprops messages */
92#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) 92#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
93 93
94/* Additional LEB find messages */ 94/* Additional LEB find messages */
95#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) 95#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
96 96
97/* Additional mount messages */ 97/* Additional mount messages */
98#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) 98#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
99 99
100/* Additional I/O messages */ 100/* Additional I/O messages */
101#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) 101#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
102 102
103/* Additional commit messages */ 103/* Additional commit messages */
104#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) 104#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
105 105
106/* Additional budgeting messages */ 106/* Additional budgeting messages */
107#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) 107#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
108 108
109/* Additional log messages */ 109/* Additional log messages */
110#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) 110#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
111 111
112/* Additional gc messages */ 112/* Additional gc messages */
113#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) 113#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
114 114
115/* Additional scan messages */ 115/* Additional scan messages */
116#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) 116#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
117 117
118/* Additional recovery messages */ 118/* Additional recovery messages */
119#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 119#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
120 120
121/* 121/*
122 * Debugging message type flags (must match msg_type_names in debug.c). 122 * Debugging message type flags (must match msg_type_names in debug.c).
@@ -239,34 +239,23 @@ typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
239 struct ubifs_zbranch *zbr, void *priv); 239 struct ubifs_zbranch *zbr, void *priv);
240typedef int (*dbg_znode_callback)(struct ubifs_info *c, 240typedef int (*dbg_znode_callback)(struct ubifs_info *c,
241 struct ubifs_znode *znode, void *priv); 241 struct ubifs_znode *znode, void *priv);
242
243int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, 242int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
244 dbg_znode_callback znode_cb, void *priv); 243 dbg_znode_callback znode_cb, void *priv);
245 244
246/* Checking functions */ 245/* Checking functions */
247 246
248int dbg_check_lprops(struct ubifs_info *c); 247int dbg_check_lprops(struct ubifs_info *c);
249
250int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); 248int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
251int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); 249int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
252
253int dbg_check_cats(struct ubifs_info *c); 250int dbg_check_cats(struct ubifs_info *c);
254
255int dbg_check_ltab(struct ubifs_info *c); 251int dbg_check_ltab(struct ubifs_info *c);
256
257int dbg_check_synced_i_size(struct inode *inode); 252int dbg_check_synced_i_size(struct inode *inode);
258
259int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); 253int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
260
261int dbg_check_tnc(struct ubifs_info *c, int extra); 254int dbg_check_tnc(struct ubifs_info *c, int extra);
262
263int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); 255int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
264
265int dbg_check_filesystem(struct ubifs_info *c); 256int dbg_check_filesystem(struct ubifs_info *c);
266
267void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, 257void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
268 int add_pos); 258 int add_pos);
269
270int dbg_check_lprops(struct ubifs_info *c); 259int dbg_check_lprops(struct ubifs_info *c);
271int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, 260int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
272 int row, int col); 261 int row, int col);
@@ -329,71 +318,77 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
329#else /* !CONFIG_UBIFS_FS_DEBUG */ 318#else /* !CONFIG_UBIFS_FS_DEBUG */
330 319
331#define UBIFS_DBG(op) 320#define UBIFS_DBG(op)
332#define ubifs_assert(expr) ({}) 321
333#define ubifs_assert_cmt_locked(c) 322/* Use "if (0)" to make compiler check arguments even if debugging is off */
323#define ubifs_assert(expr) do { \
324 if (0 && (expr)) \
325 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
326 __func__, __LINE__, current->pid); \
327} while (0)
328
329#define dbg_err(fmt, ...) do { \
330 if (0) \
331 ubifs_err(fmt, ##__VA_ARGS__); \
332} while (0)
333
334#define dbg_msg(fmt, ...) do { \
335 if (0) \
336 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \
337 current->pid, __func__, ##__VA_ARGS__); \
338} while (0)
339
334#define dbg_dump_stack() 340#define dbg_dump_stack()
335#define dbg_err(fmt, ...) ({}) 341#define ubifs_assert_cmt_locked(c)
336#define dbg_msg(fmt, ...) ({})
337#define dbg_key(c, key, fmt, ...) ({})
338
339#define dbg_gen(fmt, ...) ({})
340#define dbg_jnl(fmt, ...) ({})
341#define dbg_tnc(fmt, ...) ({})
342#define dbg_lp(fmt, ...) ({})
343#define dbg_find(fmt, ...) ({})
344#define dbg_mnt(fmt, ...) ({})
345#define dbg_io(fmt, ...) ({})
346#define dbg_cmt(fmt, ...) ({})
347#define dbg_budg(fmt, ...) ({})
348#define dbg_log(fmt, ...) ({})
349#define dbg_gc(fmt, ...) ({})
350#define dbg_scan(fmt, ...) ({})
351#define dbg_rcvry(fmt, ...) ({})
352
353#define dbg_ntype(type) ""
354#define dbg_cstate(cmt_state) ""
355#define dbg_get_key_dump(c, key) ({})
356#define dbg_dump_inode(c, inode) ({})
357#define dbg_dump_node(c, node) ({})
358#define dbg_dump_budget_req(req) ({})
359#define dbg_dump_lstats(lst) ({})
360#define dbg_dump_budg(c) ({})
361#define dbg_dump_lprop(c, lp) ({})
362#define dbg_dump_lprops(c) ({})
363#define dbg_dump_leb(c, lnum) ({})
364#define dbg_dump_znode(c, znode) ({})
365#define dbg_dump_heap(c, heap, cat) ({})
366#define dbg_dump_pnode(c, pnode, parent, iip) ({})
367#define dbg_dump_tnc(c) ({})
368#define dbg_dump_index(c) ({})
369 342
370#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 343#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
344#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
345#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
346#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
348#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
349#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
350#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
351#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
352#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
353#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
354#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
355#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
356
357#define DBGKEY(key) ((char *)(key))
358#define DBGKEY1(key) ((char *)(key))
359
360#define dbg_ntype(type) ""
361#define dbg_cstate(cmt_state) ""
362#define dbg_get_key_dump(c, key) ({})
363#define dbg_dump_inode(c, inode) ({})
364#define dbg_dump_node(c, node) ({})
365#define dbg_dump_budget_req(req) ({})
366#define dbg_dump_lstats(lst) ({})
367#define dbg_dump_budg(c) ({})
368#define dbg_dump_lprop(c, lp) ({})
369#define dbg_dump_lprops(c) ({})
370#define dbg_dump_leb(c, lnum) ({})
371#define dbg_dump_znode(c, znode) ({})
372#define dbg_dump_heap(c, heap, cat) ({})
373#define dbg_dump_pnode(c, pnode, parent, iip) ({})
374#define dbg_dump_tnc(c) ({})
375#define dbg_dump_index(c) ({})
371 376
377#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
372#define dbg_old_index_check_init(c, zroot) 0 378#define dbg_old_index_check_init(c, zroot) 0
373#define dbg_check_old_index(c, zroot) 0 379#define dbg_check_old_index(c, zroot) 0
374
375#define dbg_check_cats(c) 0 380#define dbg_check_cats(c) 0
376
377#define dbg_check_ltab(c) 0 381#define dbg_check_ltab(c) 0
378
379#define dbg_check_synced_i_size(inode) 0 382#define dbg_check_synced_i_size(inode) 0
380
381#define dbg_check_dir_size(c, dir) 0 383#define dbg_check_dir_size(c, dir) 0
382
383#define dbg_check_tnc(c, x) 0 384#define dbg_check_tnc(c, x) 0
384
385#define dbg_check_idx_size(c, idx_size) 0 385#define dbg_check_idx_size(c, idx_size) 0
386
387#define dbg_check_filesystem(c) 0 386#define dbg_check_filesystem(c) 0
388
389#define dbg_check_heap(c, heap, cat, add_pos) ({}) 387#define dbg_check_heap(c, heap, cat, add_pos) ({})
390
391#define dbg_check_lprops(c) 0 388#define dbg_check_lprops(c) 0
392#define dbg_check_lpt_nodes(c, cnode, row, col) 0 389#define dbg_check_lpt_nodes(c, cnode, row, col) 0
393
394#define dbg_force_in_the_gaps_enabled 0 390#define dbg_force_in_the_gaps_enabled 0
395#define dbg_force_in_the_gaps() 0 391#define dbg_force_in_the_gaps() 0
396
397#define dbg_failure_mode 0 392#define dbg_failure_mode 0
398#define dbg_failure_mode_registration(c) ({}) 393#define dbg_failure_mode_registration(c) ({})
399#define dbg_failure_mode_deregistration(c) ({}) 394#define dbg_failure_mode_deregistration(c) ({})
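
The replacement ubifs_assert()/dbg_msg() stubs rely on the classic "if (0)" idiom: the compiler still type-checks the expression and the printf-style arguments, but dead-code elimination drops the call, so disabled debug call sites cost nothing yet cannot bit-rot. A self-contained sketch:

	#define my_dbg(fmt, ...) do {                                        \
		if (0)                                                       \
			printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__);          \
	} while (0)

	/* Type-checked at compile time, never emitted at run time
	 * (assuming a struct inode *inode in scope): */
	my_dbg("inode %lu nlink %u", inode->i_ino, inode->i_nlink);
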
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e90374be7d3b..5c96f1fb7016 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -165,7 +165,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
165 } 165 }
166 166
167 inode->i_ino = ++c->highest_inum; 167 inode->i_ino = ++c->highest_inum;
168 inode->i_generation = ++c->vfs_gen;
169 /* 168 /*
170 * The creation sequence number remains with this inode for its 169 * The creation sequence number remains with this inode for its
171 * lifetime. All nodes for this inode have a greater sequence number, 170 * lifetime. All nodes for this inode have a greater sequence number,
@@ -220,15 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
220 219
221 err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); 220 err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
222 if (err) { 221 if (err) {
223 /* 222 if (err == -ENOENT) {
224 * Do not hash the direntry if parent 'i_nlink' is zero, because
225 * this has side-effects - '->delete_inode()' call will not be
226 * called for the parent orphan inode, because 'd_count' of its
227 * direntry will stay 1 (it'll be negative direntry I guess)
228 * and prevent 'iput_final()' until the dentry is destroyed due
229 * to unmount or memory pressure.
230 */
231 if (err == -ENOENT && dir->i_nlink != 0) {
232 dbg_gen("not found"); 223 dbg_gen("not found");
233 goto done; 224 goto done;
234 } 225 }
@@ -525,7 +516,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
525 struct ubifs_inode *dir_ui = ubifs_inode(dir); 516 struct ubifs_inode *dir_ui = ubifs_inode(dir);
526 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); 517 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
527 struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, 518 struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
528 .dirtied_ino_d = ui->data_len }; 519 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
529 520
530 /* 521 /*
531 * Budget request settings: new direntry, changing the target inode, 522 * Budget request settings: new direntry, changing the target inode,
@@ -727,8 +718,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
727 struct ubifs_inode *dir_ui = ubifs_inode(dir); 718 struct ubifs_inode *dir_ui = ubifs_inode(dir);
728 struct ubifs_info *c = dir->i_sb->s_fs_info; 719 struct ubifs_info *c = dir->i_sb->s_fs_info;
729 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); 720 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
730 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 721 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
731 .dirtied_ino_d = 1 };
732 722
733 /* 723 /*
734 * Budget request settings: new inode, new direntry and changing parent 724 * Budget request settings: new inode, new direntry and changing parent
@@ -789,7 +779,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
789 int sz_change = CALC_DENT_SIZE(dentry->d_name.len); 779 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
790 int err, devlen = 0; 780 int err, devlen = 0;
791 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 781 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
792 .new_ino_d = devlen, .dirtied_ino = 1 }; 782 .new_ino_d = ALIGN(devlen, 8),
783 .dirtied_ino = 1 };
793 784
794 /* 785 /*
795 * Budget request settings: new inode, new direntry and changing parent 786 * Budget request settings: new inode, new direntry and changing parent
@@ -863,7 +854,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
863 int err, len = strlen(symname); 854 int err, len = strlen(symname);
864 int sz_change = CALC_DENT_SIZE(dentry->d_name.len); 855 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
865 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 856 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
866 .new_ino_d = len, .dirtied_ino = 1 }; 857 .new_ino_d = ALIGN(len, 8),
858 .dirtied_ino = 1 };
867 859
868 /* 860 /*
869 * Budget request settings: new inode, new direntry and changing parent 861 * Budget request settings: new inode, new direntry and changing parent
@@ -1012,7 +1004,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1012 struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, 1004 struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
1013 .dirtied_ino = 3 }; 1005 .dirtied_ino = 3 };
1014 struct ubifs_budget_req ino_req = { .dirtied_ino = 1, 1006 struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
1015 .dirtied_ino_d = old_inode_ui->data_len }; 1007 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
1016 struct timespec time; 1008 struct timespec time;
1017 1009
1018 /* 1010 /*
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 8565e586e533..4071d1cae29f 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -890,7 +890,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
890 loff_t new_size = attr->ia_size; 890 loff_t new_size = attr->ia_size;
891 struct ubifs_inode *ui = ubifs_inode(inode); 891 struct ubifs_inode *ui = ubifs_inode(inode);
892 struct ubifs_budget_req req = { .dirtied_ino = 1, 892 struct ubifs_budget_req req = { .dirtied_ino = 1,
893 .dirtied_ino_d = ui->data_len }; 893 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
894 894
895 err = ubifs_budget_space(c, &req); 895 err = ubifs_budget_space(c, &req);
896 if (err) 896 if (err)
@@ -941,7 +941,8 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
941 struct inode *inode = dentry->d_inode; 941 struct inode *inode = dentry->d_inode;
942 struct ubifs_info *c = inode->i_sb->s_fs_info; 942 struct ubifs_info *c = inode->i_sb->s_fs_info;
943 943
944 dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); 944 dbg_gen("ino %lu, mode %#x, ia_valid %#x",
945 inode->i_ino, inode->i_mode, attr->ia_valid);
945 err = inode_change_ok(inode, attr); 946 err = inode_change_ok(inode, attr);
946 if (err) 947 if (err)
947 return err; 948 return err;
@@ -1051,7 +1052,7 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode)
1051 if (mctime_update_needed(inode, &now)) { 1052 if (mctime_update_needed(inode, &now)) {
1052 int err, release; 1053 int err, release;
1053 struct ubifs_budget_req req = { .dirtied_ino = 1, 1054 struct ubifs_budget_req req = { .dirtied_ino = 1,
1054 .dirtied_ino_d = ui->data_len }; 1055 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
1055 1056
1056 err = ubifs_budget_space(c, &req); 1057 err = ubifs_budget_space(c, &req);
1057 if (err) 1058 if (err)
@@ -1270,6 +1271,7 @@ struct file_operations ubifs_file_operations = {
1270 .fsync = ubifs_fsync, 1271 .fsync = ubifs_fsync,
1271 .unlocked_ioctl = ubifs_ioctl, 1272 .unlocked_ioctl = ubifs_ioctl,
1272 .splice_read = generic_file_splice_read, 1273 .splice_read = generic_file_splice_read,
1274 .splice_write = generic_file_splice_write,
1273#ifdef CONFIG_COMPAT 1275#ifdef CONFIG_COMPAT
1274 .compat_ioctl = ubifs_compat_ioctl, 1276 .compat_ioctl = ubifs_compat_ioctl,
1275#endif 1277#endif
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 10394c548367..adee7b5ddeab 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -290,9 +290,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
290 idx_lp = idx_heap->arr[0]; 290 idx_lp = idx_heap->arr[0];
291 sum = idx_lp->free + idx_lp->dirty; 291 sum = idx_lp->free + idx_lp->dirty;
292 /* 292 /*
293 * Since we reserve twice as more space for the index than it 293 * Since we reserve thrice as much space for the index as it
294 * actually takes, it does not make sense to pick indexing LEBs 294 * actually takes, it does not make sense to pick indexing LEBs
295 * with less than half LEB of dirty space. 295 * with less than, say, half a LEB of dirty space. Maybe half is
296 * not the optimal boundary - this should be tested and
297 * checked. This boundary should determine how much we use
298 * in-the-gaps to consolidate the index compared to how much
299 * we use the garbage collector to consolidate it. The "half"
300 * criterion just feels fine.
296 */ 301 */
297 if (sum < min_space || sum < c->half_leb_size) 302 if (sum < min_space || sum < c->half_leb_size)
298 idx_lp = NULL; 303 idx_lp = NULL;
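
A worked example of the threshold above, with purely illustrative numbers (not the real constants):

	/* Assume a 128 KiB LEB, so c->half_leb_size == 64 KiB.           */
	/* free + dirty == 40 KiB: skipped, too little reclaimable space. */
	/* free + dirty == 80 KiB: eligible, if it also meets min_space.  */
	sum = idx_lp->free + idx_lp->dirty;
	if (sum < min_space || sum < c->half_leb_size)
		idx_lp = NULL;
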
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3374f91b6709..054363f2b207 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -54,6 +54,20 @@
54#include "ubifs.h" 54#include "ubifs.h"
55 55
56/** 56/**
57 * ubifs_ro_mode - switch UBIFS to read-only mode.
58 * @c: UBIFS file-system description object
59 * @err: error code which is the reason of switching to R/O mode
60 */
61void ubifs_ro_mode(struct ubifs_info *c, int err)
62{
63 if (!c->ro_media) {
64 c->ro_media = 1;
65 ubifs_warn("switched to read-only mode, error %d", err);
66 dbg_dump_stack();
67 }
68}
69
70/**
57 * ubifs_check_node - check node. 71 * ubifs_check_node - check node.
58 * @c: UBIFS file-system description object 72 * @c: UBIFS file-system description object
59 * @buf: node to check 73 * @buf: node to check
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 283155abe5f5..22993f867d19 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -447,13 +447,11 @@ static int get_dent_type(int mode)
447 * @ino: buffer in which to pack inode node 447 * @ino: buffer in which to pack inode node
448 * @inode: inode to pack 448 * @inode: inode to pack
449 * @last: indicates the last node of the group 449 * @last: indicates the last node of the group
450 * @last_reference: non-zero if this is a deletion inode
451 */ 450 */
452static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, 451static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
453 const struct inode *inode, int last, 452 const struct inode *inode, int last)
454 int last_reference)
455{ 453{
456 int data_len = 0; 454 int data_len = 0, last_reference = !inode->i_nlink;
457 struct ubifs_inode *ui = ubifs_inode(inode); 455 struct ubifs_inode *ui = ubifs_inode(inode);
458 456
459 ino->ch.node_type = UBIFS_INO_NODE; 457 ino->ch.node_type = UBIFS_INO_NODE;
@@ -596,9 +594,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
596 ubifs_prep_grp_node(c, dent, dlen, 0); 594 ubifs_prep_grp_node(c, dent, dlen, 0);
597 595
598 ino = (void *)dent + aligned_dlen; 596 ino = (void *)dent + aligned_dlen;
599 pack_inode(c, ino, inode, 0, last_reference); 597 pack_inode(c, ino, inode, 0);
600 ino = (void *)ino + aligned_ilen; 598 ino = (void *)ino + aligned_ilen;
601 pack_inode(c, ino, dir, 1, 0); 599 pack_inode(c, ino, dir, 1);
602 600
603 if (last_reference) { 601 if (last_reference) {
604 err = ubifs_add_orphan(c, inode->i_ino); 602 err = ubifs_add_orphan(c, inode->i_ino);
@@ -606,6 +604,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
606 release_head(c, BASEHD); 604 release_head(c, BASEHD);
607 goto out_finish; 605 goto out_finish;
608 } 606 }
607 ui->del_cmtno = c->cmt_no;
609 } 608 }
610 609
611 err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); 610 err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
@@ -750,30 +749,25 @@ out_free:
750 * ubifs_jnl_write_inode - flush inode to the journal. 749 * ubifs_jnl_write_inode - flush inode to the journal.
751 * @c: UBIFS file-system description object 750 * @c: UBIFS file-system description object
752 * @inode: inode to flush 751 * @inode: inode to flush
753 * @deletion: inode has been deleted
754 * 752 *
755 * This function writes inode @inode to the journal. If the inode is 753 * This function writes inode @inode to the journal. If the inode is
756 * synchronous, it also synchronizes the write-buffer. Returns zero in case of 754 * synchronous, it also synchronizes the write-buffer. Returns zero in case of
757 * success and a negative error code in case of failure. 755 * success and a negative error code in case of failure.
758 */ 756 */
759int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, 757int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
760 int deletion)
761{ 758{
762 int err, len, lnum, offs, sync = 0; 759 int err, lnum, offs;
763 struct ubifs_ino_node *ino; 760 struct ubifs_ino_node *ino;
764 struct ubifs_inode *ui = ubifs_inode(inode); 761 struct ubifs_inode *ui = ubifs_inode(inode);
762 int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
765 763
766 dbg_jnl("ino %lu%s", inode->i_ino, 764 dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
767 deletion ? " (last reference)" : "");
768 if (deletion)
769 ubifs_assert(inode->i_nlink == 0);
770 765
771 len = UBIFS_INO_NODE_SZ;
772 /* 766 /*
773 * If the inode is being deleted, do not write the attached data. No 767 * If the inode is being deleted, do not write the attached data. No
774 * need to synchronize the write-buffer either. 768 * need to synchronize the write-buffer either.
775 */ 769 */
776 if (!deletion) { 770 if (!last_reference) {
777 len += ui->data_len; 771 len += ui->data_len;
778 sync = IS_SYNC(inode); 772 sync = IS_SYNC(inode);
779 } 773 }
@@ -786,7 +780,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
786 if (err) 780 if (err)
787 goto out_free; 781 goto out_free;
788 782
789 pack_inode(c, ino, inode, 1, deletion); 783 pack_inode(c, ino, inode, 1);
790 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); 784 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
791 if (err) 785 if (err)
792 goto out_release; 786 goto out_release;
@@ -795,7 +789,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
795 inode->i_ino); 789 inode->i_ino);
796 release_head(c, BASEHD); 790 release_head(c, BASEHD);
797 791
798 if (deletion) { 792 if (last_reference) {
799 err = ubifs_tnc_remove_ino(c, inode->i_ino); 793 err = ubifs_tnc_remove_ino(c, inode->i_ino);
800 if (err) 794 if (err)
801 goto out_ro; 795 goto out_ro;
@@ -828,6 +822,65 @@ out_free:
828} 822}
829 823
830/** 824/**
825 * ubifs_jnl_delete_inode - delete an inode.
826 * @c: UBIFS file-system description object
827 * @inode: inode to delete
828 *
829 * This function deletes inode @inode which includes removing it from orphans,
830 * deleting it from TNC and, in some cases, writing a deletion inode to the
831 * journal.
832 *
833 * When regular file inodes are unlinked or a directory inode is removed, the
834 * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
835 * direntry to the media, and adds the inode to orphans. After this, when the
836 * last reference to this inode has been dropped, this function is called. In
837 * general, it has to write one more deletion inode to the media, because if
838 * a commit happened between 'ubifs_jnl_update()' and
839 * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
840 * anymore, and in fact it might not be on the flash anymore, because it might
841 * have been garbage-collected already. And for optimization reasons UBIFS does
842 * not read the orphan area if it has been unmounted cleanly, so it would have
843 * no indication in the journal that there is a deleted inode which has to be
844 * removed from TNC.
845 *
846 * However, if there was no commit between 'ubifs_jnl_update()' and
847 * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
848 * inode to the media for the second time. And this is quite a typical case.
849 *
850 * This function returns zero in case of success and a negative error code in
851 * case of failure.
852 */
853int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
854{
855 int err;
856 struct ubifs_inode *ui = ubifs_inode(inode);
857
858 ubifs_assert(inode->i_nlink == 0);
859
860 if (ui->del_cmtno != c->cmt_no)
861 /* A commit happened for sure */
862 return ubifs_jnl_write_inode(c, inode);
863
864 down_read(&c->commit_sem);
865 /*
866 * Check commit number again, because the first test has been done
867 * without @c->commit_sem, so a commit might have happened.
868 */
869 if (ui->del_cmtno != c->cmt_no) {
870 up_read(&c->commit_sem);
871 return ubifs_jnl_write_inode(c, inode);
872 }
873
874 err = ubifs_tnc_remove_ino(c, inode->i_ino);
875 if (err)
876 ubifs_ro_mode(c, err);
877 else
878 ubifs_delete_orphan(c, inode->i_ino);
879 up_read(&c->commit_sem);
880 return err;
881}
882
883/**
831 * ubifs_jnl_rename - rename a directory entry. 884 * ubifs_jnl_rename - rename a directory entry.
832 * @c: UBIFS file-system description object 885 * @c: UBIFS file-system description object
833 * @old_dir: parent inode of directory entry to rename 886 * @old_dir: parent inode of directory entry to rename
@@ -917,16 +970,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
917 970
918 p = (void *)dent2 + aligned_dlen2; 971 p = (void *)dent2 + aligned_dlen2;
919 if (new_inode) { 972 if (new_inode) {
920 pack_inode(c, p, new_inode, 0, last_reference); 973 pack_inode(c, p, new_inode, 0);
921 p += ALIGN(ilen, 8); 974 p += ALIGN(ilen, 8);
922 } 975 }
923 976
924 if (!move) 977 if (!move)
925 pack_inode(c, p, old_dir, 1, 0); 978 pack_inode(c, p, old_dir, 1);
926 else { 979 else {
927 pack_inode(c, p, old_dir, 0, 0); 980 pack_inode(c, p, old_dir, 0);
928 p += ALIGN(plen, 8); 981 p += ALIGN(plen, 8);
929 pack_inode(c, p, new_dir, 1, 0); 982 pack_inode(c, p, new_dir, 1);
930 } 983 }
931 984
932 if (last_reference) { 985 if (last_reference) {
@@ -935,6 +988,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
935 release_head(c, BASEHD); 988 release_head(c, BASEHD);
936 goto out_finish; 989 goto out_finish;
937 } 990 }
991 new_ui->del_cmtno = c->cmt_no;
938 } 992 }
939 993
940 err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); 994 err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
@@ -1131,7 +1185,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1131 if (err) 1185 if (err)
1132 goto out_free; 1186 goto out_free;
1133 1187
1134 pack_inode(c, ino, inode, 0, 0); 1188 pack_inode(c, ino, inode, 0);
1135 ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); 1189 ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
1136 if (dlen) 1190 if (dlen)
1137 ubifs_prep_grp_node(c, dn, dlen, 1); 1191 ubifs_prep_grp_node(c, dn, dlen, 1);
@@ -1251,9 +1305,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
1251 ubifs_prep_grp_node(c, xent, xlen, 0); 1305 ubifs_prep_grp_node(c, xent, xlen, 0);
1252 1306
1253 ino = (void *)xent + aligned_xlen; 1307 ino = (void *)xent + aligned_xlen;
1254 pack_inode(c, ino, inode, 0, 1); 1308 pack_inode(c, ino, inode, 0);
1255 ino = (void *)ino + UBIFS_INO_NODE_SZ; 1309 ino = (void *)ino + UBIFS_INO_NODE_SZ;
1256 pack_inode(c, ino, host, 1, 0); 1310 pack_inode(c, ino, host, 1);
1257 1311
1258 err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); 1312 err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
1259 if (!sync && !err) 1313 if (!sync && !err)
@@ -1320,7 +1374,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
1320 const struct inode *host) 1374 const struct inode *host)
1321{ 1375{
1322 int err, len1, len2, aligned_len, aligned_len1, lnum, offs; 1376 int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
1323 struct ubifs_inode *host_ui = ubifs_inode(inode); 1377 struct ubifs_inode *host_ui = ubifs_inode(host);
1324 struct ubifs_ino_node *ino; 1378 struct ubifs_ino_node *ino;
1325 union ubifs_key key; 1379 union ubifs_key key;
1326 int sync = IS_DIRSYNC(host); 1380 int sync = IS_DIRSYNC(host);
@@ -1344,8 +1398,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
1344 if (err) 1398 if (err)
1345 goto out_free; 1399 goto out_free;
1346 1400
1347 pack_inode(c, ino, host, 0, 0); 1401 pack_inode(c, ino, host, 0);
1348 pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); 1402 pack_inode(c, (void *)ino + aligned_len1, inode, 1);
1349 1403
1350 err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); 1404 err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
1351 if (!sync && !err) { 1405 if (!sync && !err) {
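
ubifs_jnl_delete_inode() above is an instance of the double-checked pattern: a cheap unlocked comparison first, then the same comparison repeated under @c->commit_sem, because a commit can complete between the two. The skeleton:

	if (ui->del_cmtno != c->cmt_no)          /* racy fast path          */
		return ubifs_jnl_write_inode(c, inode);

	down_read(&c->commit_sem);
	if (ui->del_cmtno != c->cmt_no) {        /* re-check under the lock */
		up_read(&c->commit_sem);
		return ubifs_jnl_write_inode(c, inode);
	}
	/* No commit since deletion: the journal still holds the deletion
	 * inode, so only the TNC and the orphan list need updating. */
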
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36857b9ed59e..3e0aa7367556 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -317,6 +317,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
317 return 0; 317 return 0;
318 318
319out_unlock: 319out_unlock:
320 if (err != -EAGAIN)
321 ubifs_ro_mode(c, err);
320 mutex_unlock(&c->log_mutex); 322 mutex_unlock(&c->log_mutex);
321 kfree(ref); 323 kfree(ref);
322 kfree(bud); 324 kfree(bud);
@@ -410,7 +412,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
410 return -ENOMEM; 412 return -ENOMEM;
411 413
412 cs->ch.node_type = UBIFS_CS_NODE; 414 cs->ch.node_type = UBIFS_CS_NODE;
413 cs->cmt_no = cpu_to_le64(c->cmt_no + 1); 415 cs->cmt_no = cpu_to_le64(c->cmt_no);
414 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); 416 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
415 417
416 /* 418 /*
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4beccfc256d2..87dabf9fe742 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -80,20 +80,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
80} 80}
81 81
82/** 82/**
83 * ubifs_ro_mode - switch UBIFS to read-only mode.
84 * @c: UBIFS file-system description object
85 * @err: error code which is the reason of switching to R/O mode
86 */
87static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
88{
89 if (!c->ro_media) {
90 c->ro_media = 1;
91 ubifs_warn("switched to read-only mode, error %d", err);
92 dbg_dump_stack();
93 }
94}
95
96/**
97 * ubifs_compr_present - check if compressor was compiled in. 83 * ubifs_compr_present - check if compressor was compiled in.
98 * @compr_type: compressor type to check 84 * @compr_type: compressor type to check
99 * 85 *
@@ -322,7 +308,7 @@ static inline long long ubifs_reported_space(const struct ubifs_info *c,
322{ 308{
323 int divisor, factor; 309 int divisor, factor;
324 310
325 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); 311 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
326 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; 312 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
327 do_div(free, divisor); 313 do_div(free, divisor);
328 314
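
The divisor change reflects reserving three times the worst-case index size instead of two. With purely illustrative sizes (assumptions, not the real constants):

	/* Say UBIFS_MAX_DATA_NODE_SZ == 4144 and c->max_idx_node_sz == 300: */
	/* old: divisor = 4144 + 300 * 2 = 4744                              */
	/* new: divisor = 4144 + 300 * 3 = 5044                              */
	/* The same raw free space now reports slightly smaller, in exchange */
	/* for a safer index reservation.                                    */
	divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
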
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 3afeb9242c6a..02d3462f4d3e 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -310,10 +310,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
310 c->cmt_orphans -= cnt; 310 c->cmt_orphans -= cnt;
311 spin_unlock(&c->orphan_lock); 311 spin_unlock(&c->orphan_lock);
312 if (c->cmt_orphans) 312 if (c->cmt_orphans)
313 orph->cmt_no = cpu_to_le64(c->cmt_no + 1); 313 orph->cmt_no = cpu_to_le64(c->cmt_no);
314 else 314 else
315 /* Mark the last node of the commit */ 315 /* Mark the last node of the commit */
316 orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63)); 316 orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63));
317 ubifs_assert(c->ohead_offs + len <= c->leb_size); 317 ubifs_assert(c->ohead_offs + len <= c->leb_size);
318 ubifs_assert(c->ohead_lnum >= c->orph_first); 318 ubifs_assert(c->ohead_lnum >= c->orph_first);
319 ubifs_assert(c->ohead_lnum <= c->orph_last); 319 ubifs_assert(c->ohead_lnum <= c->orph_last);
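
The orphan node reuses the top bit of the 64-bit commit number as a last-node flag. A sketch of how a reader could decode it (the decode side is illustrative, not quoted from the replay code):

	u64 raw    = le64_to_cpu(orph->cmt_no);
	u64 cmt_no = raw & ~(1ULL << 63);    /* low 63 bits: commit number  */
	int last   = !!(raw & (1ULL << 63)); /* bit 63: last node of commit */
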
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ca1e2d4e03cc..f71e6b8822c4 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -30,7 +30,6 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/ctype.h> 32#include <linux/ctype.h>
33#include <linux/random.h>
34#include <linux/kthread.h> 33#include <linux/kthread.h>
35#include <linux/parser.h> 34#include <linux/parser.h>
36#include <linux/seq_file.h> 35#include <linux/seq_file.h>
@@ -149,7 +148,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
149 if (err) 148 if (err)
150 goto out_invalid; 149 goto out_invalid;
151 150
152 /* Disable readahead */ 151 /* Disable read-ahead */
153 inode->i_mapping->backing_dev_info = &c->bdi; 152 inode->i_mapping->backing_dev_info = &c->bdi;
154 153
155 switch (inode->i_mode & S_IFMT) { 154 switch (inode->i_mode & S_IFMT) {
@@ -278,7 +277,7 @@ static void ubifs_destroy_inode(struct inode *inode)
278 */ 277 */
279static int ubifs_write_inode(struct inode *inode, int wait) 278static int ubifs_write_inode(struct inode *inode, int wait)
280{ 279{
281 int err; 280 int err = 0;
282 struct ubifs_info *c = inode->i_sb->s_fs_info; 281 struct ubifs_info *c = inode->i_sb->s_fs_info;
283 struct ubifs_inode *ui = ubifs_inode(inode); 282 struct ubifs_inode *ui = ubifs_inode(inode);
284 283
@@ -299,10 +298,18 @@ static int ubifs_write_inode(struct inode *inode, int wait)
299 return 0; 298 return 0;
300 } 299 }
301 300
302 dbg_gen("inode %lu", inode->i_ino); 301 /*
303 err = ubifs_jnl_write_inode(c, inode, 0); 302 * As an optimization, do not write orphan inodes to the media,
304 if (err) 303 * because this is simply not needed.
305 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); 304 */
305 dbg_gen("inode %lu, mode %#x, nlink %u",
306 inode->i_ino, (int)inode->i_mode, inode->i_nlink);
307 if (inode->i_nlink) {
308 err = ubifs_jnl_write_inode(c, inode);
309 if (err)
310 ubifs_err("can't write inode %lu, error %d",
311 inode->i_ino, err);
312 }
306 313
307 ui->dirty = 0; 314 ui->dirty = 0;
308 mutex_unlock(&ui->ui_mutex); 315 mutex_unlock(&ui->ui_mutex);
@@ -314,8 +321,9 @@ static void ubifs_delete_inode(struct inode *inode)
314{ 321{
315 int err; 322 int err;
316 struct ubifs_info *c = inode->i_sb->s_fs_info; 323 struct ubifs_info *c = inode->i_sb->s_fs_info;
324 struct ubifs_inode *ui = ubifs_inode(inode);
317 325
318 if (ubifs_inode(inode)->xattr) 326 if (ui->xattr)
319 /* 327 /*
320 * Extended attribute inode deletions are fully handled in 328 * Extended attribute inode deletions are fully handled in
321 * 'ubifs_removexattr()'. These inodes are special and have 329 * 'ubifs_removexattr()'. These inodes are special and have
@@ -323,7 +331,7 @@ static void ubifs_delete_inode(struct inode *inode)
323 */ 331 */
324 goto out; 332 goto out;
325 333
326 dbg_gen("inode %lu", inode->i_ino); 334 dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
327 ubifs_assert(!atomic_read(&inode->i_count)); 335 ubifs_assert(!atomic_read(&inode->i_count));
328 ubifs_assert(inode->i_nlink == 0); 336 ubifs_assert(inode->i_nlink == 0);
329 337
@@ -331,15 +339,19 @@ static void ubifs_delete_inode(struct inode *inode)
331 if (is_bad_inode(inode)) 339 if (is_bad_inode(inode))
332 goto out; 340 goto out;
333 341
334 ubifs_inode(inode)->ui_size = inode->i_size = 0; 342 ui->ui_size = inode->i_size = 0;
335 err = ubifs_jnl_write_inode(c, inode, 1); 343 err = ubifs_jnl_delete_inode(c, inode);
336 if (err) 344 if (err)
337 /* 345 /*
338 * Worst case we have a lost orphan inode wasting space, so a 346 * Worst case we have a lost orphan inode wasting space, so a
339 * simple error message is ok here. 347 * simple error message is OK here.
340 */ 348 */
341 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); 349 ubifs_err("can't delete inode %lu, error %d",
350 inode->i_ino, err);
351
342out: 352out:
353 if (ui->dirty)
354 ubifs_release_dirty_inode_budget(c, ui);
343 clear_inode(inode); 355 clear_inode(inode);
344} 356}
345 357
@@ -1122,8 +1134,8 @@ static int mount_ubifs(struct ubifs_info *c)
1122 if (err) 1134 if (err)
1123 goto out_infos; 1135 goto out_infos;
1124 1136
1125 ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, 1137 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
1126 c->vi.vol_id); 1138 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
1127 if (mounted_read_only) 1139 if (mounted_read_only)
1128 ubifs_msg("mounted read-only"); 1140 ubifs_msg("mounted read-only");
1129 x = (long long)c->main_lebs * c->leb_size; 1141 x = (long long)c->main_lebs * c->leb_size;
@@ -1469,6 +1481,7 @@ static void ubifs_put_super(struct super_block *sb)
1469 */ 1481 */
1470 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1482 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
1471 ubifs_assert(c->budg_idx_growth == 0); 1483 ubifs_assert(c->budg_idx_growth == 0);
1484 ubifs_assert(c->budg_dd_growth == 0);
1472 ubifs_assert(c->budg_data_growth == 0); 1485 ubifs_assert(c->budg_data_growth == 0);
1473 1486
1474 /* 1487 /*
@@ -1657,7 +1670,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1657 INIT_LIST_HEAD(&c->orph_new); 1670 INIT_LIST_HEAD(&c->orph_new);
1658 1671
1659 c->highest_inum = UBIFS_FIRST_INO; 1672 c->highest_inum = UBIFS_FIRST_INO;
1660 get_random_bytes(&c->vfs_gen, sizeof(int));
1661 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; 1673 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
1662 1674
1663 ubi_get_volume_info(ubi, &c->vi); 1675 ubi_get_volume_info(ubi, &c->vi);
@@ -1671,10 +1683,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1671 } 1683 }
1672 1684
1673 /* 1685 /*
1674 * UBIFS provids 'backing_dev_info' in order to disable readahead. For 1686 * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For
1675 * UBIFS, I/O is not deferred, it is done immediately in readpage, 1687 * UBIFS, I/O is not deferred, it is done immediately in readpage,
1676 * which means the user would have to wait not just for their own I/O 1688 * which means the user would have to wait not just for their own I/O
1677 * but the readahead I/O as well i.e. completely pointless. 1689 * but the read-ahead I/O as well i.e. completely pointless.
1678 * 1690 *
1679 * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 1691 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
1680 */ 1692 */
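
Two related fixes above pair budgeting with the inode life cycle: ->write_inode() now skips inodes with i_nlink == 0 (the deletion path handles them), and ->delete_inode() releases the dirtied-inode budget that a still-dirty inode carries. A condensed sketch of the pairing:

	/* write path: only live inodes are flushed */
	if (inode->i_nlink)
		err = ubifs_jnl_write_inode(c, inode);

	/* delete path: always return the outstanding budget */
	if (ui->dirty)
		ubifs_release_dirty_inode_budget(c, ui);
	clear_inode(inode);
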
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8117e65ba2e9..8ac76b1c2d55 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -372,26 +372,25 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
372 written = layout_leb_in_gaps(c, p); 372 written = layout_leb_in_gaps(c, p);
373 if (written < 0) { 373 if (written < 0) {
374 err = written; 374 err = written;
375 if (err == -ENOSPC) { 375 if (err != -ENOSPC) {
376 if (!dbg_force_in_the_gaps_enabled) { 376 kfree(c->gap_lebs);
377 /* 377 c->gap_lebs = NULL;
378 * Do not print scary warnings if the 378 return err;
379 * debugging option which forces
380 * in-the-gaps is enabled.
381 */
382 ubifs_err("out of space");
383 spin_lock(&c->space_lock);
384 dbg_dump_budg(c);
385 spin_unlock(&c->space_lock);
386 dbg_dump_lprops(c);
387 }
388 /* Try to commit anyway */
389 err = 0;
390 break;
391 } 379 }
392 kfree(c->gap_lebs); 380 if (!dbg_force_in_the_gaps_enabled) {
393 c->gap_lebs = NULL; 381 /*
394 return err; 382 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled.
384 */
385 ubifs_err("out of space");
386 spin_lock(&c->space_lock);
387 dbg_dump_budg(c);
388 spin_unlock(&c->space_lock);
389 dbg_dump_lprops(c);
390 }
391 /* Try to commit anyway */
392 err = 0;
393 break;
395 } 394 }
396 p++; 395 p++;
397 cnt -= written; 396 cnt -= written;
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0cc7da9bed47..bd2121f3426e 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -228,10 +228,10 @@ enum {
228/* Minimum number of orphan area logical eraseblocks */ 228/* Minimum number of orphan area logical eraseblocks */
229#define UBIFS_MIN_ORPH_LEBS 1 229#define UBIFS_MIN_ORPH_LEBS 1
230/* 230/*
231 * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 231 * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1
232 * for GC, 1 for deletions, and at least 1 for committed data). 232 * for GC, 1 for deletions, and at least 1 for committed data).
233 */ 233 */
234#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) 234#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6)
235 235
236/* Minimum number of logical eraseblocks */ 236/* Minimum number of logical eraseblocks */
237#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ 237#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
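
The new minimum spelled out (a sketch of the arithmetic, not code from the patch):

	/* buds + 3 (index) + 1 (GC) + 1 (deletions) + 1 (committed data) */
	#define SKETCH_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 3 + 1 + 1 + 1)
	/* == UBIFS_MIN_BUD_LEBS + 6 == UBIFS_MIN_MAIN_LEBS */
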
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e4f89f271827..d7f706f7a302 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -20,8 +20,6 @@
20 * Adrian Hunter 20 * Adrian Hunter
21 */ 21 */
22 22
23/* Implementation version 0.7 */
24
25#ifndef __UBIFS_H__ 23#ifndef __UBIFS_H__
26#define __UBIFS_H__ 24#define __UBIFS_H__
27 25
@@ -322,6 +320,8 @@ struct ubifs_gced_idx_leb {
322 * struct ubifs_inode - UBIFS in-memory inode description. 320 * struct ubifs_inode - UBIFS in-memory inode description.
323 * @vfs_inode: VFS inode description object 321 * @vfs_inode: VFS inode description object
324 * @creat_sqnum: sequence number at time of creation 322 * @creat_sqnum: sequence number at time of creation
323 * @del_cmtno: commit number corresponding to the time the inode was deleted,
324 * protected by @c->commit_sem;
325 * @xattr_size: summarized size of all extended attributes in bytes 325 * @xattr_size: summarized size of all extended attributes in bytes
326 * @xattr_cnt: count of extended attributes this inode has 326 * @xattr_cnt: count of extended attributes this inode has
327 * @xattr_names: sum of lengths of all extended attribute names belonging to 327 * @xattr_names: sum of lengths of all extended attribute names belonging to
@@ -373,6 +373,7 @@ struct ubifs_gced_idx_leb {
373struct ubifs_inode { 373struct ubifs_inode {
374 struct inode vfs_inode; 374 struct inode vfs_inode;
375 unsigned long long creat_sqnum; 375 unsigned long long creat_sqnum;
376 unsigned long long del_cmtno;
376 unsigned int xattr_size; 377 unsigned int xattr_size;
377 unsigned int xattr_cnt; 378 unsigned int xattr_cnt;
378 unsigned int xattr_names; 379 unsigned int xattr_names;
@@ -779,7 +780,7 @@ struct ubifs_compressor {
779/** 780/**
780 * struct ubifs_budget_req - budget requirements of an operation. 781 * struct ubifs_budget_req - budget requirements of an operation.
781 * 782 *
782 * @fast: non-zero if the budgeting should try to aquire budget quickly and 783 * @fast: non-zero if the budgeting should try to acquire budget quickly and
783 * should not try to call write-back 784 * should not try to call write-back
784 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields 785 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
785 * have to be re-calculated 786 * have to be re-calculated
@@ -805,21 +806,31 @@ struct ubifs_compressor {
805 * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d 806 * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d
806 * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made 807 * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made
807 * dirty by the re-name operation. 808 * dirty by the re-name operation.
809 *
809 * Note, UBIFS aligns node lengths to an 8-byte boundary, so the requester has
810 * to make sure the amounts of inode data which contribute to the @new_ino_d
811 * and @dirtied_ino_d fields are aligned.
808 */ 813 */
809struct ubifs_budget_req { 814struct ubifs_budget_req {
810 unsigned int fast:1; 815 unsigned int fast:1;
811 unsigned int recalculate:1; 816 unsigned int recalculate:1;
817#ifndef UBIFS_DEBUG
812 unsigned int new_page:1; 818 unsigned int new_page:1;
813 unsigned int dirtied_page:1; 819 unsigned int dirtied_page:1;
814 unsigned int new_dent:1; 820 unsigned int new_dent:1;
815 unsigned int mod_dent:1; 821 unsigned int mod_dent:1;
816 unsigned int new_ino:1; 822 unsigned int new_ino:1;
817 unsigned int new_ino_d:13; 823 unsigned int new_ino_d:13;
818#ifndef UBIFS_DEBUG
819 unsigned int dirtied_ino:4; 824 unsigned int dirtied_ino:4;
820 unsigned int dirtied_ino_d:15; 825 unsigned int dirtied_ino_d:15;
821#else 826#else
822 /* Not bit-fields to check for overflows */ 827 /* Not bit-fields to check for overflows */
828 unsigned int new_page;
829 unsigned int dirtied_page;
830 unsigned int new_dent;
831 unsigned int mod_dent;
832 unsigned int new_ino;
833 unsigned int new_ino_d;
823 unsigned int dirtied_ino; 834 unsigned int dirtied_ino;
824 unsigned int dirtied_ino_d; 835 unsigned int dirtied_ino_d;
825#endif 836#endif
@@ -860,13 +871,13 @@ struct ubifs_mount_opts {
860 * struct ubifs_info - UBIFS file-system description data structure 871 * struct ubifs_info - UBIFS file-system description data structure
861 * (per-superblock). 872 * (per-superblock).
862 * @vfs_sb: VFS @struct super_block object 873 * @vfs_sb: VFS @struct super_block object
863 * @bdi: backing device info object to make VFS happy and disable readahead 874 * @bdi: backing device info object to make VFS happy and disable read-ahead
864 * 875 *
865 * @highest_inum: highest used inode number 876 * @highest_inum: highest used inode number
866 * @vfs_gen: VFS inode generation counter
867 * @max_sqnum: current global sequence number 877 * @max_sqnum: current global sequence number
868 * @cmt_no: commit number (last successfully completed commit) 878 * @cmt_no: commit number of the last successfully completed commit, protected
869 * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters 879 * by @commit_sem
880 * @cnt_lock: protects @highest_inum and @max_sqnum counters
870 * @fmt_version: UBIFS on-flash format version 881 * @fmt_version: UBIFS on-flash format version
871 * @uuid: UUID from super block 882 * @uuid: UUID from super block
872 * 883 *
@@ -1103,7 +1114,6 @@ struct ubifs_info {
1103 struct backing_dev_info bdi; 1114 struct backing_dev_info bdi;
1104 1115
1105 ino_t highest_inum; 1116 ino_t highest_inum;
1106 unsigned int vfs_gen;
1107 unsigned long long max_sqnum; 1117 unsigned long long max_sqnum;
1108 unsigned long long cmt_no; 1118 unsigned long long cmt_no;
1109 spinlock_t cnt_lock; 1119 spinlock_t cnt_lock;
@@ -1346,6 +1356,7 @@ extern struct backing_dev_info ubifs_backing_dev_info;
1346extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; 1356extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
1347 1357
1348/* io.c */ 1358/* io.c */
1359void ubifs_ro_mode(struct ubifs_info *c, int err);
1349int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); 1360int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
1350int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 1361int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
1351 int dtype); 1362 int dtype);
@@ -1399,8 +1410,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
1399 int deletion, int xent); 1410 int deletion, int xent);
1400int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, 1411int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
1401 const union ubifs_key *key, const void *buf, int len); 1412 const union ubifs_key *key, const void *buf, int len);
1402int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, 1413int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode);
1403 int last_reference); 1414int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode);
1404int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, 1415int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
1405 const struct dentry *old_dentry, 1416 const struct dentry *old_dentry,
1406 const struct inode *new_dir, 1417 const struct inode *new_dir,
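
A sketch of a caller honoring the new alignment note (data_len and host_ui are hypothetical; the ALIGN() calls are the point): lengths feeding @new_ino_d and @dirtied_ino_d are pre-aligned to 8 bytes, and in debug builds the fields widen to plain unsigned ints so an overflow is detectable rather than silently truncated by a bit-field:

	struct ubifs_budget_req req = {
		.new_ino       = 1,
		.new_ino_d     = ALIGN(data_len, 8),          /* caller aligns */
		.dirtied_ino   = 1,
		.dirtied_ino_d = ALIGN(host_ui->data_len, 8),
	};
	err = ubifs_budget_space(c, &req);
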
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 1388a078e1a9..649bec78b645 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -61,7 +61,7 @@
61 61
62/* 62/*
63 * Limit the number of extended attributes per inode so that the total size 63 * Limit the number of extended attributes per inode so that the total size
64 * (xattr_size) is guaranteeded to fit in an 'unsigned int'. 64 * (@xattr_size) is guaranteed to fit in an 'unsigned int'.
65 */ 65 */
66#define MAX_XATTRS_PER_INODE 65535 66#define MAX_XATTRS_PER_INODE 65535
67 67
@@ -103,14 +103,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
103 struct inode *inode; 103 struct inode *inode;
104 struct ubifs_inode *ui, *host_ui = ubifs_inode(host); 104 struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
105 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, 105 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
106 .new_ino_d = size, .dirtied_ino = 1, 106 .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1,
107 .dirtied_ino_d = host_ui->data_len}; 107 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
108 108
109 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) 109 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
110 return -ENOSPC; 110 return -ENOSPC;
111 /* 111 /*
112 * Linux limits the maximum size of the extended attribute names list 112 * Linux limits the maximum size of the extended attribute names list
113 * to %XATTR_LIST_MAX. This means we should not allow creating more* 113 * to %XATTR_LIST_MAX. This means we should not allow creating more
114 * extended attributes if the name list becomes larger. This limitation 114 * extended attributes if the name list becomes larger. This limitation
115 * is artificial for UBIFS, though. 115 * is artificial for UBIFS, though.
116 */ 116 */
@@ -128,7 +128,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
128 goto out_budg; 128 goto out_budg;
129 } 129 }
130 130
131 mutex_lock(&host_ui->ui_mutex);
132 /* Re-define all operations to be "nothing" */ 131 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &none_address_operations; 132 inode->i_mapping->a_ops = &none_address_operations;
134 inode->i_op = &none_inode_operations; 133 inode->i_op = &none_inode_operations;
@@ -141,23 +140,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
141 ui->data = kmalloc(size, GFP_NOFS); 140 ui->data = kmalloc(size, GFP_NOFS);
142 if (!ui->data) { 141 if (!ui->data) {
143 err = -ENOMEM; 142 err = -ENOMEM;
144 goto out_unlock; 143 goto out_free;
145 } 144 }
146
147 memcpy(ui->data, value, size); 145 memcpy(ui->data, value, size);
146 inode->i_size = ui->ui_size = size;
147 ui->data_len = size;
148
149 mutex_lock(&host_ui->ui_mutex);
148 host->i_ctime = ubifs_current_time(host); 150 host->i_ctime = ubifs_current_time(host);
149 host_ui->xattr_cnt += 1; 151 host_ui->xattr_cnt += 1;
150 host_ui->xattr_size += CALC_DENT_SIZE(nm->len); 152 host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
151 host_ui->xattr_size += CALC_XATTR_BYTES(size); 153 host_ui->xattr_size += CALC_XATTR_BYTES(size);
152 host_ui->xattr_names += nm->len; 154 host_ui->xattr_names += nm->len;
153 155
154 /*
155 * We do not use i_size_write() because nobody can race with us as we
156 * are holding host @host->i_mutex - every xattr operation for this
157 * inode is serialized by it.
158 */
159 inode->i_size = ui->ui_size = size;
160 ui->data_len = size;
161 err = ubifs_jnl_update(c, host, nm, inode, 0, 1); 156 err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
162 if (err) 157 if (err)
163 goto out_cancel; 158 goto out_cancel;
@@ -172,8 +167,8 @@ out_cancel:
172 host_ui->xattr_cnt -= 1; 167 host_ui->xattr_cnt -= 1;
173 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); 168 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
174 host_ui->xattr_size -= CALC_XATTR_BYTES(size); 169 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
175out_unlock:
176 mutex_unlock(&host_ui->ui_mutex); 170 mutex_unlock(&host_ui->ui_mutex);
171out_free:
177 make_bad_inode(inode); 172 make_bad_inode(inode);
178 iput(inode); 173 iput(inode);
179out_budg: 174out_budg:
@@ -200,29 +195,28 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
200 struct ubifs_inode *host_ui = ubifs_inode(host); 195 struct ubifs_inode *host_ui = ubifs_inode(host);
201 struct ubifs_inode *ui = ubifs_inode(inode); 196 struct ubifs_inode *ui = ubifs_inode(inode);
202 struct ubifs_budget_req req = { .dirtied_ino = 2, 197 struct ubifs_budget_req req = { .dirtied_ino = 2,
203 .dirtied_ino_d = size + host_ui->data_len }; 198 .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) };
204 199
205 ubifs_assert(ui->data_len == inode->i_size); 200 ubifs_assert(ui->data_len == inode->i_size);
206 err = ubifs_budget_space(c, &req); 201 err = ubifs_budget_space(c, &req);
207 if (err) 202 if (err)
208 return err; 203 return err;
209 204
210 mutex_lock(&host_ui->ui_mutex);
211 host->i_ctime = ubifs_current_time(host);
212 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
213 host_ui->xattr_size += CALC_XATTR_BYTES(size);
214
215 kfree(ui->data); 205 kfree(ui->data);
216 ui->data = kmalloc(size, GFP_NOFS); 206 ui->data = kmalloc(size, GFP_NOFS);
217 if (!ui->data) { 207 if (!ui->data) {
218 err = -ENOMEM; 208 err = -ENOMEM;
219 goto out_unlock; 209 goto out_free;
220 } 210 }
221
222 memcpy(ui->data, value, size); 211 memcpy(ui->data, value, size);
223 inode->i_size = ui->ui_size = size; 212 inode->i_size = ui->ui_size = size;
224 ui->data_len = size; 213 ui->data_len = size;
225 214
215 mutex_lock(&host_ui->ui_mutex);
216 host->i_ctime = ubifs_current_time(host);
217 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
218 host_ui->xattr_size += CALC_XATTR_BYTES(size);
219
226 /* 220 /*
227 * It is important to write the host inode after the xattr inode 221 * It is important to write the host inode after the xattr inode
228 * because if the host inode gets synchronized (via 'fsync()'), then 222 * because if the host inode gets synchronized (via 'fsync()'), then
@@ -240,9 +234,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
240out_cancel: 234out_cancel:
241 host_ui->xattr_size -= CALC_XATTR_BYTES(size); 235 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
242 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); 236 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
243 make_bad_inode(inode);
244out_unlock:
245 mutex_unlock(&host_ui->ui_mutex); 237 mutex_unlock(&host_ui->ui_mutex);
238 make_bad_inode(inode);
239out_free:
246 ubifs_release_budget(c, &req); 240 ubifs_release_budget(c, &req);
247 return err; 241 return err;
248} 242}
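
Note that both create_xattr() and change_xattr() above now take host_ui->ui_mutex only after the kmalloc()/memcpy() work is done, so an allocation failure exits through out_free without the lock ever being held, and the GFP_NOFS allocation no longer sleeps under the mutex. A minimal userspace sketch of that narrowed lock scope (the set_value()/ui_mutex names are illustrative, not from the patch):

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t ui_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Allocate and fill the new value before taking the mutex: the
 * failure path then needs no unlock, and we never sleep in the
 * allocator while holding the lock. */
static int set_value(char **slot, const void *value, size_t size)
{
	char *data = malloc(size);

	if (!data)
		return -1;		/* like out_free: lock never taken */
	memcpy(data, value, size);

	pthread_mutex_lock(&ui_mutex);
	free(*slot);			/* drop the old value... */
	*slot = data;			/* ...and publish the new one */
	pthread_mutex_unlock(&ui_mutex);
	return 0;
}

int main(void)
{
	char *slot = NULL;

	set_value(&slot, "user.test", 10);
	free(slot);
	return 0;
}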
@@ -312,6 +306,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
312 306
313 dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, 307 dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
314 host->i_ino, dentry->d_name.len, dentry->d_name.name, size); 308 host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
309 ubifs_assert(mutex_is_locked(&host->i_mutex));
315 310
316 if (size > UBIFS_MAX_INO_DATA) 311 if (size > UBIFS_MAX_INO_DATA)
317 return -ERANGE; 312 return -ERANGE;
@@ -384,7 +379,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
384 if (!xent) 379 if (!xent)
385 return -ENOMEM; 380 return -ENOMEM;
386 381
387 mutex_lock(&host->i_mutex);
388 xent_key_init(c, &key, host->i_ino, &nm); 382 xent_key_init(c, &key, host->i_ino, &nm);
389 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); 383 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
390 if (err) { 384 if (err) {
@@ -419,7 +413,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
419out_iput: 413out_iput:
420 iput(inode); 414 iput(inode);
421out_unlock: 415out_unlock:
422 mutex_unlock(&host->i_mutex);
423 kfree(xent); 416 kfree(xent);
424 return err; 417 return err;
425} 418}
@@ -449,8 +442,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
449 return -ERANGE; 442 return -ERANGE;
450 443
451 lowest_xent_key(c, &key, host->i_ino); 444 lowest_xent_key(c, &key, host->i_ino);
452
453 mutex_lock(&host->i_mutex);
454 while (1) { 445 while (1) {
455 int type; 446 int type;
456 447
@@ -479,7 +470,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
479 pxent = xent; 470 pxent = xent;
480 key_read(c, &xent->key, &key); 471 key_read(c, &xent->key, &key);
481 } 472 }
482 mutex_unlock(&host->i_mutex);
483 473
484 kfree(pxent); 474 kfree(pxent);
485 if (err != -ENOENT) { 475 if (err != -ENOENT) {
@@ -497,8 +487,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
497 int err; 487 int err;
498 struct ubifs_inode *host_ui = ubifs_inode(host); 488 struct ubifs_inode *host_ui = ubifs_inode(host);
499 struct ubifs_inode *ui = ubifs_inode(inode); 489 struct ubifs_inode *ui = ubifs_inode(inode);
500 struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, 490 struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1,
501 .dirtied_ino_d = host_ui->data_len }; 491 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
502 492
503 ubifs_assert(ui->data_len == inode->i_size); 493 ubifs_assert(ui->data_len == inode->i_size);
504 494
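
The budgeting fixes in this file replace raw byte counts with ALIGN(size, 8): UBIFS stores nodes 8-byte aligned on the media, so a budget computed from the unaligned length can under-estimate the space actually consumed. A small, self-contained sketch of the rounding the kernel's ALIGN() macro performs (assuming a power-of-two alignment, which is how the kernel uses it here):

#include <stdio.h>

/* Same rounding as the kernel's ALIGN() for power-of-two a:
 * round x up to the next multiple of a. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* An 11-byte xattr value occupies 16 budgeted bytes. */
	printf("ALIGN(11, 8) = %lu\n", (unsigned long)ALIGN(11UL, 8));
	/* Already-aligned lengths are unchanged. */
	printf("ALIGN(16, 8) = %lu\n", (unsigned long)ALIGN(16UL, 8));
	return 0;
}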
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
deleted file mode 100644
index 3abe7e9ceb33..000000000000
--- a/fs/xfs/linux-2.6/sema.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_SEMA_H__
19#define __XFS_SUPPORT_SEMA_H__
20
21#include <linux/time.h>
22#include <linux/wait.h>
23#include <linux/semaphore.h>
24#include <asm/atomic.h>
25
26/*
27 * sema_t structure just maps to struct semaphore in Linux kernel.
28 */
29
30typedef struct semaphore sema_t;
31
32#define initnsema(sp, val, name) sema_init(sp, val)
33#define psema(sp, b) down(sp)
34#define vsema(sp) up(sp)
35#define freesema(sema) do { } while (0)
36
37static inline int issemalocked(sema_t *sp)
38{
39 return down_trylock(sp) || (up(sp), 0);
40}
41
42/*
43 * Map cpsema (try to get the sema) to down_trylock. We need to switch
44 * the return values since cpsema returns 1 (acquired) 0 (failed) and
45 * down_trylock returns the reverse 0 (acquired) 1 (failed).
46 */
47static inline int cpsema(sema_t *sp)
48{
49 return down_trylock(sp) ? 0 : 1;
50}
51
52#endif /* __XFS_SUPPORT_SEMA_H__ */
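
The deleted sema.h above existed mostly to paper over a return-value mismatch: cpsema() reported 1 for "acquired" and 0 for "failed", while down_trylock() uses the opposite convention (0 on success). A runnable userspace analogue of that inversion, with a POSIX semaphore standing in for the kernel one (cpsema() here is a stand-in, not the kernel function):

#include <semaphore.h>
#include <stdio.h>

/* Returns 1 when the semaphore was acquired, 0 when it was not --
 * the inverse of the trylock convention, where 0 means success. */
static int cpsema(sem_t *sp)
{
	return sem_trywait(sp) == 0 ? 1 : 0;
}

int main(void)
{
	sem_t s;

	sem_init(&s, 0, 1);			/* one slot available */
	printf("first try:  %d\n", cpsema(&s));	/* 1: acquired */
	printf("second try: %d\n", cpsema(&s));	/* 0: already held */
	sem_destroy(&s);
	return 0;
}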
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index fa47e43b8b41..f42f80a3b1fa 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -73,7 +73,6 @@ xfs_page_trace(
73 unsigned long pgoff) 73 unsigned long pgoff)
74{ 74{
75 xfs_inode_t *ip; 75 xfs_inode_t *ip;
76 bhv_vnode_t *vp = vn_from_inode(inode);
77 loff_t isize = i_size_read(inode); 76 loff_t isize = i_size_read(inode);
78 loff_t offset = page_offset(page); 77 loff_t offset = page_offset(page);
79 int delalloc = -1, unmapped = -1, unwritten = -1; 78 int delalloc = -1, unmapped = -1, unwritten = -1;
@@ -81,7 +80,7 @@ xfs_page_trace(
81 if (page_has_buffers(page)) 80 if (page_has_buffers(page))
82 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); 81 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
83 82
84 ip = xfs_vtoi(vp); 83 ip = XFS_I(inode);
85 if (!ip->i_rwtrace) 84 if (!ip->i_rwtrace)
86 return; 85 return;
87 86
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9cc8f0213095..986061ae1b9b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -58,7 +58,7 @@ xfs_buf_trace(
58 bp, id, 58 bp, id,
59 (void *)(unsigned long)bp->b_flags, 59 (void *)(unsigned long)bp->b_flags,
60 (void *)(unsigned long)bp->b_hold.counter, 60 (void *)(unsigned long)bp->b_hold.counter,
61 (void *)(unsigned long)bp->b_sema.count.counter, 61 (void *)(unsigned long)bp->b_sema.count,
62 (void *)current, 62 (void *)current,
63 data, ra, 63 data, ra,
64 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), 64 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
@@ -253,7 +253,7 @@ _xfs_buf_initialize(
253 253
254 memset(bp, 0, sizeof(xfs_buf_t)); 254 memset(bp, 0, sizeof(xfs_buf_t));
255 atomic_set(&bp->b_hold, 1); 255 atomic_set(&bp->b_hold, 1);
256 init_MUTEX_LOCKED(&bp->b_iodonesema); 256 init_completion(&bp->b_iowait);
257 INIT_LIST_HEAD(&bp->b_list); 257 INIT_LIST_HEAD(&bp->b_list);
258 INIT_LIST_HEAD(&bp->b_hash_list); 258 INIT_LIST_HEAD(&bp->b_hash_list);
259 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ 259 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
@@ -838,6 +838,7 @@ xfs_buf_rele(
838 return; 838 return;
839 } 839 }
840 840
841 ASSERT(atomic_read(&bp->b_hold) > 0);
841 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { 842 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
842 if (bp->b_relse) { 843 if (bp->b_relse) {
843 atomic_inc(&bp->b_hold); 844 atomic_inc(&bp->b_hold);
@@ -851,11 +852,6 @@ xfs_buf_rele(
851 spin_unlock(&hash->bh_lock); 852 spin_unlock(&hash->bh_lock);
852 xfs_buf_free(bp); 853 xfs_buf_free(bp);
853 } 854 }
854 } else {
855 /*
856 * Catch reference count leaks
857 */
858 ASSERT(atomic_read(&bp->b_hold) >= 0);
859 } 855 }
860} 856}
861 857
@@ -1037,7 +1033,7 @@ xfs_buf_ioend(
1037 xfs_buf_iodone_work(&bp->b_iodone_work); 1033 xfs_buf_iodone_work(&bp->b_iodone_work);
1038 } 1034 }
1039 } else { 1035 } else {
1040 up(&bp->b_iodonesema); 1036 complete(&bp->b_iowait);
1041 } 1037 }
1042} 1038}
1043 1039
@@ -1275,7 +1271,7 @@ xfs_buf_iowait(
1275 XB_TRACE(bp, "iowait", 0); 1271 XB_TRACE(bp, "iowait", 0);
1276 if (atomic_read(&bp->b_io_remaining)) 1272 if (atomic_read(&bp->b_io_remaining))
1277 blk_run_address_space(bp->b_target->bt_mapping); 1273 blk_run_address_space(bp->b_target->bt_mapping);
1278 down(&bp->b_iodonesema); 1274 wait_for_completion(&bp->b_iowait);
1279 XB_TRACE(bp, "iowaited", (long)bp->b_error); 1275 XB_TRACE(bp, "iowaited", (long)bp->b_error);
1280 return bp->b_error; 1276 return bp->b_error;
1281} 1277}
@@ -1799,7 +1795,7 @@ int __init
1799xfs_buf_init(void) 1795xfs_buf_init(void)
1800{ 1796{
1801#ifdef XFS_BUF_TRACE 1797#ifdef XFS_BUF_TRACE
1802 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP); 1798 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
1803#endif 1799#endif
1804 1800
1805 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", 1801 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
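
The xfs_buf changes swap the locked b_iodonesema semaphore for a struct completion: init_completion() starts it "not done", the I/O completion path calls complete(), and the submitter blocks in wait_for_completion(). A runnable userspace analogue of that one-shot latch, built from a mutex and condition variable (the *_like names are illustrative; the kernel primitive does this internally with a waitqueue):

#include <pthread.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's one-shot struct completion. */
struct completion_like {
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             done;
};

static void init_completion_like(struct completion_like *c)
{
	pthread_mutex_init(&c->lock, NULL);
	pthread_cond_init(&c->cond, NULL);
	c->done = 0;
}

static void complete_like(struct completion_like *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion_like(struct completion_like *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)		/* guard against spurious wakeups */
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static struct completion_like iowait;

static void *io_thread(void *arg)
{
	/* ... simulated I/O finishes ... */
	complete_like(&iowait);		/* xfs_buf_ioend() analogue */
	return NULL;
}

int main(void)
{
	pthread_t t;

	init_completion_like(&iowait);
	pthread_create(&t, NULL, io_thread, NULL);
	wait_for_completion_like(&iowait);	/* xfs_buf_iowait() analogue */
	printf("I/O done\n");
	pthread_join(t, NULL);
	return 0;
}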
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 29d1d4adc078..fe0109956656 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -157,7 +157,7 @@ typedef struct xfs_buf {
157 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 157 xfs_buf_iodone_t b_iodone; /* I/O completion function */
158 xfs_buf_relse_t b_relse; /* releasing function */ 158 xfs_buf_relse_t b_relse; /* releasing function */
159 xfs_buf_bdstrat_t b_strat; /* pre-write function */ 159 xfs_buf_bdstrat_t b_strat; /* pre-write function */
160 struct semaphore b_iodonesema; /* Semaphore for I/O waiters */ 160 struct completion b_iowait; /* queue for I/O waiters */
161 void *b_fspriv; 161 void *b_fspriv;
162 void *b_fspriv2; 162 void *b_fspriv2;
163 void *b_fspriv3; 163 void *b_fspriv3;
@@ -352,7 +352,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
352#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) 352#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
353#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) 353#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
354#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) 354#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
355#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema); 355#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
356 356
357#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) 357#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
358#define XFS_BUF_TARGET(bp) ((bp)->b_target) 358#define XFS_BUF_TARGET(bp) ((bp)->b_target)
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 987fe84f7b13..24fd598af846 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -139,7 +139,7 @@ xfs_nfs_get_inode(
139 } 139 }
140 140
141 xfs_iunlock(ip, XFS_ILOCK_SHARED); 141 xfs_iunlock(ip, XFS_ILOCK_SHARED);
142 return ip->i_vnode; 142 return VFS_I(ip);
143} 143}
144 144
145STATIC struct dentry * 145STATIC struct dentry *
@@ -167,7 +167,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
167 if (!inode) 167 if (!inode)
168 return NULL; 168 return NULL;
169 if (IS_ERR(inode)) 169 if (IS_ERR(inode))
170 return ERR_PTR(PTR_ERR(inode)); 170 return ERR_CAST(inode);
171 result = d_alloc_anon(inode); 171 result = d_alloc_anon(inode);
172 if (!result) { 172 if (!result) {
173 iput(inode); 173 iput(inode);
@@ -198,7 +198,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
198 if (!inode) 198 if (!inode)
199 return NULL; 199 return NULL;
200 if (IS_ERR(inode)) 200 if (IS_ERR(inode))
201 return ERR_PTR(PTR_ERR(inode)); 201 return ERR_CAST(inode);
202 result = d_alloc_anon(inode); 202 result = d_alloc_anon(inode);
203 if (!result) { 203 if (!result) {
204 iput(inode); 204 iput(inode);
@@ -219,9 +219,9 @@ xfs_fs_get_parent(
219 if (unlikely(error)) 219 if (unlikely(error))
220 return ERR_PTR(-error); 220 return ERR_PTR(-error);
221 221
222 parent = d_alloc_anon(cip->i_vnode); 222 parent = d_alloc_anon(VFS_I(cip));
223 if (unlikely(!parent)) { 223 if (unlikely(!parent)) {
224 iput(cip->i_vnode); 224 iput(VFS_I(cip));
225 return ERR_PTR(-ENOMEM); 225 return ERR_PTR(-ENOMEM);
226 } 226 }
227 return parent; 227 return parent;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 5f60363b9343..5311c1acdd40 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = {
475const struct file_operations xfs_dir_file_operations = { 475const struct file_operations xfs_dir_file_operations = {
476 .read = generic_read_dir, 476 .read = generic_read_dir,
477 .readdir = xfs_file_readdir, 477 .readdir = xfs_file_readdir,
478 .llseek = generic_file_llseek,
478 .unlocked_ioctl = xfs_file_ioctl, 479 .unlocked_ioctl = xfs_file_ioctl,
479#ifdef CONFIG_COMPAT 480#ifdef CONFIG_COMPAT
480 .compat_ioctl = xfs_file_compat_ioctl, 481 .compat_ioctl = xfs_file_compat_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 1eefe61f0e10..36caa6d957df 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -31,7 +31,7 @@ xfs_tosspages(
31 xfs_off_t last, 31 xfs_off_t last,
32 int fiopt) 32 int fiopt)
33{ 33{
34 struct address_space *mapping = ip->i_vnode->i_mapping; 34 struct address_space *mapping = VFS_I(ip)->i_mapping;
35 35
36 if (mapping->nrpages) 36 if (mapping->nrpages)
37 truncate_inode_pages(mapping, first); 37 truncate_inode_pages(mapping, first);
@@ -44,7 +44,7 @@ xfs_flushinval_pages(
44 xfs_off_t last, 44 xfs_off_t last,
45 int fiopt) 45 int fiopt)
46{ 46{
47 struct address_space *mapping = ip->i_vnode->i_mapping; 47 struct address_space *mapping = VFS_I(ip)->i_mapping;
48 int ret = 0; 48 int ret = 0;
49 49
50 if (mapping->nrpages) { 50 if (mapping->nrpages) {
@@ -64,7 +64,7 @@ xfs_flush_pages(
64 uint64_t flags, 64 uint64_t flags,
65 int fiopt) 65 int fiopt)
66{ 66{
67 struct address_space *mapping = ip->i_vnode->i_mapping; 67 struct address_space *mapping = VFS_I(ip)->i_mapping;
68 int ret = 0; 68 int ret = 0;
69 int ret2; 69 int ret2;
70 70
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index acb978d9d085..48799ba7e3e6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -245,7 +245,7 @@ xfs_vget_fsop_handlereq(
245 245
246 xfs_iunlock(ip, XFS_ILOCK_SHARED); 246 xfs_iunlock(ip, XFS_ILOCK_SHARED);
247 247
248 *inode = XFS_ITOV(ip); 248 *inode = VFS_I(ip);
249 return 0; 249 return 0;
250} 250}
251 251
@@ -927,7 +927,7 @@ STATIC void
927xfs_diflags_to_linux( 927xfs_diflags_to_linux(
928 struct xfs_inode *ip) 928 struct xfs_inode *ip)
929{ 929{
930 struct inode *inode = XFS_ITOV(ip); 930 struct inode *inode = VFS_I(ip);
931 unsigned int xflags = xfs_ip2xflags(ip); 931 unsigned int xflags = xfs_ip2xflags(ip);
932 932
933 if (xflags & XFS_XFLAG_IMMUTABLE) 933 if (xflags & XFS_XFLAG_IMMUTABLE)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e88f51028086..095d271f3434 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,7 +62,7 @@ void
62xfs_synchronize_atime( 62xfs_synchronize_atime(
63 xfs_inode_t *ip) 63 xfs_inode_t *ip)
64{ 64{
65 struct inode *inode = ip->i_vnode; 65 struct inode *inode = VFS_I(ip);
66 66
67 if (inode) { 67 if (inode) {
68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
@@ -79,7 +79,7 @@ void
79xfs_mark_inode_dirty_sync( 79xfs_mark_inode_dirty_sync(
80 xfs_inode_t *ip) 80 xfs_inode_t *ip)
81{ 81{
82 struct inode *inode = ip->i_vnode; 82 struct inode *inode = VFS_I(ip);
83 83
84 if (inode) 84 if (inode)
85 mark_inode_dirty_sync(inode); 85 mark_inode_dirty_sync(inode);
@@ -89,36 +89,31 @@ xfs_mark_inode_dirty_sync(
89 * Change the requested timestamp in the given inode. 89 * Change the requested timestamp in the given inode.
90 * We don't lock across timestamp updates, and we don't log them but 90 * We don't lock across timestamp updates, and we don't log them but
91 * we do record the fact that there is dirty information in core. 91 * we do record the fact that there is dirty information in core.
92 *
93 * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
94 * with XFS_ICHGTIME_ACC to be sure that access time
95 * update will take. Calling first with XFS_ICHGTIME_ACC
96 * and then XFS_ICHGTIME_MOD may fail to modify the access
97 * timestamp if the filesystem is mounted noacctm.
98 */ 92 */
99void 93void
100xfs_ichgtime( 94xfs_ichgtime(
101 xfs_inode_t *ip, 95 xfs_inode_t *ip,
102 int flags) 96 int flags)
103{ 97{
104 struct inode *inode = vn_to_inode(XFS_ITOV(ip)); 98 struct inode *inode = VFS_I(ip);
105 timespec_t tv; 99 timespec_t tv;
100 int sync_it = 0;
101
102 tv = current_fs_time(inode->i_sb);
106 103
107 nanotime(&tv); 104 if ((flags & XFS_ICHGTIME_MOD) &&
108 if (flags & XFS_ICHGTIME_MOD) { 105 !timespec_equal(&inode->i_mtime, &tv)) {
109 inode->i_mtime = tv; 106 inode->i_mtime = tv;
110 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 107 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
111 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 108 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
109 sync_it = 1;
112 } 110 }
113 if (flags & XFS_ICHGTIME_ACC) { 111 if ((flags & XFS_ICHGTIME_CHG) &&
114 inode->i_atime = tv; 112 !timespec_equal(&inode->i_ctime, &tv)) {
115 ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
116 ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
117 }
118 if (flags & XFS_ICHGTIME_CHG) {
119 inode->i_ctime = tv; 113 inode->i_ctime = tv;
120 ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; 114 ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
121 ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; 115 ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
116 sync_it = 1;
122 } 117 }
123 118
124 /* 119 /*
@@ -130,55 +125,11 @@ xfs_ichgtime(
130 * ensure that the compiler does not reorder the update 125 * ensure that the compiler does not reorder the update
131 * of i_update_core above the timestamp updates above. 126 * of i_update_core above the timestamp updates above.
132 */ 127 */
133 SYNCHRONIZE(); 128 if (sync_it) {
134 ip->i_update_core = 1; 129 SYNCHRONIZE();
135 if (!(inode->i_state & I_NEW)) 130 ip->i_update_core = 1;
136 mark_inode_dirty_sync(inode); 131 mark_inode_dirty_sync(inode);
137}
138
139/*
140 * Variant on the above which avoids querying the system clock
141 * in situations where we know the Linux inode timestamps have
142 * just been updated (and so we can update our inode cheaply).
143 */
144void
145xfs_ichgtime_fast(
146 xfs_inode_t *ip,
147 struct inode *inode,
148 int flags)
149{
150 timespec_t *tvp;
151
152 /*
153 * Atime updates for read() & friends are handled lazily now, and
154 * explicit updates must go through xfs_ichgtime()
155 */
156 ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
157
158 if (flags & XFS_ICHGTIME_MOD) {
159 tvp = &inode->i_mtime;
160 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
161 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
162 } 132 }
163 if (flags & XFS_ICHGTIME_CHG) {
164 tvp = &inode->i_ctime;
165 ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
166 ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec;
167 }
168
169 /*
170 * We update the i_update_core field _after_ changing
171 * the timestamps in order to coordinate properly with
172 * xfs_iflush() so that we don't lose timestamp updates.
173 * This keeps us from having to hold the inode lock
174 * while doing this. We use the SYNCHRONIZE macro to
175 * ensure that the compiler does not reorder the update
176 * of i_update_core above the timestamp updates above.
177 */
178 SYNCHRONIZE();
179 ip->i_update_core = 1;
180 if (!(inode->i_state & I_NEW))
181 mark_inode_dirty_sync(inode);
182} 133}
183 134
184/* 135/*
@@ -299,7 +250,7 @@ xfs_vn_mknod(
299 if (unlikely(error)) 250 if (unlikely(error))
300 goto out_free_acl; 251 goto out_free_acl;
301 252
302 inode = ip->i_vnode; 253 inode = VFS_I(ip);
303 254
304 error = xfs_init_security(inode, dir); 255 error = xfs_init_security(inode, dir);
305 if (unlikely(error)) 256 if (unlikely(error))
@@ -366,7 +317,7 @@ xfs_vn_lookup(
366 return NULL; 317 return NULL;
367 } 318 }
368 319
369 return d_splice_alias(cip->i_vnode, dentry); 320 return d_splice_alias(VFS_I(cip), dentry);
370} 321}
371 322
372STATIC struct dentry * 323STATIC struct dentry *
@@ -399,12 +350,12 @@ xfs_vn_ci_lookup(
399 350
400 /* if exact match, just splice and exit */ 351 /* if exact match, just splice and exit */
401 if (!ci_name.name) 352 if (!ci_name.name)
402 return d_splice_alias(ip->i_vnode, dentry); 353 return d_splice_alias(VFS_I(ip), dentry);
403 354
404 /* else case-insensitive match... */ 355 /* else case-insensitive match... */
405 dname.name = ci_name.name; 356 dname.name = ci_name.name;
406 dname.len = ci_name.len; 357 dname.len = ci_name.len;
407 dentry = d_add_ci(ip->i_vnode, dentry, &dname); 358 dentry = d_add_ci(dentry, VFS_I(ip), &dname);
408 kmem_free(ci_name.name); 359 kmem_free(ci_name.name);
409 return dentry; 360 return dentry;
410} 361}
@@ -478,7 +429,7 @@ xfs_vn_symlink(
478 if (unlikely(error)) 429 if (unlikely(error))
479 goto out; 430 goto out;
480 431
481 inode = cip->i_vnode; 432 inode = VFS_I(cip);
482 433
483 error = xfs_init_security(inode, dir); 434 error = xfs_init_security(inode, dir);
484 if (unlikely(error)) 435 if (unlikely(error))
@@ -710,7 +661,7 @@ out_error:
710 return error; 661 return error;
711} 662}
712 663
713const struct inode_operations xfs_inode_operations = { 664static const struct inode_operations xfs_inode_operations = {
714 .permission = xfs_vn_permission, 665 .permission = xfs_vn_permission,
715 .truncate = xfs_vn_truncate, 666 .truncate = xfs_vn_truncate,
716 .getattr = xfs_vn_getattr, 667 .getattr = xfs_vn_getattr,
@@ -722,7 +673,7 @@ const struct inode_operations xfs_inode_operations = {
722 .fallocate = xfs_vn_fallocate, 673 .fallocate = xfs_vn_fallocate,
723}; 674};
724 675
725const struct inode_operations xfs_dir_inode_operations = { 676static const struct inode_operations xfs_dir_inode_operations = {
726 .create = xfs_vn_create, 677 .create = xfs_vn_create,
727 .lookup = xfs_vn_lookup, 678 .lookup = xfs_vn_lookup,
728 .link = xfs_vn_link, 679 .link = xfs_vn_link,
@@ -747,7 +698,7 @@ const struct inode_operations xfs_dir_inode_operations = {
747 .listxattr = xfs_vn_listxattr, 698 .listxattr = xfs_vn_listxattr,
748}; 699};
749 700
750const struct inode_operations xfs_dir_ci_inode_operations = { 701static const struct inode_operations xfs_dir_ci_inode_operations = {
751 .create = xfs_vn_create, 702 .create = xfs_vn_create,
752 .lookup = xfs_vn_ci_lookup, 703 .lookup = xfs_vn_ci_lookup,
753 .link = xfs_vn_link, 704 .link = xfs_vn_link,
@@ -772,7 +723,7 @@ const struct inode_operations xfs_dir_ci_inode_operations = {
772 .listxattr = xfs_vn_listxattr, 723 .listxattr = xfs_vn_listxattr,
773}; 724};
774 725
775const struct inode_operations xfs_symlink_inode_operations = { 726static const struct inode_operations xfs_symlink_inode_operations = {
776 .readlink = generic_readlink, 727 .readlink = generic_readlink,
777 .follow_link = xfs_vn_follow_link, 728 .follow_link = xfs_vn_follow_link,
778 .put_link = xfs_vn_put_link, 729 .put_link = xfs_vn_put_link,
@@ -784,3 +735,98 @@ const struct inode_operations xfs_symlink_inode_operations = {
784 .removexattr = generic_removexattr, 735 .removexattr = generic_removexattr,
785 .listxattr = xfs_vn_listxattr, 736 .listxattr = xfs_vn_listxattr,
786}; 737};
738
739STATIC void
740xfs_diflags_to_iflags(
741 struct inode *inode,
742 struct xfs_inode *ip)
743{
744 if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
745 inode->i_flags |= S_IMMUTABLE;
746 else
747 inode->i_flags &= ~S_IMMUTABLE;
748 if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
749 inode->i_flags |= S_APPEND;
750 else
751 inode->i_flags &= ~S_APPEND;
752 if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
753 inode->i_flags |= S_SYNC;
754 else
755 inode->i_flags &= ~S_SYNC;
756 if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
757 inode->i_flags |= S_NOATIME;
758 else
759 inode->i_flags &= ~S_NOATIME;
760}
761
762/*
763 * Initialize the Linux inode, set up the operation vectors and
764 * unlock the inode.
765 *
766 * When reading existing inodes from disk this is called directly
767 * from xfs_iget, when creating a new inode it is called from
768 * xfs_ialloc after setting up the inode.
769 */
770void
771xfs_setup_inode(
772 struct xfs_inode *ip)
773{
774 struct inode *inode = ip->i_vnode;
775
776 inode->i_mode = ip->i_d.di_mode;
777 inode->i_nlink = ip->i_d.di_nlink;
778 inode->i_uid = ip->i_d.di_uid;
779 inode->i_gid = ip->i_d.di_gid;
780
781 switch (inode->i_mode & S_IFMT) {
782 case S_IFBLK:
783 case S_IFCHR:
784 inode->i_rdev =
785 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
786 sysv_minor(ip->i_df.if_u2.if_rdev));
787 break;
788 default:
789 inode->i_rdev = 0;
790 break;
791 }
792
793 inode->i_generation = ip->i_d.di_gen;
794 i_size_write(inode, ip->i_d.di_size);
795 inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
796 inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
797 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
798 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
799 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
800 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
801 xfs_diflags_to_iflags(inode, ip);
802 xfs_iflags_clear(ip, XFS_IMODIFIED);
803
804 switch (inode->i_mode & S_IFMT) {
805 case S_IFREG:
806 inode->i_op = &xfs_inode_operations;
807 inode->i_fop = &xfs_file_operations;
808 inode->i_mapping->a_ops = &xfs_address_space_operations;
809 break;
810 case S_IFDIR:
811 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
812 inode->i_op = &xfs_dir_ci_inode_operations;
813 else
814 inode->i_op = &xfs_dir_inode_operations;
815 inode->i_fop = &xfs_dir_file_operations;
816 break;
817 case S_IFLNK:
818 inode->i_op = &xfs_symlink_inode_operations;
819 if (!(ip->i_df.if_flags & XFS_IFINLINE))
820 inode->i_mapping->a_ops = &xfs_address_space_operations;
821 break;
822 default:
823 inode->i_op = &xfs_inode_operations;
824 init_special_inode(inode, inode->i_mode, inode->i_rdev);
825 break;
826 }
827
828 xfs_iflags_clear(ip, XFS_INEW);
829 barrier();
830
831 unlock_new_inode(inode);
832}
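
The reworked xfs_ichgtime() above only dirties the inode when a timestamp actually moved: it fetches current_fs_time(), compares with timespec_equal(), and sets sync_it only on a real change, avoiding redundant mark_inode_dirty_sync() calls on filesystems with coarse timestamp granularity. A runnable userspace sketch of that compare-before-dirtying shape (the struct and helper names are illustrative):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool timespec_equal(const struct timespec *a, const struct timespec *b)
{
	return a->tv_sec == b->tv_sec && a->tv_nsec == b->tv_nsec;
}

struct inode_like {
	struct timespec mtime, ctime;
	bool dirty;
};

/* Update mtime/ctime to 'now', but only dirty the inode when at
 * least one timestamp really changed. */
static void touch(struct inode_like *ino, const struct timespec *now)
{
	bool sync_it = false;

	if (!timespec_equal(&ino->mtime, now)) {
		ino->mtime = *now;
		sync_it = true;
	}
	if (!timespec_equal(&ino->ctime, now)) {
		ino->ctime = *now;
		sync_it = true;
	}
	if (sync_it)
		ino->dirty = true;
}

int main(void)
{
	struct timespec now;
	struct inode_like ino = { .dirty = false };

	clock_gettime(CLOCK_REALTIME, &now);
	touch(&ino, &now);
	printf("after first touch:  dirty=%d\n", ino.dirty);	/* 1 */
	ino.dirty = false;
	touch(&ino, &now);	/* same timestamps: nothing to do */
	printf("after second touch: dirty=%d\n", ino.dirty);	/* 0 */
	return 0;
}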
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index d97ba934a2ac..8b1a1e31dc21 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -18,10 +18,7 @@
18#ifndef __XFS_IOPS_H__ 18#ifndef __XFS_IOPS_H__
19#define __XFS_IOPS_H__ 19#define __XFS_IOPS_H__
20 20
21extern const struct inode_operations xfs_inode_operations; 21struct xfs_inode;
22extern const struct inode_operations xfs_dir_inode_operations;
23extern const struct inode_operations xfs_dir_ci_inode_operations;
24extern const struct inode_operations xfs_symlink_inode_operations;
25 22
26extern const struct file_operations xfs_file_operations; 23extern const struct file_operations xfs_file_operations;
27extern const struct file_operations xfs_dir_file_operations; 24extern const struct file_operations xfs_dir_file_operations;
@@ -29,14 +26,6 @@ extern const struct file_operations xfs_invis_file_operations;
29 26
30extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); 27extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
31 28
32struct xfs_inode; 29extern void xfs_setup_inode(struct xfs_inode *);
33extern void xfs_ichgtime(struct xfs_inode *, int);
34extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int);
35
36#define xfs_vtoi(vp) \
37 ((struct xfs_inode *)vn_to_inode(vp)->i_private)
38
39#define XFS_I(inode) \
40 ((struct xfs_inode *)(inode)->i_private)
41 30
42#endif /* __XFS_IOPS_H__ */ 31#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4d45d9351a6c..cc0f7b3a9795 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -45,13 +45,13 @@
45#include <mrlock.h> 45#include <mrlock.h>
46#include <sv.h> 46#include <sv.h>
47#include <mutex.h> 47#include <mutex.h>
48#include <sema.h>
49#include <time.h> 48#include <time.h>
50 49
51#include <support/ktrace.h> 50#include <support/ktrace.h>
52#include <support/debug.h> 51#include <support/debug.h>
53#include <support/uuid.h> 52#include <support/uuid.h>
54 53
54#include <linux/semaphore.h>
55#include <linux/mm.h> 55#include <linux/mm.h>
56#include <linux/kernel.h> 56#include <linux/kernel.h>
57#include <linux/blkdev.h> 57#include <linux/blkdev.h>
@@ -126,8 +126,6 @@
126 126
127#define current_cpu() (raw_smp_processor_id()) 127#define current_cpu() (raw_smp_processor_id())
128#define current_pid() (current->pid) 128#define current_pid() (current->pid)
129#define current_fsuid(cred) (current->fsuid)
130#define current_fsgid(cred) (current->fsgid)
131#define current_test_flags(f) (current->flags & (f)) 129#define current_test_flags(f) (current->flags & (f))
132#define current_set_flags_nested(sp, f) \ 130#define current_set_flags_nested(sp, f) \
133 (*(sp) = current->flags, current->flags |= (f)) 131 (*(sp) = current->flags, current->flags |= (f))
@@ -180,7 +178,7 @@
180#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) 178#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
181#define xfs_stack_trace() dump_stack() 179#define xfs_stack_trace() dump_stack()
182#define xfs_itruncate_data(ip, off) \ 180#define xfs_itruncate_data(ip, off) \
183 (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) 181 (-vmtruncate(VFS_I(ip), (off)))
184 182
185 183
186/* Move the kernel do_div definition off to one side */ 184/* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 82333b3e118e..1957e5357d04 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -137,7 +137,7 @@ xfs_iozero(
137 struct address_space *mapping; 137 struct address_space *mapping;
138 int status; 138 int status;
139 139
140 mapping = ip->i_vnode->i_mapping; 140 mapping = VFS_I(ip)->i_mapping;
141 do { 141 do {
142 unsigned offset, bytes; 142 unsigned offset, bytes;
143 void *fsdata; 143 void *fsdata;
@@ -674,9 +674,7 @@ start:
674 */ 674 */
675 if (likely(!(ioflags & IO_INVIS) && 675 if (likely(!(ioflags & IO_INVIS) &&
676 !mnt_want_write(file->f_path.mnt))) { 676 !mnt_want_write(file->f_path.mnt))) {
677 file_update_time(file); 677 xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
678 xfs_ichgtime_fast(xip, inode,
679 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
680 mnt_drop_write(file->f_path.mnt); 678 mnt_drop_write(file->f_path.mnt);
681 } 679 }
682 680
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 30ae96397e31..73c65f19e549 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -581,118 +581,6 @@ xfs_max_file_offset(
581 return (((__uint64_t)pagefactor) << bitshift) - 1; 581 return (((__uint64_t)pagefactor) << bitshift) - 1;
582} 582}
583 583
584STATIC_INLINE void
585xfs_set_inodeops(
586 struct inode *inode)
587{
588 switch (inode->i_mode & S_IFMT) {
589 case S_IFREG:
590 inode->i_op = &xfs_inode_operations;
591 inode->i_fop = &xfs_file_operations;
592 inode->i_mapping->a_ops = &xfs_address_space_operations;
593 break;
594 case S_IFDIR:
595 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
596 inode->i_op = &xfs_dir_ci_inode_operations;
597 else
598 inode->i_op = &xfs_dir_inode_operations;
599 inode->i_fop = &xfs_dir_file_operations;
600 break;
601 case S_IFLNK:
602 inode->i_op = &xfs_symlink_inode_operations;
603 if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE))
604 inode->i_mapping->a_ops = &xfs_address_space_operations;
605 break;
606 default:
607 inode->i_op = &xfs_inode_operations;
608 init_special_inode(inode, inode->i_mode, inode->i_rdev);
609 break;
610 }
611}
612
613STATIC_INLINE void
614xfs_revalidate_inode(
615 xfs_mount_t *mp,
616 bhv_vnode_t *vp,
617 xfs_inode_t *ip)
618{
619 struct inode *inode = vn_to_inode(vp);
620
621 inode->i_mode = ip->i_d.di_mode;
622 inode->i_nlink = ip->i_d.di_nlink;
623 inode->i_uid = ip->i_d.di_uid;
624 inode->i_gid = ip->i_d.di_gid;
625
626 switch (inode->i_mode & S_IFMT) {
627 case S_IFBLK:
628 case S_IFCHR:
629 inode->i_rdev =
630 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
631 sysv_minor(ip->i_df.if_u2.if_rdev));
632 break;
633 default:
634 inode->i_rdev = 0;
635 break;
636 }
637
638 inode->i_generation = ip->i_d.di_gen;
639 i_size_write(inode, ip->i_d.di_size);
640 inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
641 inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
642 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
643 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
644 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
645 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
646 if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
647 inode->i_flags |= S_IMMUTABLE;
648 else
649 inode->i_flags &= ~S_IMMUTABLE;
650 if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
651 inode->i_flags |= S_APPEND;
652 else
653 inode->i_flags &= ~S_APPEND;
654 if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
655 inode->i_flags |= S_SYNC;
656 else
657 inode->i_flags &= ~S_SYNC;
658 if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
659 inode->i_flags |= S_NOATIME;
660 else
661 inode->i_flags &= ~S_NOATIME;
662 xfs_iflags_clear(ip, XFS_IMODIFIED);
663}
664
665void
666xfs_initialize_vnode(
667 struct xfs_mount *mp,
668 bhv_vnode_t *vp,
669 struct xfs_inode *ip)
670{
671 struct inode *inode = vn_to_inode(vp);
672
673 if (!ip->i_vnode) {
674 ip->i_vnode = vp;
675 inode->i_private = ip;
676 }
677
678 /*
679 * We need to set the ops vectors, and unlock the inode, but if
680 * we have been called during the new inode create process, it is
681 * too early to fill in the Linux inode. We will get called a
682 * second time once the inode is properly set up, and then we can
683 * finish our work.
684 */
685 if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) {
686 xfs_revalidate_inode(mp, vp, ip);
687 xfs_set_inodeops(inode);
688
689 xfs_iflags_clear(ip, XFS_INEW);
690 barrier();
691
692 unlock_new_inode(inode);
693 }
694}
695
696int 584int
697xfs_blkdev_get( 585xfs_blkdev_get(
698 xfs_mount_t *mp, 586 xfs_mount_t *mp,
@@ -982,26 +870,21 @@ STATIC struct inode *
982xfs_fs_alloc_inode( 870xfs_fs_alloc_inode(
983 struct super_block *sb) 871 struct super_block *sb)
984{ 872{
985 bhv_vnode_t *vp; 873 return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
986
987 vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
988 if (unlikely(!vp))
989 return NULL;
990 return vn_to_inode(vp);
991} 874}
992 875
993STATIC void 876STATIC void
994xfs_fs_destroy_inode( 877xfs_fs_destroy_inode(
995 struct inode *inode) 878 struct inode *inode)
996{ 879{
997 kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode)); 880 kmem_zone_free(xfs_vnode_zone, inode);
998} 881}
999 882
1000STATIC void 883STATIC void
1001xfs_fs_inode_init_once( 884xfs_fs_inode_init_once(
1002 void *vnode) 885 void *vnode)
1003{ 886{
1004 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); 887 inode_init_once((struct inode *)vnode);
1005} 888}
1006 889
1007/* 890/*
@@ -1106,7 +989,7 @@ void
1106xfs_flush_inode( 989xfs_flush_inode(
1107 xfs_inode_t *ip) 990 xfs_inode_t *ip)
1108{ 991{
1109 struct inode *inode = ip->i_vnode; 992 struct inode *inode = VFS_I(ip);
1110 993
1111 igrab(inode); 994 igrab(inode);
1112 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); 995 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
@@ -1131,7 +1014,7 @@ void
1131xfs_flush_device( 1014xfs_flush_device(
1132 xfs_inode_t *ip) 1015 xfs_inode_t *ip)
1133{ 1016{
1134 struct inode *inode = vn_to_inode(XFS_ITOV(ip)); 1017 struct inode *inode = VFS_I(ip);
1135 1018
1136 igrab(inode); 1019 igrab(inode);
1137 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); 1020 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
@@ -1201,6 +1084,15 @@ xfssyncd(
1201} 1084}
1202 1085
1203STATIC void 1086STATIC void
1087xfs_free_fsname(
1088 struct xfs_mount *mp)
1089{
1090 kfree(mp->m_fsname);
1091 kfree(mp->m_rtname);
1092 kfree(mp->m_logname);
1093}
1094
1095STATIC void
1204xfs_fs_put_super( 1096xfs_fs_put_super(
1205 struct super_block *sb) 1097 struct super_block *sb)
1206{ 1098{
@@ -1239,8 +1131,6 @@ xfs_fs_put_super(
1239 error = xfs_unmount_flush(mp, 0); 1131 error = xfs_unmount_flush(mp, 0);
1240 WARN_ON(error); 1132 WARN_ON(error);
1241 1133
1242 IRELE(rip);
1243
1244 /* 1134 /*
1245 * If we're forcing a shutdown, typically because of a media error, 1135 * If we're forcing a shutdown, typically because of a media error,
1246 * we want to make sure we invalidate dirty pages that belong to 1136 * we want to make sure we invalidate dirty pages that belong to
@@ -1257,10 +1147,12 @@ xfs_fs_put_super(
1257 } 1147 }
1258 1148
1259 xfs_unmountfs(mp); 1149 xfs_unmountfs(mp);
1150 xfs_freesb(mp);
1260 xfs_icsb_destroy_counters(mp); 1151 xfs_icsb_destroy_counters(mp);
1261 xfs_close_devices(mp); 1152 xfs_close_devices(mp);
1262 xfs_qmops_put(mp); 1153 xfs_qmops_put(mp);
1263 xfs_dmops_put(mp); 1154 xfs_dmops_put(mp);
1155 xfs_free_fsname(mp);
1264 kfree(mp); 1156 kfree(mp);
1265} 1157}
1266 1158
@@ -1517,6 +1409,8 @@ xfs_start_flags(
1517 struct xfs_mount_args *ap, 1409 struct xfs_mount_args *ap,
1518 struct xfs_mount *mp) 1410 struct xfs_mount *mp)
1519{ 1411{
1412 int error;
1413
1520 /* Values are in BBs */ 1414 /* Values are in BBs */
1521 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { 1415 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
1522 /* 1416 /*
@@ -1549,17 +1443,27 @@ xfs_start_flags(
1549 ap->logbufsize); 1443 ap->logbufsize);
1550 return XFS_ERROR(EINVAL); 1444 return XFS_ERROR(EINVAL);
1551 } 1445 }
1446
1447 error = ENOMEM;
1448
1552 mp->m_logbsize = ap->logbufsize; 1449 mp->m_logbsize = ap->logbufsize;
1553 mp->m_fsname_len = strlen(ap->fsname) + 1; 1450 mp->m_fsname_len = strlen(ap->fsname) + 1;
1554 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); 1451
1555 strcpy(mp->m_fsname, ap->fsname); 1452 mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
1453 if (!mp->m_fsname)
1454 goto out;
1455
1556 if (ap->rtname[0]) { 1456 if (ap->rtname[0]) {
1557 mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP); 1457 mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
1558 strcpy(mp->m_rtname, ap->rtname); 1458 if (!mp->m_rtname)
1459 goto out_free_fsname;
1460
1559 } 1461 }
1462
1560 if (ap->logname[0]) { 1463 if (ap->logname[0]) {
1561 mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP); 1464 mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
1562 strcpy(mp->m_logname, ap->logname); 1465 if (!mp->m_logname)
1466 goto out_free_rtname;
1563 } 1467 }
1564 1468
1565 if (ap->flags & XFSMNT_WSYNC) 1469 if (ap->flags & XFSMNT_WSYNC)
@@ -1632,6 +1536,14 @@ xfs_start_flags(
1632 if (ap->flags & XFSMNT_DMAPI) 1536 if (ap->flags & XFSMNT_DMAPI)
1633 mp->m_flags |= XFS_MOUNT_DMAPI; 1537 mp->m_flags |= XFS_MOUNT_DMAPI;
1634 return 0; 1538 return 0;
1539
1540
1541 out_free_rtname:
1542 kfree(mp->m_rtname);
1543 out_free_fsname:
1544 kfree(mp->m_fsname);
1545 out:
1546 return error;
1635} 1547}
1636 1548
1637/* 1549/*
@@ -1792,10 +1704,10 @@ xfs_fs_fill_super(
1792 */ 1704 */
1793 error = xfs_start_flags(args, mp); 1705 error = xfs_start_flags(args, mp);
1794 if (error) 1706 if (error)
1795 goto out_destroy_counters; 1707 goto out_free_fsname;
1796 error = xfs_readsb(mp, flags); 1708 error = xfs_readsb(mp, flags);
1797 if (error) 1709 if (error)
1798 goto out_destroy_counters; 1710 goto out_free_fsname;
1799 error = xfs_finish_flags(args, mp); 1711 error = xfs_finish_flags(args, mp);
1800 if (error) 1712 if (error)
1801 goto out_free_sb; 1713 goto out_free_sb;
@@ -1811,7 +1723,7 @@ xfs_fs_fill_super(
1811 if (error) 1723 if (error)
1812 goto out_free_sb; 1724 goto out_free_sb;
1813 1725
1814 error = xfs_mountfs(mp, flags); 1726 error = xfs_mountfs(mp);
1815 if (error) 1727 if (error)
1816 goto out_filestream_unmount; 1728 goto out_filestream_unmount;
1817 1729
@@ -1825,7 +1737,7 @@ xfs_fs_fill_super(
1825 sb->s_time_gran = 1; 1737 sb->s_time_gran = 1;
1826 set_posix_acl_flag(sb); 1738 set_posix_acl_flag(sb);
1827 1739
1828 root = igrab(mp->m_rootip->i_vnode); 1740 root = igrab(VFS_I(mp->m_rootip));
1829 if (!root) { 1741 if (!root) {
1830 error = ENOENT; 1742 error = ENOENT;
1831 goto fail_unmount; 1743 goto fail_unmount;
@@ -1857,7 +1769,8 @@ xfs_fs_fill_super(
1857 xfs_filestream_unmount(mp); 1769 xfs_filestream_unmount(mp);
1858 out_free_sb: 1770 out_free_sb:
1859 xfs_freesb(mp); 1771 xfs_freesb(mp);
1860 out_destroy_counters: 1772 out_free_fsname:
1773 xfs_free_fsname(mp);
1861 xfs_icsb_destroy_counters(mp); 1774 xfs_icsb_destroy_counters(mp);
1862 xfs_close_devices(mp); 1775 xfs_close_devices(mp);
1863 out_put_qmops: 1776 out_put_qmops:
@@ -1890,10 +1803,8 @@ xfs_fs_fill_super(
1890 error = xfs_unmount_flush(mp, 0); 1803 error = xfs_unmount_flush(mp, 0);
1891 WARN_ON(error); 1804 WARN_ON(error);
1892 1805
1893 IRELE(mp->m_rootip);
1894
1895 xfs_unmountfs(mp); 1806 xfs_unmountfs(mp);
1896 goto out_destroy_counters; 1807 goto out_free_sb;
1897} 1808}
1898 1809
1899STATIC int 1810STATIC int
@@ -2014,7 +1925,7 @@ xfs_free_trace_bufs(void)
2014STATIC int __init 1925STATIC int __init
2015xfs_init_zones(void) 1926xfs_init_zones(void)
2016{ 1927{
2017 xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode", 1928 xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
2018 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | 1929 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
2019 KM_ZONE_SPREAD, 1930 KM_ZONE_SPREAD,
2020 xfs_fs_inode_init_once); 1931 xfs_fs_inode_init_once);
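
xfs_start_flags() now duplicates the fsname/rtname/logname strings with kstrdup() and unwinds partial allocations through labelled error paths instead of assuming the allocations cannot fail. A runnable userspace rendering of that goto-unwind shape, with strdup() standing in for kstrdup() (the mount_like/start_flags names are illustrative):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct mount_like {
	char *fsname, *rtname, *logname;
};

/* Duplicate up to three option strings; on any failure, free what
 * was already allocated, in reverse order, via goto labels. */
static int start_flags(struct mount_like *mp, const char *fs,
		       const char *rt, const char *log)
{
	int error = ENOMEM;

	mp->fsname = strdup(fs);
	if (!mp->fsname)
		goto out;
	if (rt && rt[0]) {
		mp->rtname = strdup(rt);
		if (!mp->rtname)
			goto out_free_fsname;
	}
	if (log && log[0]) {
		mp->logname = strdup(log);
		if (!mp->logname)
			goto out_free_rtname;
	}
	return 0;

 out_free_rtname:
	free(mp->rtname);	/* free(NULL) is safe if rt was empty */
 out_free_fsname:
	free(mp->fsname);
 out:
	return error;
}

int main(void)
{
	struct mount_like mp = { 0 };

	if (start_flags(&mp, "/dev/sda1", "", "internal") == 0)
		printf("fsname=%s logname=%s\n", mp.fsname, mp.logname);
	free(mp.fsname);
	free(mp.rtname);
	free(mp.logname);
	return 0;
}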
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index b7d13da01bd6..fe2ef4e6a0f9 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -101,9 +101,6 @@ struct block_device;
101 101
102extern __uint64_t xfs_max_file_offset(unsigned int); 102extern __uint64_t xfs_max_file_offset(unsigned int);
103 103
104extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
105 struct xfs_inode *ip);
106
107extern void xfs_flush_inode(struct xfs_inode *); 104extern void xfs_flush_inode(struct xfs_inode *);
108extern void xfs_flush_device(struct xfs_inode *); 105extern void xfs_flush_device(struct xfs_inode *);
109 106
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 25488b6d9881..b52528bbbfff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -33,7 +33,7 @@
33 33
34 34
35/* 35/*
36 * Dedicated vnode inactive/reclaim sync semaphores. 36 * Dedicated vnode inactive/reclaim sync wait queues.
37 * Prime number of hash buckets since address is used as the key. 37 * Prime number of hash buckets since address is used as the key.
38 */ 38 */
39#define NVSYNC 37 39#define NVSYNC 37
@@ -82,24 +82,6 @@ vn_ioerror(
82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); 82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
83} 83}
84 84
85
86/*
87 * Add a reference to a referenced vnode.
88 */
89bhv_vnode_t *
90vn_hold(
91 bhv_vnode_t *vp)
92{
93 struct inode *inode;
94
95 XFS_STATS_INC(vn_hold);
96
97 inode = igrab(vn_to_inode(vp));
98 ASSERT(inode);
99
100 return vp;
101}
102
103#ifdef XFS_INODE_TRACE 85#ifdef XFS_INODE_TRACE
104 86
105/* 87/*
@@ -108,7 +90,7 @@ vn_hold(
108 */ 90 */
109static inline int xfs_icount(struct xfs_inode *ip) 91static inline int xfs_icount(struct xfs_inode *ip)
110{ 92{
111 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 93 struct inode *vp = VFS_I(ip);
112 94
113 if (vp) 95 if (vp)
114 return vn_count(vp); 96 return vn_count(vp);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 41ca2cec5d31..683ce16210ff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -22,20 +22,6 @@ struct file;
22struct xfs_iomap; 22struct xfs_iomap;
23struct attrlist_cursor_kern; 23struct attrlist_cursor_kern;
24 24
25typedef struct inode bhv_vnode_t;
26
27/*
28 * Vnode to Linux inode mapping.
29 */
30static inline bhv_vnode_t *vn_from_inode(struct inode *inode)
31{
32 return inode;
33}
34static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
35{
36 return vnode;
37}
38
39/* 25/*
40 * Return values for xfs_inactive. A return value of 26 * Return values for xfs_inactive. A return value of
41 * VN_INACTIVE_NOCACHE implies that the file system behavior 27 * VN_INACTIVE_NOCACHE implies that the file system behavior
@@ -76,57 +62,52 @@ extern void vn_iowait(struct xfs_inode *ip);
76extern void vn_iowake(struct xfs_inode *ip); 62extern void vn_iowake(struct xfs_inode *ip);
77extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); 63extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
78 64
79static inline int vn_count(bhv_vnode_t *vp) 65static inline int vn_count(struct inode *vp)
80{ 66{
81 return atomic_read(&vn_to_inode(vp)->i_count); 67 return atomic_read(&vp->i_count);
82} 68}
83 69
84/* 70#define IHOLD(ip) \
85 * Vnode reference counting functions (and macros for compatibility). 71do { \
86 */ 72 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
87extern bhv_vnode_t *vn_hold(bhv_vnode_t *); 73 atomic_inc(&(VFS_I(ip)->i_count)); \
74 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
75} while (0)
88 76
89#if defined(XFS_INODE_TRACE) 77#define IRELE(ip) \
90#define VN_HOLD(vp) \ 78do { \
91 ((void)vn_hold(vp), \ 79 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
92 xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) 80 iput(VFS_I(ip)); \
93#define VN_RELE(vp) \ 81} while (0)
94 (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \
95 iput(vn_to_inode(vp)))
96#else
97#define VN_HOLD(vp) ((void)vn_hold(vp))
98#define VN_RELE(vp) (iput(vn_to_inode(vp)))
99#endif
100 82
101static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) 83static inline struct inode *vn_grab(struct inode *vp)
102{ 84{
103 struct inode *inode = igrab(vn_to_inode(vp)); 85 return igrab(vp);
104 return inode ? vn_from_inode(inode) : NULL;
105} 86}
106 87
107/* 88/*
108 * Dealing with bad inodes 89 * Dealing with bad inodes
109 */ 90 */
110static inline int VN_BAD(bhv_vnode_t *vp) 91static inline int VN_BAD(struct inode *vp)
111{ 92{
112 return is_bad_inode(vn_to_inode(vp)); 93 return is_bad_inode(vp);
113} 94}
114 95
115/* 96/*
116 * Extracting atime values in various formats 97 * Extracting atime values in various formats
117 */ 98 */
118static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime) 99static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime)
119{ 100{
120 bs_atime->tv_sec = vp->i_atime.tv_sec; 101 bs_atime->tv_sec = vp->i_atime.tv_sec;
121 bs_atime->tv_nsec = vp->i_atime.tv_nsec; 102 bs_atime->tv_nsec = vp->i_atime.tv_nsec;
122} 103}
123 104
124static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts) 105static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts)
125{ 106{
126 *ts = vp->i_atime; 107 *ts = vp->i_atime;
127} 108}
128 109
129static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) 110static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
130{ 111{
131 *tt = vp->i_atime.tv_sec; 112 *tt = vp->i_atime.tv_sec;
132} 113}
@@ -134,9 +115,9 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
134/* 115/*
135 * Some useful predicates. 116 * Some useful predicates.
136 */ 117 */
137#define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping) 118#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
138#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) 119#define VN_CACHED(vp) (vp->i_mapping->nrpages)
139#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ 120#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
140 PAGECACHE_TAG_DIRTY) 121 PAGECACHE_TAG_DIRTY)
141 122
142 123
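
The new IHOLD() macro asserts i_count is already positive before bumping it directly: taking an extra reference is only legal while the caller provably owns one, otherwise the object could be freed out from under the increment. A runnable userspace sketch of that assert-then-increment refcount rule (the obj type and helpers are illustrative):

#include <assert.h>
#include <stdio.h>

struct obj {
	int refcount;
};

/* Take an extra reference: legal only while the caller already
 * holds one, hence the assert on a positive count. */
static void obj_hold(struct obj *o)
{
	assert(o->refcount > 0);
	o->refcount++;
}

static void obj_rele(struct obj *o)
{
	assert(o->refcount > 0);
	if (--o->refcount == 0)
		printf("last reference dropped, freeing\n");
}

int main(void)
{
	struct obj o = { .refcount = 1 };

	obj_hold(&o);	/* now 2 */
	obj_rele(&o);	/* back to 1 */
	obj_rele(&o);	/* 0: object goes away */
	return 0;
}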
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index fc9f3fb39b7b..f2705f2fd43c 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,11 +101,18 @@ xfs_qm_dqinit(
101 if (brandnewdquot) { 101 if (brandnewdquot) {
102 dqp->dq_flnext = dqp->dq_flprev = dqp; 102 dqp->dq_flnext = dqp->dq_flprev = dqp;
103 mutex_init(&dqp->q_qlock); 103 mutex_init(&dqp->q_qlock);
104 initnsema(&dqp->q_flock, 1, "fdq");
105 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); 104 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
106 105
106 /*
107 * Because we want to use a counting completion, complete
108 * the flush completion once to allow a single access to
109 * the flush completion without blocking.
110 */
111 init_completion(&dqp->q_flush);
112 complete(&dqp->q_flush);
113
107#ifdef XFS_DQUOT_TRACE 114#ifdef XFS_DQUOT_TRACE
108 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP); 115 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
109 xfs_dqtrace_entry(dqp, "DQINIT"); 116 xfs_dqtrace_entry(dqp, "DQINIT");
110#endif 117#endif
111 } else { 118 } else {
@@ -150,7 +157,6 @@ xfs_qm_dqdestroy(
150 ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); 157 ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
151 158
152 mutex_destroy(&dqp->q_qlock); 159 mutex_destroy(&dqp->q_qlock);
153 freesema(&dqp->q_flock);
154 sv_destroy(&dqp->q_pinwait); 160 sv_destroy(&dqp->q_pinwait);
155 161
156#ifdef XFS_DQUOT_TRACE 162#ifdef XFS_DQUOT_TRACE
@@ -431,7 +437,7 @@ xfs_qm_dqalloc(
431 * when it unlocks the inode. Since we want to keep the quota 437 * when it unlocks the inode. Since we want to keep the quota
432 * inode around, we bump the vnode ref count now. 438 * inode around, we bump the vnode ref count now.
433 */ 439 */
434 VN_HOLD(XFS_ITOV(quotip)); 440 IHOLD(quotip);
435 441
436 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); 442 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
437 nmaps = 1; 443 nmaps = 1;
@@ -1211,7 +1217,7 @@ xfs_qm_dqflush(
1211 int error; 1217 int error;
1212 1218
1213 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1219 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1214 ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); 1220 ASSERT(!completion_done(&dqp->q_flush));
1215 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1221 xfs_dqtrace_entry(dqp, "DQFLUSH");
1216 1222
1217 /* 1223 /*
@@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done(
1348 xfs_dqfunlock(dqp); 1354 xfs_dqfunlock(dqp);
1349} 1355}
1350 1356
1351
1352int
1353xfs_qm_dqflock_nowait(
1354 xfs_dquot_t *dqp)
1355{
1356 int locked;
1357
1358 locked = cpsema(&((dqp)->q_flock));
1359
1360 /* XXX ifdef these out */
1361 if (locked)
1362 (dqp)->dq_flags |= XFS_DQ_FLOCKED;
1363 return (locked);
1364}
1365
1366
1367int 1357int
1368xfs_qm_dqlock_nowait( 1358xfs_qm_dqlock_nowait(
1369 xfs_dquot_t *dqp) 1359 xfs_dquot_t *dqp)
1370{ 1360{
1371 return (mutex_trylock(&((dqp)->q_qlock))); 1361 return mutex_trylock(&dqp->q_qlock);
1372} 1362}
1373 1363
1374void 1364void
1375xfs_dqlock( 1365xfs_dqlock(
1376 xfs_dquot_t *dqp) 1366 xfs_dquot_t *dqp)
1377{ 1367{
1378 mutex_lock(&(dqp->q_qlock)); 1368 mutex_lock(&dqp->q_qlock);
1379} 1369}
1380 1370
1381void 1371void
@@ -1468,7 +1458,7 @@ xfs_qm_dqpurge(
1468 * if we're turning off quotas. Basically, we need this flush 1458 * if we're turning off quotas. Basically, we need this flush
1469 * lock, and are willing to block on it. 1459 * lock, and are willing to block on it.
1470 */ 1460 */
1471 if (! xfs_qm_dqflock_nowait(dqp)) { 1461 if (!xfs_dqflock_nowait(dqp)) {
1472 /* 1462 /*
1473 * Block on the flush lock after nudging dquot buffer, 1463 * Block on the flush lock after nudging dquot buffer,
1474 * if it is incore. 1464 * if it is incore.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index f7393bba4e95..8958d0faf8d3 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -82,7 +82,7 @@ typedef struct xfs_dquot {
82 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ 82 xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
84 mutex_t q_qlock; /* quota lock */ 84 mutex_t q_qlock; /* quota lock */
85 sema_t q_flock; /* flush lock */ 85 struct completion q_flush; /* flush completion queue */
86 uint q_pincount; /* pin count for this dquot */ 86 uint q_pincount; /* pin count for this dquot */
87 sv_t q_pinwait; /* sync var for pinning */ 87 sv_t q_pinwait; /* sync var for pinning */
88#ifdef XFS_DQUOT_TRACE 88#ifdef XFS_DQUOT_TRACE
@@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
113 113
114 114
115/* 115/*
116 * The following three routines simply manage the q_flock 116 * Manage the q_flush completion queue embedded in the dquot. This completion
117 * semaphore embedded in the dquot. This semaphore synchronizes 117 * queue synchronizes processes attempting to flush the in-core dquot back to
118 * processes attempting to flush the in-core dquot back to disk. 118 * disk.
119 */ 119 */
120#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\ 120static inline void xfs_dqflock(xfs_dquot_t *dqp)
121 (dqp)->dq_flags |= XFS_DQ_FLOCKED; } 121{
122#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \ 122 wait_for_completion(&dqp->q_flush);
123 vsema(&((dqp)->q_flock)); \ 123}
124 (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } 124
125static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
126{
127 return try_wait_for_completion(&dqp->q_flush);
128}
129
130static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
131{
132 complete(&dqp->q_flush);
133}
125 134
126#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
127#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) 135#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
128#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 136#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
129#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 137#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -167,7 +175,6 @@ extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
167extern int xfs_qm_dqpurge(xfs_dquot_t *); 175extern int xfs_qm_dqpurge(xfs_dquot_t *);
168extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 176extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
169extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); 177extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
170extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
171extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); 178extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
172extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 179extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
173 xfs_disk_dquot_t *); 180 xfs_disk_dquot_t *);
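
The flush-lock conversion above replaces the IRIX-style semaphore (psema/vsema/cpsema) with a struct completion used as a binary lock: xfs_dqflock() consumes the completion, xfs_dqflock_nowait() maps to try_wait_for_completion(), xfs_dqfunlock() re-completes it, and "flush lock held" is now expressed as !completion_done(&dqp->q_flush). As a rough userspace analogue of those semantics (not kernel code; all names below are invented for illustration), the same behaviour can be modelled with a mutex and condition variable:

/* Userspace analogue of the completion-based flush lock; illustrative only,
 * not kernel code. "done" plays the role of a completed struct completion. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct flush_lock {
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	bool            done;	/* true == unlocked (completion completed) */
};

static void flush_lock_init(struct flush_lock *fl)
{
	pthread_mutex_init(&fl->lock, NULL);
	pthread_cond_init(&fl->cond, NULL);
	fl->done = true;	/* start out unlocked in this analogue */
}

/* like xfs_dqflock(): wait_for_completion() */
static void flush_lock_acquire(struct flush_lock *fl)
{
	pthread_mutex_lock(&fl->lock);
	while (!fl->done)
		pthread_cond_wait(&fl->cond, &fl->lock);
	fl->done = false;	/* consume the completion */
	pthread_mutex_unlock(&fl->lock);
}

/* like xfs_dqflock_nowait(): try_wait_for_completion() */
static bool flush_lock_tryacquire(struct flush_lock *fl)
{
	bool got;

	pthread_mutex_lock(&fl->lock);
	got = fl->done;
	if (got)
		fl->done = false;
	pthread_mutex_unlock(&fl->lock);
	return got;
}

/* like xfs_dqfunlock(): complete() */
static void flush_lock_release(struct flush_lock *fl)
{
	pthread_mutex_lock(&fl->lock);
	fl->done = true;
	pthread_mutex_unlock(&fl->lock);
	pthread_cond_signal(&fl->cond);
}

int main(void)
{
	struct flush_lock fl;

	flush_lock_init(&fl);
	flush_lock_acquire(&fl);
	printf("trylock while held: %d\n", flush_lock_tryacquire(&fl));	/* 0 */
	flush_lock_release(&fl);
	printf("trylock when free: %d\n", flush_lock_tryacquire(&fl));	/* 1 */
	return 0;
}

The ASSERT(!completion_done(&dqp->q_flush)) checks in the hunks above correspond to asserting !fl->done in this analogue.
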
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 08d2fc89e6a1..f028644caa5e 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push(
151 dqp = logitem->qli_dquot; 151 dqp = logitem->qli_dquot;
152 152
153 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 153 ASSERT(XFS_DQ_IS_LOCKED(dqp));
154 ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); 154 ASSERT(!completion_done(&dqp->q_flush));
155 155
156 /* 156 /*
157 * Since we were able to lock the dquot's flush lock and 157 * Since we were able to lock the dquot's flush lock and
@@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf(
245 * inode flush completed and the inode was taken off the AIL. 245 * inode flush completed and the inode was taken off the AIL.
246 * So, just get out. 246 * So, just get out.
247 */ 247 */
248 if (!issemalocked(&(dqp->q_flock)) || 248 if (completion_done(&dqp->q_flush) ||
249 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 249 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
250 qip->qli_pushbuf_flag = 0; 250 qip->qli_pushbuf_flag = 0;
251 xfs_dqunlock(dqp); 251 xfs_dqunlock(dqp);
@@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf(
258 if (bp != NULL) { 258 if (bp != NULL) {
259 if (XFS_BUF_ISDELAYWRITE(bp)) { 259 if (XFS_BUF_ISDELAYWRITE(bp)) {
260 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && 260 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
261 issemalocked(&(dqp->q_flock))); 261 !completion_done(&dqp->q_flush));
262 qip->qli_pushbuf_flag = 0; 262 qip->qli_pushbuf_flag = 0;
263 xfs_dqunlock(dqp); 263 xfs_dqunlock(dqp);
264 264
@@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock(
317 return (XFS_ITEM_LOCKED); 317 return (XFS_ITEM_LOCKED);
318 318
319 retval = XFS_ITEM_SUCCESS; 319 retval = XFS_ITEM_SUCCESS;
320 if (! xfs_qm_dqflock_nowait(dqp)) { 320 if (!xfs_dqflock_nowait(dqp)) {
321 /* 321 /*
322 * The dquot is already being flushed. It may have been 322 * The dquot is already being flushed. It may have been
323 * flushed delayed write, however, and we don't want to 323 * flushed delayed write, however, and we don't want to
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 021934a3d456..df0ffef9775a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -310,8 +310,7 @@ xfs_qm_unmount_quotadestroy(
310 */ 310 */
311void 311void
312xfs_qm_mount_quotas( 312xfs_qm_mount_quotas(
313 xfs_mount_t *mp, 313 xfs_mount_t *mp)
314 int mfsi_flags)
315{ 314{
316 int error = 0; 315 int error = 0;
317 uint sbf; 316 uint sbf;
@@ -346,8 +345,7 @@ xfs_qm_mount_quotas(
346 /* 345 /*
347 * If any of the quotas are not consistent, do a quotacheck. 346 * If any of the quotas are not consistent, do a quotacheck.
348 */ 347 */
349 if (XFS_QM_NEED_QUOTACHECK(mp) && 348 if (XFS_QM_NEED_QUOTACHECK(mp)) {
350 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
351 error = xfs_qm_quotacheck(mp); 349 error = xfs_qm_quotacheck(mp);
352 if (error) { 350 if (error) {
353 /* Quotacheck failed and disabled quotas. */ 351 /* Quotacheck failed and disabled quotas. */
@@ -484,7 +482,7 @@ again:
484 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); 482 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
485 /* XXX a sentinel would be better */ 483 /* XXX a sentinel would be better */
486 recl = XFS_QI_MPLRECLAIMS(mp); 484 recl = XFS_QI_MPLRECLAIMS(mp);
487 if (! xfs_qm_dqflock_nowait(dqp)) { 485 if (!xfs_dqflock_nowait(dqp)) {
488 /* 486 /*
489 * If we can't grab the flush lock then check 487 * If we can't grab the flush lock then check
490 * to see if the dquot has been flushed delayed 488 * to see if the dquot has been flushed delayed
@@ -1062,7 +1060,7 @@ xfs_qm_sync(
1062 1060
1063 /* XXX a sentinel would be better */ 1061 /* XXX a sentinel would be better */
1064 recl = XFS_QI_MPLRECLAIMS(mp); 1062 recl = XFS_QI_MPLRECLAIMS(mp);
1065 if (! xfs_qm_dqflock_nowait(dqp)) { 1063 if (!xfs_dqflock_nowait(dqp)) {
1066 if (nowait) { 1064 if (nowait) {
1067 xfs_dqunlock(dqp); 1065 xfs_dqunlock(dqp);
1068 continue; 1066 continue;
@@ -2079,7 +2077,7 @@ xfs_qm_shake_freelist(
2079 * Try to grab the flush lock. If this dquot is in the process of 2077 * Try to grab the flush lock. If this dquot is in the process of
2080 * getting flushed to disk, we don't want to reclaim it. 2078 * getting flushed to disk, we don't want to reclaim it.
2081 */ 2079 */
2082 if (! xfs_qm_dqflock_nowait(dqp)) { 2080 if (!xfs_dqflock_nowait(dqp)) {
2083 xfs_dqunlock(dqp); 2081 xfs_dqunlock(dqp);
2084 dqp = dqp->dq_flnext; 2082 dqp = dqp->dq_flnext;
2085 continue; 2083 continue;
@@ -2257,7 +2255,7 @@ xfs_qm_dqreclaim_one(void)
2257 * Try to grab the flush lock. If this dquot is in the process of 2255 * Try to grab the flush lock. If this dquot is in the process of
2258 * getting flushed to disk, we don't want to reclaim it. 2256 * getting flushed to disk, we don't want to reclaim it.
2259 */ 2257 */
2260 if (! xfs_qm_dqflock_nowait(dqp)) { 2258 if (!xfs_dqflock_nowait(dqp)) {
2261 xfs_dqunlock(dqp); 2259 xfs_dqunlock(dqp);
2262 continue; 2260 continue;
2263 } 2261 }
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index cd2300e374af..44f25349e478 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
165#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) 165#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
166 166
167extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); 167extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
168extern void xfs_qm_mount_quotas(xfs_mount_t *, int); 168extern void xfs_qm_mount_quotas(xfs_mount_t *);
169extern int xfs_qm_quotacheck(xfs_mount_t *); 169extern int xfs_qm_quotacheck(xfs_mount_t *);
170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); 170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
171extern int xfs_qm_unmount_quotas(xfs_mount_t *); 171extern int xfs_qm_unmount_quotas(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index f4f6c4c861d7..eea2e60b456b 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -162,7 +162,7 @@ xfs_qm_newmount(
162 * mounting, and get on with the boring life 162 * mounting, and get on with the boring life
163 * without disk quotas. 163 * without disk quotas.
164 */ 164 */
165 xfs_qm_mount_quotas(mp, 0); 165 xfs_qm_mount_quotas(mp);
166 } else { 166 } else {
167 /* 167 /*
168 * Clear the quota flags, but remember them. This 168 * Clear the quota flags, but remember them. This
@@ -184,13 +184,12 @@ STATIC int
184xfs_qm_endmount( 184xfs_qm_endmount(
185 xfs_mount_t *mp, 185 xfs_mount_t *mp,
186 uint needquotamount, 186 uint needquotamount,
187 uint quotaflags, 187 uint quotaflags)
188 int mfsi_flags)
189{ 188{
190 if (needquotamount) { 189 if (needquotamount) {
191 ASSERT(mp->m_qflags == 0); 190 ASSERT(mp->m_qflags == 0);
192 mp->m_qflags = quotaflags; 191 mp->m_qflags = quotaflags;
193 xfs_qm_mount_quotas(mp, mfsi_flags); 192 xfs_qm_mount_quotas(mp);
194 } 193 }
195 194
196#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) 195#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index adfb8723f65a..1a3b803dfa55 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1034,7 +1034,7 @@ xfs_qm_dqrele_all_inodes(
1034{ 1034{
1035 xfs_inode_t *ip, *topino; 1035 xfs_inode_t *ip, *topino;
1036 uint ireclaims; 1036 uint ireclaims;
1037 bhv_vnode_t *vp; 1037 struct inode *vp;
1038 boolean_t vnode_refd; 1038 boolean_t vnode_refd;
1039 1039
1040 ASSERT(mp->m_quotainfo); 1040 ASSERT(mp->m_quotainfo);
@@ -1059,7 +1059,7 @@ again:
1059 ip = ip->i_mnext; 1059 ip = ip->i_mnext;
1060 continue; 1060 continue;
1061 } 1061 }
1062 vp = XFS_ITOV_NULL(ip); 1062 vp = VFS_I(ip);
1063 if (!vp) { 1063 if (!vp) {
1064 ASSERT(ip->i_udquot == NULL); 1064 ASSERT(ip->i_udquot == NULL);
1065 ASSERT(ip->i_gdquot == NULL); 1065 ASSERT(ip->i_gdquot == NULL);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 3e4648ad9cfc..b2f639a1416f 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -37,15 +37,15 @@
37#include <linux/capability.h> 37#include <linux/capability.h>
38#include <linux/posix_acl_xattr.h> 38#include <linux/posix_acl_xattr.h>
39 39
40STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *); 40STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
41STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); 41STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
42STATIC void xfs_acl_get_endian(xfs_acl_t *); 42STATIC void xfs_acl_get_endian(xfs_acl_t *);
43STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); 43STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
44STATIC int xfs_acl_invalid(xfs_acl_t *); 44STATIC int xfs_acl_invalid(xfs_acl_t *);
45STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); 45STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
46STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *); 46STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
47STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *); 47STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
48STATIC int xfs_acl_allow_set(bhv_vnode_t *, int); 48STATIC int xfs_acl_allow_set(struct inode *, int);
49 49
50kmem_zone_t *xfs_acl_zone; 50kmem_zone_t *xfs_acl_zone;
51 51
@@ -55,7 +55,7 @@ kmem_zone_t *xfs_acl_zone;
55 */ 55 */
56int 56int
57xfs_acl_vhasacl_access( 57xfs_acl_vhasacl_access(
58 bhv_vnode_t *vp) 58 struct inode *vp)
59{ 59{
60 int error; 60 int error;
61 61
@@ -68,7 +68,7 @@ xfs_acl_vhasacl_access(
68 */ 68 */
69int 69int
70xfs_acl_vhasacl_default( 70xfs_acl_vhasacl_default(
71 bhv_vnode_t *vp) 71 struct inode *vp)
72{ 72{
73 int error; 73 int error;
74 74
@@ -207,7 +207,7 @@ posix_acl_xfs_to_xattr(
207 207
208int 208int
209xfs_acl_vget( 209xfs_acl_vget(
210 bhv_vnode_t *vp, 210 struct inode *vp,
211 void *acl, 211 void *acl,
212 size_t size, 212 size_t size,
213 int kind) 213 int kind)
@@ -217,7 +217,6 @@ xfs_acl_vget(
217 posix_acl_xattr_header *ext_acl = acl; 217 posix_acl_xattr_header *ext_acl = acl;
218 int flags = 0; 218 int flags = 0;
219 219
220 VN_HOLD(vp);
221 if(size) { 220 if(size) {
222 if (!(_ACL_ALLOC(xfs_acl))) { 221 if (!(_ACL_ALLOC(xfs_acl))) {
223 error = ENOMEM; 222 error = ENOMEM;
@@ -239,11 +238,10 @@ xfs_acl_vget(
239 goto out; 238 goto out;
240 } 239 }
241 if (kind == _ACL_TYPE_ACCESS) 240 if (kind == _ACL_TYPE_ACCESS)
242 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl); 241 xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
243 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); 242 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
244 } 243 }
245out: 244out:
246 VN_RELE(vp);
247 if(xfs_acl) 245 if(xfs_acl)
248 _ACL_FREE(xfs_acl); 246 _ACL_FREE(xfs_acl);
249 return -error; 247 return -error;
@@ -251,28 +249,26 @@ out:
251 249
252int 250int
253xfs_acl_vremove( 251xfs_acl_vremove(
254 bhv_vnode_t *vp, 252 struct inode *vp,
255 int kind) 253 int kind)
256{ 254{
257 int error; 255 int error;
258 256
259 VN_HOLD(vp);
260 error = xfs_acl_allow_set(vp, kind); 257 error = xfs_acl_allow_set(vp, kind);
261 if (!error) { 258 if (!error) {
262 error = xfs_attr_remove(xfs_vtoi(vp), 259 error = xfs_attr_remove(XFS_I(vp),
263 kind == _ACL_TYPE_DEFAULT? 260 kind == _ACL_TYPE_DEFAULT?
264 SGI_ACL_DEFAULT: SGI_ACL_FILE, 261 SGI_ACL_DEFAULT: SGI_ACL_FILE,
265 ATTR_ROOT); 262 ATTR_ROOT);
266 if (error == ENOATTR) 263 if (error == ENOATTR)
267 error = 0; /* 'scool */ 264 error = 0; /* 'scool */
268 } 265 }
269 VN_RELE(vp);
270 return -error; 266 return -error;
271} 267}
272 268
273int 269int
274xfs_acl_vset( 270xfs_acl_vset(
275 bhv_vnode_t *vp, 271 struct inode *vp,
276 void *acl, 272 void *acl,
277 size_t size, 273 size_t size,
278 int kind) 274 int kind)
@@ -298,7 +294,6 @@ xfs_acl_vset(
298 return 0; 294 return 0;
299 } 295 }
300 296
301 VN_HOLD(vp);
302 error = xfs_acl_allow_set(vp, kind); 297 error = xfs_acl_allow_set(vp, kind);
303 298
304 /* Incoming ACL exists, set file mode based on its value */ 299 /* Incoming ACL exists, set file mode based on its value */
@@ -321,7 +316,6 @@ xfs_acl_vset(
321 } 316 }
322 317
323out: 318out:
324 VN_RELE(vp);
325 _ACL_FREE(xfs_acl); 319 _ACL_FREE(xfs_acl);
326 return -error; 320 return -error;
327} 321}
@@ -363,7 +357,7 @@ xfs_acl_iaccess(
363 357
364STATIC int 358STATIC int
365xfs_acl_allow_set( 359xfs_acl_allow_set(
366 bhv_vnode_t *vp, 360 struct inode *vp,
367 int kind) 361 int kind)
368{ 362{
369 if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) 363 if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
@@ -372,7 +366,7 @@ xfs_acl_allow_set(
372 return ENOTDIR; 366 return ENOTDIR;
373 if (vp->i_sb->s_flags & MS_RDONLY) 367 if (vp->i_sb->s_flags & MS_RDONLY)
374 return EROFS; 368 return EROFS;
375 if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) 369 if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
376 return EPERM; 370 return EPERM;
377 return 0; 371 return 0;
378} 372}
@@ -566,7 +560,7 @@ xfs_acl_get_endian(
566 */ 560 */
567STATIC void 561STATIC void
568xfs_acl_get_attr( 562xfs_acl_get_attr(
569 bhv_vnode_t *vp, 563 struct inode *vp,
570 xfs_acl_t *aclp, 564 xfs_acl_t *aclp,
571 int kind, 565 int kind,
572 int flags, 566 int flags,
@@ -576,7 +570,7 @@ xfs_acl_get_attr(
576 570
577 ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); 571 ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
578 flags |= ATTR_ROOT; 572 flags |= ATTR_ROOT;
579 *error = xfs_attr_get(xfs_vtoi(vp), 573 *error = xfs_attr_get(XFS_I(vp),
580 kind == _ACL_TYPE_ACCESS ? 574 kind == _ACL_TYPE_ACCESS ?
581 SGI_ACL_FILE : SGI_ACL_DEFAULT, 575 SGI_ACL_FILE : SGI_ACL_DEFAULT,
582 (char *)aclp, &len, flags); 576 (char *)aclp, &len, flags);
@@ -590,7 +584,7 @@ xfs_acl_get_attr(
590 */ 584 */
591STATIC void 585STATIC void
592xfs_acl_set_attr( 586xfs_acl_set_attr(
593 bhv_vnode_t *vp, 587 struct inode *vp,
594 xfs_acl_t *aclp, 588 xfs_acl_t *aclp,
595 int kind, 589 int kind,
596 int *error) 590 int *error)
@@ -615,7 +609,7 @@ xfs_acl_set_attr(
615 INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); 609 INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
616 } 610 }
617 INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); 611 INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
618 *error = xfs_attr_set(xfs_vtoi(vp), 612 *error = xfs_attr_set(XFS_I(vp),
619 kind == _ACL_TYPE_ACCESS ? 613 kind == _ACL_TYPE_ACCESS ?
620 SGI_ACL_FILE: SGI_ACL_DEFAULT, 614 SGI_ACL_FILE: SGI_ACL_DEFAULT,
621 (char *)newacl, len, ATTR_ROOT); 615 (char *)newacl, len, ATTR_ROOT);
@@ -624,7 +618,7 @@ xfs_acl_set_attr(
624 618
625int 619int
626xfs_acl_vtoacl( 620xfs_acl_vtoacl(
627 bhv_vnode_t *vp, 621 struct inode *vp,
628 xfs_acl_t *access_acl, 622 xfs_acl_t *access_acl,
629 xfs_acl_t *default_acl) 623 xfs_acl_t *default_acl)
630{ 624{
@@ -639,7 +633,7 @@ xfs_acl_vtoacl(
639 if (error) 633 if (error)
640 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; 634 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
641 else /* We have a good ACL and the file mode, synchronize. */ 635 else /* We have a good ACL and the file mode, synchronize. */
642 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl); 636 xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
643 } 637 }
644 638
645 if (default_acl) { 639 if (default_acl) {
@@ -656,7 +650,7 @@ xfs_acl_vtoacl(
656 */ 650 */
657int 651int
658xfs_acl_inherit( 652xfs_acl_inherit(
659 bhv_vnode_t *vp, 653 struct inode *vp,
660 mode_t mode, 654 mode_t mode,
661 xfs_acl_t *pdaclp) 655 xfs_acl_t *pdaclp)
662{ 656{
@@ -715,7 +709,7 @@ out_error:
715 */ 709 */
716STATIC int 710STATIC int
717xfs_acl_setmode( 711xfs_acl_setmode(
718 bhv_vnode_t *vp, 712 struct inode *vp,
719 xfs_acl_t *acl, 713 xfs_acl_t *acl,
720 int *basicperms) 714 int *basicperms)
721{ 715{
@@ -734,7 +728,7 @@ xfs_acl_setmode(
734 * mode. The m:: bits take precedence over the g:: bits. 728 * mode. The m:: bits take precedence over the g:: bits.
735 */ 729 */
736 iattr.ia_valid = ATTR_MODE; 730 iattr.ia_valid = ATTR_MODE;
737 iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode; 731 iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
738 iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 732 iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
739 ap = acl->acl_entry; 733 ap = acl->acl_entry;
740 for (i = 0; i < acl->acl_cnt; ++i) { 734 for (i = 0; i < acl->acl_cnt; ++i) {
@@ -764,7 +758,7 @@ xfs_acl_setmode(
764 if (gap && nomask) 758 if (gap && nomask)
765 iattr.ia_mode |= gap->ae_perm << 3; 759 iattr.ia_mode |= gap->ae_perm << 3;
766 760
767 return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred); 761 return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred);
768} 762}
769 763
770/* 764/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 323ee94cf831..a4e293b93efa 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -59,14 +59,14 @@ extern struct kmem_zone *xfs_acl_zone;
59 (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) 59 (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
60#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) 60#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
61 61
62extern int xfs_acl_inherit(bhv_vnode_t *, mode_t mode, xfs_acl_t *); 62extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
63extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); 63extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
64extern int xfs_acl_vtoacl(bhv_vnode_t *, xfs_acl_t *, xfs_acl_t *); 64extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
65extern int xfs_acl_vhasacl_access(bhv_vnode_t *); 65extern int xfs_acl_vhasacl_access(struct inode *);
66extern int xfs_acl_vhasacl_default(bhv_vnode_t *); 66extern int xfs_acl_vhasacl_default(struct inode *);
67extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int); 67extern int xfs_acl_vset(struct inode *, void *, size_t, int);
68extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int); 68extern int xfs_acl_vget(struct inode *, void *, size_t, int);
69extern int xfs_acl_vremove(bhv_vnode_t *, int); 69extern int xfs_acl_vremove(struct inode *, int);
70 70
71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) 71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
72 72
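
These ACL routines now take the VFS struct inode directly; xfs_vtoi() becomes XFS_I(), which recovers the XFS inode containing an embedded struct inode, and the VN_HOLD()/VN_RELE() pairs disappear, presumably because the VFS callers already hold an inode reference for the duration of the call. A standalone sketch of the embedding/recovery pattern (all types and names below are invented for the demonstration; offsetof stands in for the kernel's container_of()):

/* Standalone sketch of the XFS_I()/VFS_I() embedding pattern; not kernel
 * code, all types and names below are invented for the demonstration. */
#include <stddef.h>
#include <stdio.h>

struct inode {			/* stand-in for the Linux VFS inode */
	unsigned long i_ino;
};

struct demo_xfs_inode {		/* stand-in for xfs_inode_t */
	int		di_mode;
	struct inode	i_vnode;	/* VFS inode embedded in the fs inode */
};

/* like XFS_I(): recover the containing fs inode from the VFS inode */
static struct demo_xfs_inode *DEMO_XFS_I(struct inode *inode)
{
	return (struct demo_xfs_inode *)
		((char *)inode - offsetof(struct demo_xfs_inode, i_vnode));
}

/* like VFS_I(): the VFS inode is just the embedded member */
static struct inode *DEMO_VFS_I(struct demo_xfs_inode *ip)
{
	return &ip->i_vnode;
}

int main(void)
{
	struct demo_xfs_inode xi = { .di_mode = 0644 };

	printf("round trip ok: %d\n", DEMO_XFS_I(DEMO_VFS_I(&xi)) == &xi);
	return 0;
}
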
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index f9472a2076d4..0b3b5efe848c 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -92,16 +92,6 @@
92 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ 92 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \
93 } 93 }
94 94
95/* define generic INT_ macros */
96
97#define INT_GET(reference,arch) \
98 (((arch) == ARCH_NOCONVERT) \
99 ? \
100 (reference) \
101 : \
102 INT_SWAP((reference),(reference)) \
103 )
104
105/* does not return a value */ 95/* does not return a value */
106#define INT_SET(reference,arch,valueref) \ 96#define INT_SET(reference,arch,valueref) \
107 (__builtin_constant_p(valueref) ? \ 97 (__builtin_constant_p(valueref) ? \
@@ -112,64 +102,6 @@
112 ) \ 102 ) \
113 ) 103 )
114 104
115/* does not return a value */
116#define INT_MOD_EXPR(reference,arch,code) \
117 (((arch) == ARCH_NOCONVERT) \
118 ? \
119 (void)((reference) code) \
120 : \
121 (void)( \
122 (reference) = INT_GET((reference),arch) , \
123 ((reference) code), \
124 INT_SET(reference, arch, reference) \
125 ) \
126 )
127
128/* does not return a value */
129#define INT_MOD(reference,arch,delta) \
130 (void)( \
131 INT_MOD_EXPR(reference,arch,+=(delta)) \
132 )
133
134/*
135 * INT_COPY - copy a value between two locations with the
136 * _same architecture_ but _potentially different sizes_
137 *
138 * if the types of the two parameters are equal or they are
139 * in native architecture, a simple copy is done
140 *
141 * otherwise, architecture conversions are done
142 *
143 */
144
145/* does not return a value */
146#define INT_COPY(dst,src,arch) \
147 ( \
148 ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \
149 ? \
150 (void)((dst) = (src)) \
151 : \
152 INT_SET(dst, arch, INT_GET(src, arch)) \
153 )
154
155/*
156 * INT_XLATE - copy a value in either direction between two locations
157 * with different architectures
158 *
159 * dir < 0 - copy from memory to buffer (native to arch)
160 * dir > 0 - copy from buffer to memory (arch to native)
161 */
162
163/* does not return a value */
164#define INT_XLATE(buf,mem,dir,arch) {\
165 ASSERT(dir); \
166 if (dir>0) { \
167 (mem)=INT_GET(buf, arch); \
168 } else { \
169 INT_SET(buf, arch, mem); \
170 } \
171}
172
173/* 105/*
174 * In directories inode numbers are stored as unaligned arrays of unsigned 106 * In directories inode numbers are stored as unaligned arrays of unsigned
175 * 8bit integers on disk. 107 * 8bit integers on disk.
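
The generic INT_GET/INT_MOD/INT_COPY/INT_XLATE macro layer is removed; on-disk fields are instead converted explicitly at each use, in the be32_to_cpu()/cpu_to_be32() style visible elsewhere in this diff. A minimal userspace illustration of the replacement pattern (glibc's be32toh()/htobe32() stand in for the kernel helpers; the on-disk structure is invented):

/* Explicit endian conversion replacing the INT_GET()/INT_MOD() macro layer.
 * Userspace sketch: glibc's be32toh()/htobe32() stand in for the kernel's
 * be32_to_cpu()/cpu_to_be32(); the on-disk structure is invented. */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct demo_disk_rec {
	uint32_t counter;	/* stored big-endian on disk */
};

int main(void)
{
	struct demo_disk_rec rec = { .counter = htobe32(41) };

	/* was, in spirit: INT_MOD(rec.counter, ARCH_CONVERT, +1) */
	rec.counter = htobe32(be32toh(rec.counter) + 1);

	printf("counter = %u\n", be32toh(rec.counter));	/* 42 */
	return 0;
}
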
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 78de80e3caa2..f7cdc28aff41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -194,6 +194,46 @@ xfs_attr_get(
194 return(error); 194 return(error);
195} 195}
196 196
197/*
198 * Calculate how many blocks we need for the new attribute.
199 */
200int
201xfs_attr_calc_size(
202 struct xfs_inode *ip,
203 int namelen,
204 int valuelen,
205 int *local)
206{
207 struct xfs_mount *mp = ip->i_mount;
208 int size;
209 int nblks;
210
211 /*
212 * Determine space new attribute will use, and if it would be
213 * "local" or "remote" (note: local != inline).
214 */
215 size = xfs_attr_leaf_newentsize(namelen, valuelen,
216 mp->m_sb.sb_blocksize, local);
217
218 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
219 if (*local) {
220 if (size > (mp->m_sb.sb_blocksize >> 1)) {
221 /* Double split possible */
222 nblks *= 2;
223 }
224 } else {
225 /*
226 * Out of line attribute, cannot double split, but
227 * make room for the attribute value itself.
228 */
229 uint dblocks = XFS_B_TO_FSB(mp, valuelen);
230 nblks += dblocks;
231 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
232 }
233
234 return nblks;
235}
236
197STATIC int 237STATIC int
198xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, 238xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
199 char *value, int valuelen, int flags) 239 char *value, int valuelen, int flags)
@@ -202,10 +242,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
202 xfs_fsblock_t firstblock; 242 xfs_fsblock_t firstblock;
203 xfs_bmap_free_t flist; 243 xfs_bmap_free_t flist;
204 int error, err2, committed; 244 int error, err2, committed;
205 int local, size;
206 uint nblks;
207 xfs_mount_t *mp = dp->i_mount; 245 xfs_mount_t *mp = dp->i_mount;
208 int rsvd = (flags & ATTR_ROOT) != 0; 246 int rsvd = (flags & ATTR_ROOT) != 0;
247 int local;
209 248
210 /* 249 /*
211 * Attach the dquots to the inode. 250 * Attach the dquots to the inode.
@@ -241,30 +280,8 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
241 args.whichfork = XFS_ATTR_FORK; 280 args.whichfork = XFS_ATTR_FORK;
242 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; 281 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
243 282
244 /*
245 * Determine space new attribute will use, and if it would be
246 * "local" or "remote" (note: local != inline).
247 */
248 size = xfs_attr_leaf_newentsize(name->len, valuelen,
249 mp->m_sb.sb_blocksize, &local);
250
251 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
252 if (local) {
253 if (size > (mp->m_sb.sb_blocksize >> 1)) {
254 /* Double split possible */
255 nblks <<= 1;
256 }
257 } else {
258 uint dblocks = XFS_B_TO_FSB(mp, valuelen);
259 /* Out of line attribute, cannot double split, but make
260 * room for the attribute value itself.
261 */
262 nblks += dblocks;
263 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
264 }
265
266 /* Size is now blocks for attribute data */ 283 /* Size is now blocks for attribute data */
267 args.total = nblks; 284 args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local);
268 285
269 /* 286 /*
270 * Start our first transaction of the day. 287 * Start our first transaction of the day.
@@ -286,18 +303,17 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
286 if (rsvd) 303 if (rsvd)
287 args.trans->t_flags |= XFS_TRANS_RESERVE; 304 args.trans->t_flags |= XFS_TRANS_RESERVE;
288 305
289 if ((error = xfs_trans_reserve(args.trans, (uint) nblks, 306 if ((error = xfs_trans_reserve(args.trans, args.total,
290 XFS_ATTRSET_LOG_RES(mp, nblks), 307 XFS_ATTRSET_LOG_RES(mp, args.total), 0,
291 0, XFS_TRANS_PERM_LOG_RES, 308 XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) {
292 XFS_ATTRSET_LOG_COUNT))) {
293 xfs_trans_cancel(args.trans, 0); 309 xfs_trans_cancel(args.trans, 0);
294 return(error); 310 return(error);
295 } 311 }
296 xfs_ilock(dp, XFS_ILOCK_EXCL); 312 xfs_ilock(dp, XFS_ILOCK_EXCL);
297 313
298 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0, 314 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
299 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : 315 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
300 XFS_QMOPT_RES_REGBLKS); 316 XFS_QMOPT_RES_REGBLKS);
301 if (error) { 317 if (error) {
302 xfs_iunlock(dp, XFS_ILOCK_EXCL); 318 xfs_iunlock(dp, XFS_ILOCK_EXCL);
303 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); 319 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
@@ -384,7 +400,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
384 * Commit the leaf transformation. We'll need another (linked) 400 * Commit the leaf transformation. We'll need another (linked)
385 * transaction to add the new attribute to the leaf. 401 * transaction to add the new attribute to the leaf.
386 */ 402 */
387 if ((error = xfs_attr_rolltrans(&args.trans, dp))) 403
404 error = xfs_trans_roll(&args.trans, dp);
405 if (error)
388 goto out; 406 goto out;
389 407
390 } 408 }
@@ -964,7 +982,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
964 * Commit the current trans (including the inode) and start 982 * Commit the current trans (including the inode) and start
965 * a new one. 983 * a new one.
966 */ 984 */
967 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 985 error = xfs_trans_roll(&args->trans, dp);
986 if (error)
968 return (error); 987 return (error);
969 988
970 /* 989 /*
@@ -978,7 +997,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
978 * Commit the transaction that added the attr name so that 997 * Commit the transaction that added the attr name so that
979 * later routines can manage their own transactions. 998 * later routines can manage their own transactions.
980 */ 999 */
981 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1000 error = xfs_trans_roll(&args->trans, dp);
1001 if (error)
982 return (error); 1002 return (error);
983 1003
984 /* 1004 /*
@@ -1067,7 +1087,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1067 /* 1087 /*
1068 * Commit the remove and start the next trans in series. 1088 * Commit the remove and start the next trans in series.
1069 */ 1089 */
1070 error = xfs_attr_rolltrans(&args->trans, dp); 1090 error = xfs_trans_roll(&args->trans, dp);
1071 1091
1072 } else if (args->rmtblkno > 0) { 1092 } else if (args->rmtblkno > 0) {
1073 /* 1093 /*
@@ -1298,7 +1318,8 @@ restart:
1298 * Commit the node conversion and start the next 1318 * Commit the node conversion and start the next
1299 * trans in the chain. 1319 * trans in the chain.
1300 */ 1320 */
1301 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1321 error = xfs_trans_roll(&args->trans, dp);
1322 if (error)
1302 goto out; 1323 goto out;
1303 1324
1304 goto restart; 1325 goto restart;
@@ -1349,7 +1370,8 @@ restart:
1349 * Commit the leaf addition or btree split and start the next 1370 * Commit the leaf addition or btree split and start the next
1350 * trans in the chain. 1371 * trans in the chain.
1351 */ 1372 */
1352 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1373 error = xfs_trans_roll(&args->trans, dp);
1374 if (error)
1353 goto out; 1375 goto out;
1354 1376
1355 /* 1377 /*
@@ -1449,7 +1471,8 @@ restart:
1449 /* 1471 /*
1450 * Commit and start the next trans in the chain. 1472 * Commit and start the next trans in the chain.
1451 */ 1473 */
1452 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1474 error = xfs_trans_roll(&args->trans, dp);
1475 if (error)
1453 goto out; 1476 goto out;
1454 1477
1455 } else if (args->rmtblkno > 0) { 1478 } else if (args->rmtblkno > 0) {
@@ -1581,7 +1604,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1581 /* 1604 /*
1582 * Commit the Btree join operation and start a new trans. 1605 * Commit the Btree join operation and start a new trans.
1583 */ 1606 */
1584 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 1607 error = xfs_trans_roll(&args->trans, dp);
1608 if (error)
1585 goto out; 1609 goto out;
1586 } 1610 }
1587 1611
@@ -2082,7 +2106,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2082 /* 2106 /*
2083 * Start the next trans in the chain. 2107 * Start the next trans in the chain.
2084 */ 2108 */
2085 if ((error = xfs_attr_rolltrans(&args->trans, dp))) 2109 error = xfs_trans_roll(&args->trans, dp);
2110 if (error)
2086 return (error); 2111 return (error);
2087 } 2112 }
2088 2113
@@ -2232,7 +2257,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2232 /* 2257 /*
2233 * Close out trans and start the next one in the chain. 2258 * Close out trans and start the next one in the chain.
2234 */ 2259 */
2235 if ((error = xfs_attr_rolltrans(&args->trans, args->dp))) 2260 error = xfs_trans_roll(&args->trans, args->dp);
2261 if (error)
2236 return (error); 2262 return (error);
2237 } 2263 }
2238 return(0); 2264 return(0);
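
xfs_attr_calc_size() factors the transaction-reservation estimate out of xfs_attr_set_int() so the size can be computed before the transaction is allocated: a local attribute larger than half a block doubles the da-enter reservation for a possible double leaf split, while a remote attribute adds the value's blocks plus an extent-add reservation. A simplified, self-contained model of that arithmetic (the constants and helper formulas below are invented stand-ins, not the real XFS space macros):

/* Simplified model of the xfs_attr_calc_size() arithmetic. The constants
 * and helper formulas are invented stand-ins, not the real XFS macros
 * (XFS_DAENTER_SPACE_RES, xfs_attr_leaf_newentsize, ...). */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_BLOCKSIZE	4096
#define DEMO_DAENTER_RES   7	/* stand-in da-enter reservation, in blocks */

static int demo_newentsize(int namelen, int valuelen, bool *local)
{
	int size = namelen + valuelen + 16;	/* crude leaf-entry estimate */

	*local = size <= DEMO_BLOCKSIZE;	/* fits in a leaf block? */
	return size;
}

static int demo_attr_calc_size(int namelen, int valuelen, bool *local)
{
	int size = demo_newentsize(namelen, valuelen, local);
	int nblks = DEMO_DAENTER_RES;

	if (*local) {
		if (size > DEMO_BLOCKSIZE / 2)
			nblks *= 2;	/* a double leaf split is possible */
	} else {
		/* remote value: the value blocks plus extent bookkeeping */
		int dblocks = (valuelen + DEMO_BLOCKSIZE - 1) / DEMO_BLOCKSIZE;

		nblks += dblocks;
		nblks += dblocks / 2 + 1; /* stand-in extent-add reservation */
	}
	return nblks;
}

int main(void)
{
	bool local;

	printf("small attr: %d blocks (local=%d)\n",
	       demo_attr_calc_size(8, 64, &local), local);
	printf("large attr: %d blocks (local=%d)\n",
	       demo_attr_calc_size(8, 100000, &local), local);
	return 0;
}
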
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 8b2d31c19e4d..fb3b2a68b9b9 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -129,6 +129,7 @@ typedef struct xfs_attr_list_context {
129/* 129/*
130 * Overall external interface routines. 130 * Overall external interface routines.
131 */ 131 */
132int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
132int xfs_attr_inactive(struct xfs_inode *dp); 133int xfs_attr_inactive(struct xfs_inode *dp);
133int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); 134int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
134int xfs_attr_rmtval_get(struct xfs_da_args *args); 135int xfs_attr_rmtval_get(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 23ef5d7c87e1..79da6b2ea99e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -2498,9 +2498,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2498 /* 2498 /*
2499 * Commit the flag value change and start the next trans in series. 2499 * Commit the flag value change and start the next trans in series.
2500 */ 2500 */
2501 error = xfs_attr_rolltrans(&args->trans, args->dp); 2501 return xfs_trans_roll(&args->trans, args->dp);
2502
2503 return(error);
2504} 2502}
2505 2503
2506/* 2504/*
@@ -2547,9 +2545,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
2547 /* 2545 /*
2548 * Commit the flag value change and start the next trans in series. 2546 * Commit the flag value change and start the next trans in series.
2549 */ 2547 */
2550 error = xfs_attr_rolltrans(&args->trans, args->dp); 2548 return xfs_trans_roll(&args->trans, args->dp);
2551
2552 return(error);
2553} 2549}
2554 2550
2555/* 2551/*
@@ -2665,7 +2661,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2665 /* 2661 /*
2666 * Commit the flag value change and start the next trans in series. 2662 * Commit the flag value change and start the next trans in series.
2667 */ 2663 */
2668 error = xfs_attr_rolltrans(&args->trans, args->dp); 2664 error = xfs_trans_roll(&args->trans, args->dp);
2669 2665
2670 return(error); 2666 return(error);
2671} 2667}
@@ -2723,7 +2719,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2723 /* 2719 /*
2724 * Commit the invalidate and start the next transaction. 2720 * Commit the invalidate and start the next transaction.
2725 */ 2721 */
2726 error = xfs_attr_rolltrans(trans, dp); 2722 error = xfs_trans_roll(trans, dp);
2727 2723
2728 return (error); 2724 return (error);
2729} 2725}
@@ -2825,7 +2821,8 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
2825 /* 2821 /*
2826 * Atomically commit the whole invalidate stuff. 2822 * Atomically commit the whole invalidate stuff.
2827 */ 2823 */
2828 if ((error = xfs_attr_rolltrans(trans, dp))) 2824 error = xfs_trans_roll(trans, dp);
2825 if (error)
2829 return (error); 2826 return (error);
2830 } 2827 }
2831 2828
@@ -2964,7 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2964 /* 2961 /*
2965 * Roll to next transaction. 2962 * Roll to next transaction.
2966 */ 2963 */
2967 if ((error = xfs_attr_rolltrans(trans, dp))) 2964 error = xfs_trans_roll(trans, dp);
2965 if (error)
2968 return (error); 2966 return (error);
2969 } 2967 }
2970 2968
@@ -2974,60 +2972,3 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2974 2972
2975 return(0); 2973 return(0);
2976} 2974}
2977
2978
2979/*
2980 * Roll from one trans in the sequence of PERMANENT transactions to the next.
2981 */
2982int
2983xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp)
2984{
2985 xfs_trans_t *trans;
2986 unsigned int logres, count;
2987 int error;
2988
2989 /*
2990 * Ensure that the inode is always logged.
2991 */
2992 trans = *transp;
2993 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
2994
2995 /*
2996 * Copy the critical parameters from one trans to the next.
2997 */
2998 logres = trans->t_log_res;
2999 count = trans->t_log_count;
3000 *transp = xfs_trans_dup(trans);
3001
3002 /*
3003 * Commit the current transaction.
3004 * If this commit failed, then it'd just unlock those items that
3005 * are not marked ihold. That also means that a filesystem shutdown
3006 * is in progress. The caller takes the responsibility to cancel
3007 * the duplicate transaction that gets returned.
3008 */
3009 if ((error = xfs_trans_commit(trans, 0)))
3010 return (error);
3011
3012 trans = *transp;
3013
3014 /*
3015 * Reserve space in the log for the next transaction.
3016 * This also pushes items in the "AIL", the list of logged items,
3017 * out to disk if they are taking up space at the tail of the log
3018 * that we want to use. This requires that either nothing be locked
3019 * across this call, or that anything that is locked be logged in
3020 * the prior and the next transactions.
3021 */
3022 error = xfs_trans_reserve(trans, 0, logres, 0,
3023 XFS_TRANS_PERM_LOG_RES, count);
3024 /*
3025 * Ensure that the inode is in the new transaction and locked.
3026 */
3027 if (!error) {
3028 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
3029 xfs_trans_ihold(trans, dp);
3030 }
3031 return (error);
3032
3033}
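
The deleted xfs_attr_rolltrans() is the generic transaction-roll pattern that xfs_trans_roll() now provides: log the inode so it stays joined across the commit, duplicate the transaction to carry the permanent log reservation forward, commit the old transaction, re-reserve log space in the new one, and rejoin the inode. A stubbed control-flow sketch of that sequence (every type and helper below is an invented stand-in, not the real XFS transaction API):

/* Control-flow sketch of the transaction roll. Every type and helper below
 * is an invented stand-in for the real XFS transaction machinery. */
#include <stdio.h>
#include <stdlib.h>

struct demo_trans { unsigned int log_res; unsigned int log_count; };
struct demo_inode { int dummy; };

static void demo_log_inode(struct demo_trans *tp, struct demo_inode *ip)
{ (void)tp; (void)ip; /* mark the inode dirty in this transaction */ }

static struct demo_trans *demo_dup(struct demo_trans *tp)
{
	struct demo_trans *new = malloc(sizeof(*new));

	*new = *tp;	/* linked transaction, same permanent reservation */
	return new;
}

static int demo_commit(struct demo_trans *tp)
{ free(tp); return 0; /* 0 == success */ }

static int demo_reserve(struct demo_trans *tp, unsigned int res,
			unsigned int cnt)
{ tp->log_res = res; tp->log_count = cnt; return 0; }

static void demo_rejoin(struct demo_trans *tp, struct demo_inode *ip)
{ (void)tp; (void)ip; /* relock + hold the inode in the new transaction */ }

static int demo_trans_roll(struct demo_trans **tpp, struct demo_inode *ip)
{
	struct demo_trans *tp = *tpp;
	unsigned int logres = tp->log_res;
	unsigned int count = tp->log_count;
	int error;

	demo_log_inode(tp, ip);		/* inode rides the commit */
	*tpp = demo_dup(tp);		/* carry the reservation forward */
	error = demo_commit(tp);
	if (error)
		return error;		/* caller must cancel the duplicate */
	error = demo_reserve(*tpp, logres, count);
	if (!error)
		demo_rejoin(*tpp, ip);
	return error;
}

int main(void)
{
	struct demo_trans *tp = malloc(sizeof(*tp));
	struct demo_inode ino;

	tp->log_res = 1024;
	tp->log_count = 2;
	printf("roll: %d\n", demo_trans_roll(&tp, &ino));
	free(tp);
	return 0;
}
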
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 5ecf437b7825..83e9af417ca2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -274,6 +274,4 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
274 struct xfs_dabuf *leaf2_bp); 274 struct xfs_dabuf *leaf2_bp);
275int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, 275int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
276 int *local); 276 int *local);
277int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
278
279#endif /* __XFS_ATTR_LEAF_H__ */ 277#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index fab0b6d5a41b..48228848f5ae 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -25,109 +25,6 @@
25 * XFS bit manipulation routines, used in non-realtime code. 25 * XFS bit manipulation routines, used in non-realtime code.
26 */ 26 */
27 27
28#ifndef HAVE_ARCH_HIGHBIT
29/*
30 * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
31 */
32static const char xfs_highbit[256] = {
33 -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */
34 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */
35 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */
36 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */
37 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */
38 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */
39 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */
40 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */
41 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */
42 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */
43 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */
44 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */
45 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */
46 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */
47 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */
48 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */
49 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */
50 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */
51 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */
52 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */
53 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */
54 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */
55 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */
56 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */
57 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */
58 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */
59 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */
60 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */
61 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */
62 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */
63 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */
64 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */
65};
66#endif
67
68/*
69 * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
70 */
71inline int
72xfs_highbit32(
73 __uint32_t v)
74{
75#ifdef HAVE_ARCH_HIGHBIT
76 return highbit32(v);
77#else
78 int i;
79
80 if (v & 0xffff0000)
81 if (v & 0xff000000)
82 i = 24;
83 else
84 i = 16;
85 else if (v & 0x0000ffff)
86 if (v & 0x0000ff00)
87 i = 8;
88 else
89 i = 0;
90 else
91 return -1;
92 return i + xfs_highbit[(v >> i) & 0xff];
93#endif
94}
95
96/*
97 * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
98 */
99int
100xfs_lowbit64(
101 __uint64_t v)
102{
103 __uint32_t w = (__uint32_t)v;
104 int n = 0;
105
106 if (w) { /* lower bits */
107 n = ffs(w);
108 } else { /* upper bits */
109 w = (__uint32_t)(v >> 32);
110 if (w && (n = ffs(w)))
111 n += 32;
112 }
113 return n - 1;
114}
115
116/*
117 * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
118 */
119int
120xfs_highbit64(
121 __uint64_t v)
122{
123 __uint32_t h = (__uint32_t)(v >> 32);
124
125 if (h)
126 return xfs_highbit32(h) + 32;
127 return xfs_highbit32((__uint32_t)v);
128}
129
130
131/* 28/*
132 * Return whether bitmap is empty. 29 * Return whether bitmap is empty.
133 * Size is number of words in the bitmap, which is padded to word boundary 30 * Size is number of words in the bitmap, which is padded to word boundary
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 082641a9782c..8e0e463dae2d 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -47,13 +47,39 @@ static inline __uint64_t xfs_mask64lo(int n)
47} 47}
48 48
49/* Get high bit set out of 32-bit argument, -1 if none set */ 49/* Get high bit set out of 32-bit argument, -1 if none set */
50extern int xfs_highbit32(__uint32_t v); 50static inline int xfs_highbit32(__uint32_t v)
51{
52 return fls(v) - 1;
53}
54
55/* Get high bit set out of 64-bit argument, -1 if none set */
56static inline int xfs_highbit64(__uint64_t v)
57{
58 return fls64(v) - 1;
59}
60
61/* Get low bit set out of 32-bit argument, -1 if none set */
62static inline int xfs_lowbit32(__uint32_t v)
63{
64 unsigned long t = v;
65 return (v) ? find_first_bit(&t, 32) : -1;
66}
51 67
52/* Get low bit set out of 64-bit argument, -1 if none set */ 68/* Get low bit set out of 64-bit argument, -1 if none set */
53extern int xfs_lowbit64(__uint64_t v); 69static inline int xfs_lowbit64(__uint64_t v)
70{
71 __uint32_t w = (__uint32_t)v;
72 int n = 0;
54 73
55/* Get high bit set out of 64-bit argument, -1 if none set */ 74 if (w) { /* lower bits */
56extern int xfs_highbit64(__uint64_t); 75 n = ffs(w);
76 } else { /* upper bits */
77 w = (__uint32_t)(v >> 32);
78 if (w && (n = ffs(w)))
79 n += 32;
80 }
81 return n - 1;
82}
57 83
58/* Return whether bitmap is empty (1 == empty) */ 84/* Return whether bitmap is empty (1 == empty) */
59extern int xfs_bitmap_empty(uint *map, uint size); 85extern int xfs_bitmap_empty(uint *map, uint size);
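
The 256-entry highbit lookup table and the out-of-line xfs_highbit32()/xfs_highbit64() are replaced by inlines over the kernel's fls()/fls64(): fls(v) is the 1-based index of the highest set bit and 0 for v == 0, so fls(v) - 1 preserves the old minus-one-for-none contract. A userspace check of the equivalence (fls is emulated here with GCC's __builtin_clz, which is an assumption about the toolchain):

/* Userspace model of the new fls()-based helpers; fls is emulated with GCC's
 * __builtin_clz, so this assumes a GCC/Clang-style toolchain. */
#include <stdint.h>
#include <stdio.h>

static int demo_fls(uint32_t v)
{
	return v ? 32 - __builtin_clz(v) : 0;
}

static int demo_highbit32(uint32_t v)	/* like the new xfs_highbit32() */
{
	return demo_fls(v) - 1;
}

static int demo_highbit64(uint64_t v)	/* like the new xfs_highbit64() */
{
	uint32_t h = (uint32_t)(v >> 32);

	return h ? demo_highbit32(h) + 32 : demo_highbit32((uint32_t)v);
}

int main(void)
{
	printf("%d %d %d\n", demo_highbit32(0),		/* -1: no bit set */
			     demo_highbit32(0x80000000u),	/* 31 */
			     demo_highbit64(1ULL << 40));	/* 40 */
	return 0;
}
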
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3c4beb3a4326..a1aab9275d5a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -384,14 +384,14 @@ xfs_bmap_count_tree(
384 int levelin, 384 int levelin,
385 int *count); 385 int *count);
386 386
387STATIC int 387STATIC void
388xfs_bmap_count_leaves( 388xfs_bmap_count_leaves(
389 xfs_ifork_t *ifp, 389 xfs_ifork_t *ifp,
390 xfs_extnum_t idx, 390 xfs_extnum_t idx,
391 int numrecs, 391 int numrecs,
392 int *count); 392 int *count);
393 393
394STATIC int 394STATIC void
395xfs_bmap_disk_count_leaves( 395xfs_bmap_disk_count_leaves(
396 xfs_extnum_t idx, 396 xfs_extnum_t idx,
397 xfs_bmbt_block_t *block, 397 xfs_bmbt_block_t *block,
@@ -4000,7 +4000,7 @@ xfs_bmap_add_attrfork(
4000 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 4000 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
4001 } 4001 }
4002 ASSERT(ip->i_d.di_anextents == 0); 4002 ASSERT(ip->i_d.di_anextents == 0);
4003 VN_HOLD(XFS_ITOV(ip)); 4003 IHOLD(ip);
4004 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4004 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
4005 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4005 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4006 switch (ip->i_d.di_format) { 4006 switch (ip->i_d.di_format) {
@@ -6096,7 +6096,7 @@ xfs_bmap_get_bp(
6096 tp = cur->bc_tp; 6096 tp = cur->bc_tp;
6097 licp = &tp->t_items; 6097 licp = &tp->t_items;
6098 while (!bp && licp != NULL) { 6098 while (!bp && licp != NULL) {
6099 if (XFS_LIC_ARE_ALL_FREE(licp)) { 6099 if (xfs_lic_are_all_free(licp)) {
6100 licp = licp->lic_next; 6100 licp = licp->lic_next;
6101 continue; 6101 continue;
6102 } 6102 }
@@ -6106,11 +6106,11 @@ xfs_bmap_get_bp(
6106 xfs_buf_log_item_t *bip; 6106 xfs_buf_log_item_t *bip;
6107 xfs_buf_t *lbp; 6107 xfs_buf_t *lbp;
6108 6108
6109 if (XFS_LIC_ISFREE(licp, i)) { 6109 if (xfs_lic_isfree(licp, i)) {
6110 continue; 6110 continue;
6111 } 6111 }
6112 6112
6113 lidp = XFS_LIC_SLOT(licp, i); 6113 lidp = xfs_lic_slot(licp, i);
6114 lip = lidp->lid_item; 6114 lip = lidp->lid_item;
6115 if (lip->li_type != XFS_LI_BUF) 6115 if (lip->li_type != XFS_LI_BUF)
6116 continue; 6116 continue;
@@ -6367,13 +6367,9 @@ xfs_bmap_count_blocks(
6367 mp = ip->i_mount; 6367 mp = ip->i_mount;
6368 ifp = XFS_IFORK_PTR(ip, whichfork); 6368 ifp = XFS_IFORK_PTR(ip, whichfork);
6369 if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { 6369 if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
6370 if (unlikely(xfs_bmap_count_leaves(ifp, 0, 6370 xfs_bmap_count_leaves(ifp, 0,
6371 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), 6371 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
6372 count) < 0)) { 6372 count);
6373 XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)",
6374 XFS_ERRLEVEL_LOW, mp);
6375 return XFS_ERROR(EFSCORRUPTED);
6376 }
6377 return 0; 6373 return 0;
6378 } 6374 }
6379 6375
@@ -6454,13 +6450,7 @@ xfs_bmap_count_tree(
6454 for (;;) { 6450 for (;;) {
6455 nextbno = be64_to_cpu(block->bb_rightsib); 6451 nextbno = be64_to_cpu(block->bb_rightsib);
6456 numrecs = be16_to_cpu(block->bb_numrecs); 6452 numrecs = be16_to_cpu(block->bb_numrecs);
6457 if (unlikely(xfs_bmap_disk_count_leaves(0, 6453 xfs_bmap_disk_count_leaves(0, block, numrecs, count);
6458 block, numrecs, count) < 0)) {
6459 xfs_trans_brelse(tp, bp);
6460 XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
6461 XFS_ERRLEVEL_LOW, mp);
6462 return XFS_ERROR(EFSCORRUPTED);
6463 }
6464 xfs_trans_brelse(tp, bp); 6454 xfs_trans_brelse(tp, bp);
6465 if (nextbno == NULLFSBLOCK) 6455 if (nextbno == NULLFSBLOCK)
6466 break; 6456 break;
@@ -6478,7 +6468,7 @@ xfs_bmap_count_tree(
6478/* 6468/*
6479 * Count leaf blocks given a range of extent records. 6469 * Count leaf blocks given a range of extent records.
6480 */ 6470 */
6481STATIC int 6471STATIC void
6482xfs_bmap_count_leaves( 6472xfs_bmap_count_leaves(
6483 xfs_ifork_t *ifp, 6473 xfs_ifork_t *ifp,
6484 xfs_extnum_t idx, 6474 xfs_extnum_t idx,
@@ -6491,14 +6481,13 @@ xfs_bmap_count_leaves(
6491 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); 6481 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
6492 *count += xfs_bmbt_get_blockcount(frp); 6482 *count += xfs_bmbt_get_blockcount(frp);
6493 } 6483 }
6494 return 0;
6495} 6484}
6496 6485
6497/* 6486/*
6498 * Count leaf blocks given a range of extent records originally 6487 * Count leaf blocks given a range of extent records originally
6499 * in btree format. 6488 * in btree format.
6500 */ 6489 */
6501STATIC int 6490STATIC void
6502xfs_bmap_disk_count_leaves( 6491xfs_bmap_disk_count_leaves(
6503 xfs_extnum_t idx, 6492 xfs_extnum_t idx,
6504 xfs_bmbt_block_t *block, 6493 xfs_bmbt_block_t *block,
@@ -6512,5 +6501,4 @@ xfs_bmap_disk_count_leaves(
6512 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); 6501 frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
6513 *count += xfs_bmbt_disk_get_blockcount(frp); 6502 *count += xfs_bmbt_disk_get_blockcount(frp);
6514 } 6503 }
6515 return 0;
6516} 6504}
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index aeb87ca69fcc..cc593a84c345 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -46,38 +46,11 @@ kmem_zone_t *xfs_btree_cur_zone;
46/* 46/*
47 * Btree magic numbers. 47 * Btree magic numbers.
48 */ 48 */
49const __uint32_t xfs_magics[XFS_BTNUM_MAX] = 49const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50{
51 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC 50 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
52}; 51};
53 52
54/* 53/*
55 * Prototypes for internal routines.
56 */
57
58/*
59 * Checking routine: return maxrecs for the block.
60 */
61STATIC int /* number of records fitting in block */
62xfs_btree_maxrecs(
63 xfs_btree_cur_t *cur, /* btree cursor */
64 xfs_btree_block_t *block);/* generic btree block pointer */
65
66/*
67 * Internal routines.
68 */
69
70/*
71 * Retrieve the block pointer from the cursor at the given level.
72 * This may be a bmap btree root or from a buffer.
73 */
74STATIC xfs_btree_block_t * /* generic btree block pointer */
75xfs_btree_get_block(
76 xfs_btree_cur_t *cur, /* btree cursor */
77 int level, /* level in btree */
78 struct xfs_buf **bpp); /* buffer containing the block */
79
80/*
81 * Checking routine: return maxrecs for the block. 54 * Checking routine: return maxrecs for the block.
82 */ 55 */
83STATIC int /* number of records fitting in block */ 56STATIC int /* number of records fitting in block */
@@ -457,35 +430,6 @@ xfs_btree_dup_cursor(
457} 430}
458 431
459/* 432/*
460 * Change the cursor to point to the first record at the given level.
461 * Other levels are unaffected.
462 */
463int /* success=1, failure=0 */
464xfs_btree_firstrec(
465 xfs_btree_cur_t *cur, /* btree cursor */
466 int level) /* level to change */
467{
468 xfs_btree_block_t *block; /* generic btree block pointer */
469 xfs_buf_t *bp; /* buffer containing block */
470
471 /*
472 * Get the block pointer for this level.
473 */
474 block = xfs_btree_get_block(cur, level, &bp);
475 xfs_btree_check_block(cur, block, level, bp);
476 /*
477 * It's empty, there is no such record.
478 */
479 if (!block->bb_h.bb_numrecs)
480 return 0;
481 /*
482 * Set the ptr value to 1, that's the first record/key.
483 */
484 cur->bc_ptrs[level] = 1;
485 return 1;
486}
487
488/*
489 * Retrieve the block pointer from the cursor at the given level. 433 * Retrieve the block pointer from the cursor at the given level.
490 * This may be a bmap btree root or from a buffer. 434 * This may be a bmap btree root or from a buffer.
491 */ 435 */
@@ -626,6 +570,13 @@ xfs_btree_init_cursor(
626 cur->bc_private.a.agbp = agbp; 570 cur->bc_private.a.agbp = agbp;
627 cur->bc_private.a.agno = agno; 571 cur->bc_private.a.agno = agno;
628 break; 572 break;
573 case XFS_BTNUM_INO:
574 /*
575 * Inode allocation btree fields.
576 */
577 cur->bc_private.a.agbp = agbp;
578 cur->bc_private.a.agno = agno;
579 break;
629 case XFS_BTNUM_BMAP: 580 case XFS_BTNUM_BMAP:
630 /* 581 /*
631 * Bmap btree fields. 582 * Bmap btree fields.
@@ -638,13 +589,6 @@ xfs_btree_init_cursor(
638 cur->bc_private.b.flags = 0; 589 cur->bc_private.b.flags = 0;
639 cur->bc_private.b.whichfork = whichfork; 590 cur->bc_private.b.whichfork = whichfork;
640 break; 591 break;
641 case XFS_BTNUM_INO:
642 /*
643 * Inode allocation btree fields.
644 */
645 cur->bc_private.i.agbp = agbp;
646 cur->bc_private.i.agno = agno;
647 break;
648 default: 592 default:
649 ASSERT(0); 593 ASSERT(0);
650 } 594 }
@@ -671,6 +615,35 @@ xfs_btree_islastblock(
671} 615}
672 616
673/* 617/*
618 * Change the cursor to point to the first record at the given level.
619 * Other levels are unaffected.
620 */
621int /* success=1, failure=0 */
622xfs_btree_firstrec(
623 xfs_btree_cur_t *cur, /* btree cursor */
624 int level) /* level to change */
625{
626 xfs_btree_block_t *block; /* generic btree block pointer */
627 xfs_buf_t *bp; /* buffer containing block */
628
629 /*
630 * Get the block pointer for this level.
631 */
632 block = xfs_btree_get_block(cur, level, &bp);
633 xfs_btree_check_block(cur, block, level, bp);
634 /*
635 * It's empty, there is no such record.
636 */
637 if (!block->bb_h.bb_numrecs)
638 return 0;
639 /*
640 * Set the ptr value to 1, that's the first record/key.
641 */
642 cur->bc_ptrs[level] = 1;
643 return 1;
644}
645
646/*
674 * Change the cursor to point to the last record in the current block 647 * Change the cursor to point to the last record in the current block
675 * at the given level. Other levels are unaffected. 648 * at the given level. Other levels are unaffected.
676 */ 649 */
@@ -890,12 +863,12 @@ xfs_btree_readahead_core(
890 case XFS_BTNUM_INO: 863 case XFS_BTNUM_INO:
891 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); 864 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
892 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { 865 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
893 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, 866 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
894 be32_to_cpu(i->bb_leftsib), 1); 867 be32_to_cpu(i->bb_leftsib), 1);
895 rval++; 868 rval++;
896 } 869 }
897 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { 870 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
898 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, 871 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
899 be32_to_cpu(i->bb_rightsib), 1); 872 be32_to_cpu(i->bb_rightsib), 1);
900 rval++; 873 rval++;
901 } 874 }
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 7440b78f9cec..1f528a2a3754 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -158,8 +158,8 @@ typedef struct xfs_btree_cur
158 __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ 158 __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
159 xfs_btnum_t bc_btnum; /* identifies which btree type */ 159 xfs_btnum_t bc_btnum; /* identifies which btree type */
160 union { 160 union {
161 struct { /* needed for BNO, CNT */ 161 struct { /* needed for BNO, CNT, INO */
162 struct xfs_buf *agbp; /* agf buffer pointer */ 162 struct xfs_buf *agbp; /* agf/agi buffer pointer */
163 xfs_agnumber_t agno; /* ag number */ 163 xfs_agnumber_t agno; /* ag number */
164 } a; 164 } a;
165 struct { /* needed for BMAP */ 165 struct { /* needed for BMAP */
@@ -172,10 +172,6 @@ typedef struct xfs_btree_cur
172 char flags; /* flags */ 172 char flags; /* flags */
173#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ 173#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */
174 } b; 174 } b;
175 struct { /* needed for INO */
176 struct xfs_buf *agbp; /* agi buffer pointer */
177 xfs_agnumber_t agno; /* ag number */
178 } i;
179 } bc_private; /* per-btree type data */ 175 } bc_private; /* per-btree type data */
180} xfs_btree_cur_t; 176} xfs_btree_cur_t;
181 177
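
In xfs_btree_cur the separate 'i' arm of bc_private duplicated the 'a' arm field-for-field (an AG buffer pointer plus an AG number), so inode-btree cursors now share the 'a' arm and the readahead code above switches from bc_private.i.agno to bc_private.a.agno. A compact illustration of collapsing identical union arms (types invented for the demo):

/* Why the duplicate union arm could go: both arms had identical layout.
 * Types here are invented stand-ins for the xfs_btree_cur private union. */
#include <stdio.h>

struct demo_buf { int id; };

struct demo_cur {
	union {
		struct {		/* BNO, CNT and now INO cursors */
			struct demo_buf *agbp;	/* agf/agi buffer */
			unsigned int	 agno;	/* allocation group number */
		} a;
		struct {		/* BMAP cursors keep their own arm */
			long	firstblock;
			int	whichfork;
		} b;
	} private;
};

int main(void)
{
	struct demo_buf agi = { 7 };
	struct demo_cur cur;

	/* an inode-btree cursor simply uses the 'a' arm now */
	cur.private.a.agbp = &agi;
	cur.private.a.agno = 3;
	printf("agno %u via buffer %d\n", cur.private.a.agno,
	       cur.private.a.agbp->id);
	return 0;
}
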
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index d86ca2c03a70..608c30c3f76b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -737,7 +737,7 @@ xfs_buf_item_init(
737 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); 737 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
738 bip->bli_format.blf_map_size = map_size; 738 bip->bli_format.blf_map_size = map_size;
739#ifdef XFS_BLI_TRACE 739#ifdef XFS_BLI_TRACE
740 bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_SLEEP); 740 bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);
741#endif 741#endif
742 742
743#ifdef XFS_TRANS_DEBUG 743#ifdef XFS_TRANS_DEBUG
@@ -1056,7 +1056,7 @@ xfs_buf_iodone_callbacks(
1056 anyway. */ 1056 anyway. */
1057 XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); 1057 XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);
1058 XFS_BUF_DONE(bp); 1058 XFS_BUF_DONE(bp);
1059 XFS_BUF_V_IODONESEMA(bp); 1059 XFS_BUF_FINISH_IOWAIT(bp);
1060 } 1060 }
1061 return; 1061 return;
1062 } 1062 }
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 2211e885ef24..760f4c5b5160 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -128,10 +128,8 @@ xfs_swap_extents(
128 xfs_swapext_t *sxp) 128 xfs_swapext_t *sxp)
129{ 129{
130 xfs_mount_t *mp; 130 xfs_mount_t *mp;
131 xfs_inode_t *ips[2];
132 xfs_trans_t *tp; 131 xfs_trans_t *tp;
133 xfs_bstat_t *sbp = &sxp->sx_stat; 132 xfs_bstat_t *sbp = &sxp->sx_stat;
134 bhv_vnode_t *vp, *tvp;
135 xfs_ifork_t *tempifp, *ifp, *tifp; 133 xfs_ifork_t *tempifp, *ifp, *tifp;
136 int ilf_fields, tilf_fields; 134 int ilf_fields, tilf_fields;
137 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; 135 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
@@ -150,19 +148,8 @@ xfs_swap_extents(
150 } 148 }
151 149
152 sbp = &sxp->sx_stat; 150 sbp = &sxp->sx_stat;
153 vp = XFS_ITOV(ip);
154 tvp = XFS_ITOV(tip);
155
156 /* Lock in i_ino order */
157 if (ip->i_ino < tip->i_ino) {
158 ips[0] = ip;
159 ips[1] = tip;
160 } else {
161 ips[0] = tip;
162 ips[1] = ip;
163 }
164 151
165 xfs_lock_inodes(ips, 2, lock_flags); 152 xfs_lock_two_inodes(ip, tip, lock_flags);
166 locked = 1; 153 locked = 1;
167 154
168 /* Verify that both files have the same format */ 155 /* Verify that both files have the same format */
@@ -184,7 +171,7 @@ xfs_swap_extents(
184 goto error0; 171 goto error0;
185 } 172 }
186 173
187 if (VN_CACHED(tvp) != 0) { 174 if (VN_CACHED(VFS_I(tip)) != 0) {
188 xfs_inval_cached_trace(tip, 0, -1, 0, -1); 175 xfs_inval_cached_trace(tip, 0, -1, 0, -1);
189 error = xfs_flushinval_pages(tip, 0, -1, 176 error = xfs_flushinval_pages(tip, 0, -1,
190 FI_REMAPF_LOCKED); 177 FI_REMAPF_LOCKED);
@@ -193,7 +180,7 @@ xfs_swap_extents(
193 } 180 }
194 181
195 /* Verify O_DIRECT for ftmp */ 182 /* Verify O_DIRECT for ftmp */
196 if (VN_CACHED(tvp) != 0) { 183 if (VN_CACHED(VFS_I(tip)) != 0) {
197 error = XFS_ERROR(EINVAL); 184 error = XFS_ERROR(EINVAL);
198 goto error0; 185 goto error0;
199 } 186 }
@@ -237,7 +224,7 @@ xfs_swap_extents(
237 * vop_read (or write in the case of autogrow) they block on the iolock 224 * vop_read (or write in the case of autogrow) they block on the iolock
238 * until we have switched the extents. 225 * until we have switched the extents.
239 */ 226 */
240 if (VN_MAPPED(vp)) { 227 if (VN_MAPPED(VFS_I(ip))) {
241 error = XFS_ERROR(EBUSY); 228 error = XFS_ERROR(EBUSY);
242 goto error0; 229 goto error0;
243 } 230 }
@@ -265,7 +252,7 @@ xfs_swap_extents(
265 locked = 0; 252 locked = 0;
266 goto error0; 253 goto error0;
267 } 254 }
268 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); 255 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
269 256
270 /* 257 /*
271 * Count the number of extended attribute blocks 258 * Count the number of extended attribute blocks
@@ -350,15 +337,11 @@ xfs_swap_extents(
350 break; 337 break;
351 } 338 }
352 339
353 /*
354 * Increment vnode ref counts since xfs_trans_commit &
355 * xfs_trans_cancel will both unlock the inodes and
356 * decrement the associated ref counts.
357 */
358 VN_HOLD(vp);
359 VN_HOLD(tvp);
360 340
341 IHOLD(ip);
361 xfs_trans_ijoin(tp, ip, lock_flags); 342 xfs_trans_ijoin(tp, ip, lock_flags);
343
344 IHOLD(tip);
362 xfs_trans_ijoin(tp, tip, lock_flags); 345 xfs_trans_ijoin(tp, tip, lock_flags);
363 346
364 xfs_trans_log_inode(tp, ip, ilf_fields); 347 xfs_trans_log_inode(tp, ip, ilf_fields);
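xfs_swap_extents() above drops its open-coded ips[] ordering in favour of the new xfs_lock_two_inodes() helper (declared in the xfs_inode.h hunk below). The invariant it preserves is the usual one: the two inode locks are always taken in ascending inode-number order, so concurrent swaps of the same pair cannot deadlock ABBA-style. A sketch under that assumption, with hypothetical standalone declarations:

typedef unsigned long long xfs_ino_t;	/* on-disk inode number */

struct xfs_inode_sketch {
	xfs_ino_t i_ino;
};

extern void xfs_ilock_sketch(struct xfs_inode_sketch *ip, unsigned int flags);

/* Lock two inodes in ascending i_ino order; roughly what the new
 * helper must do (the real one also asserts valid flag combinations). */
static void lock_two_inodes_sketch(struct xfs_inode_sketch *ip0,
				   struct xfs_inode_sketch *ip1,
				   unsigned int flags)
{
	struct xfs_inode_sketch *first = ip0, *second = ip1;

	if (ip1->i_ino < ip0->i_ino) {
		first = ip1;
		second = ip0;
	}
	xfs_ilock_sketch(first, flags);
	xfs_ilock_sketch(second, flags);
}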
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index cdc2d3464a1a..2813cdd72375 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_DMAPI_H__ 18#ifndef __XFS_DMAPI_H__
19#define __XFS_DMAPI_H__ 19#define __XFS_DMAPI_H__
20 20
21#include <linux/version.h>
22/* Values used to define the on-disk version of dm_attrname_t. All 21/* Values used to define the on-disk version of dm_attrname_t. All
23 * on-disk attribute names start with the 8-byte string "SGI_DMI_". 22 * on-disk attribute names start with the 8-byte string "SGI_DMI_".
24 * 23 *
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index f66756cfb5e8..f227ecd1a294 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,9 +58,6 @@ xfs_error_trap(int e)
58 } 58 }
59 return e; 59 return e;
60} 60}
61#endif
62
63#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
64 61
65int xfs_etest[XFS_NUM_INJECT_ERROR]; 62int xfs_etest[XFS_NUM_INJECT_ERROR];
66int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; 63int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
@@ -154,7 +151,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
154 151
155 return 0; 152 return 0;
156} 153}
157#endif /* DEBUG || INDUCE_IO_ERROR */ 154#endif /* DEBUG */
158 155
159static void 156static void
160xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) 157xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index d8559d132efa..11543f10b0c6 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -125,22 +125,14 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
125#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) 125#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
126#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 126#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
127 127
128#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) 128#ifdef DEBUG
129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); 129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
130 130
131#define XFS_NUM_INJECT_ERROR 10 131#define XFS_NUM_INJECT_ERROR 10
132
133#ifdef __ANSI_CPP__
134#define XFS_TEST_ERROR(expr, mp, tag, rf) \
135 ((expr) || \
136 xfs_error_test((tag), (mp)->m_fixedfsid, #expr, __LINE__, __FILE__, \
137 (rf)))
138#else
139#define XFS_TEST_ERROR(expr, mp, tag, rf) \ 132#define XFS_TEST_ERROR(expr, mp, tag, rf) \
140 ((expr) || \ 133 ((expr) || \
141 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ 134 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
142 (rf))) 135 (rf)))
143#endif /* __ANSI_CPP__ */
144 136
145extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); 137extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
146extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); 138extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
@@ -148,7 +140,7 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
148#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) 140#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr)
149#define xfs_errortag_add(tag, mp) (ENOSYS) 141#define xfs_errortag_add(tag, mp) (ENOSYS)
150#define xfs_errortag_clearall(mp, loud) (ENOSYS) 142#define xfs_errortag_clearall(mp, loud) (ENOSYS)
151#endif /* (DEBUG || INDUCE_IO_ERROR) */ 143#endif /* DEBUG */
152 144
153/* 145/*
154 * XFS panic tags -- allow a call to xfs_cmn_err() to be turned into 146 * XFS panic tags -- allow a call to xfs_cmn_err() to be turned into
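With INDUCE_IO_ERROR gone, error injection above becomes a plain DEBUG facility, and the dead __ANSI_CPP__ stringizing branch goes with it. The pattern that survives is a macro that, in DEBUG builds, gives an injection table a chance to force a failure and otherwise compiles down to the bare expression; a sketch with hypothetical names mirroring XFS_TEST_ERROR():

#ifdef DEBUG
/* Hypothetical injection hook modelled on xfs_error_test(). */
extern int error_test(int tag, const char *expr, int line, const char *file);

#define TEST_ERROR(expr, tag) \
	((expr) || error_test((tag), #expr, __LINE__, __FILE__))
#else
/* Zero overhead in production builds: just evaluate the expression. */
#define TEST_ERROR(expr, tag)	(expr)
#endif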
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index c38fd14fca29..f3bb75da384e 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -400,7 +400,7 @@ xfs_filestream_init(void)
400 if (!item_zone) 400 if (!item_zone)
401 return -ENOMEM; 401 return -ENOMEM;
402#ifdef XFS_FILESTREAMS_TRACE 402#ifdef XFS_FILESTREAMS_TRACE
403 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); 403 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);
404#endif 404#endif
405 return 0; 405 return 0;
406} 406}
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index e5310c90e50f..83502f3edef0 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -181,7 +181,7 @@ xfs_inobt_delrec(
181 * then we can get rid of this level. 181 * then we can get rid of this level.
182 */ 182 */
183 if (numrecs == 1 && level > 0) { 183 if (numrecs == 1 && level > 0) {
184 agbp = cur->bc_private.i.agbp; 184 agbp = cur->bc_private.a.agbp;
185 agi = XFS_BUF_TO_AGI(agbp); 185 agi = XFS_BUF_TO_AGI(agbp);
186 /* 186 /*
187 * pp is still set to the first pointer in the block. 187 * pp is still set to the first pointer in the block.
@@ -194,7 +194,7 @@ xfs_inobt_delrec(
194 * Free the block. 194 * Free the block.
195 */ 195 */
196 if ((error = xfs_free_extent(cur->bc_tp, 196 if ((error = xfs_free_extent(cur->bc_tp,
197 XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1))) 197 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
198 return error; 198 return error;
199 xfs_trans_binval(cur->bc_tp, bp); 199 xfs_trans_binval(cur->bc_tp, bp);
200 xfs_ialloc_log_agi(cur->bc_tp, agbp, 200 xfs_ialloc_log_agi(cur->bc_tp, agbp,
@@ -379,7 +379,7 @@ xfs_inobt_delrec(
379 rrecs = be16_to_cpu(right->bb_numrecs); 379 rrecs = be16_to_cpu(right->bb_numrecs);
380 rbp = bp; 380 rbp = bp;
381 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 381 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
382 cur->bc_private.i.agno, lbno, 0, &lbp, 382 cur->bc_private.a.agno, lbno, 0, &lbp,
383 XFS_INO_BTREE_REF))) 383 XFS_INO_BTREE_REF)))
384 return error; 384 return error;
385 left = XFS_BUF_TO_INOBT_BLOCK(lbp); 385 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
@@ -401,7 +401,7 @@ xfs_inobt_delrec(
401 lrecs = be16_to_cpu(left->bb_numrecs); 401 lrecs = be16_to_cpu(left->bb_numrecs);
402 lbp = bp; 402 lbp = bp;
403 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 403 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
404 cur->bc_private.i.agno, rbno, 0, &rbp, 404 cur->bc_private.a.agno, rbno, 0, &rbp,
405 XFS_INO_BTREE_REF))) 405 XFS_INO_BTREE_REF)))
406 return error; 406 return error;
407 right = XFS_BUF_TO_INOBT_BLOCK(rbp); 407 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
@@ -484,7 +484,7 @@ xfs_inobt_delrec(
484 xfs_buf_t *rrbp; 484 xfs_buf_t *rrbp;
485 485
486 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 486 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
487 cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0, 487 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
488 &rrbp, XFS_INO_BTREE_REF))) 488 &rrbp, XFS_INO_BTREE_REF)))
489 return error; 489 return error;
490 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); 490 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
@@ -497,7 +497,7 @@ xfs_inobt_delrec(
497 * Free the deleting block. 497 * Free the deleting block.
498 */ 498 */
499 if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, 499 if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
500 cur->bc_private.i.agno, rbno), 1))) 500 cur->bc_private.a.agno, rbno), 1)))
501 return error; 501 return error;
502 xfs_trans_binval(cur->bc_tp, rbp); 502 xfs_trans_binval(cur->bc_tp, rbp);
503 /* 503 /*
@@ -854,7 +854,7 @@ xfs_inobt_lookup(
854 { 854 {
855 xfs_agi_t *agi; /* a.g. inode header */ 855 xfs_agi_t *agi; /* a.g. inode header */
856 856
857 agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); 857 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
858 agno = be32_to_cpu(agi->agi_seqno); 858 agno = be32_to_cpu(agi->agi_seqno);
859 agbno = be32_to_cpu(agi->agi_root); 859 agbno = be32_to_cpu(agi->agi_root);
860 } 860 }
@@ -1089,7 +1089,7 @@ xfs_inobt_lshift(
1089 * Set up the left neighbor as "left". 1089 * Set up the left neighbor as "left".
1090 */ 1090 */
1091 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1091 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1092 cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib), 1092 cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1093 0, &lbp, XFS_INO_BTREE_REF))) 1093 0, &lbp, XFS_INO_BTREE_REF)))
1094 return error; 1094 return error;
1095 left = XFS_BUF_TO_INOBT_BLOCK(lbp); 1095 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
@@ -1207,10 +1207,10 @@ xfs_inobt_newroot(
1207 /* 1207 /*
1208 * Get a block & a buffer. 1208 * Get a block & a buffer.
1209 */ 1209 */
1210 agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); 1210 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
1211 args.tp = cur->bc_tp; 1211 args.tp = cur->bc_tp;
1212 args.mp = cur->bc_mp; 1212 args.mp = cur->bc_mp;
1213 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, 1213 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,
1214 be32_to_cpu(agi->agi_root)); 1214 be32_to_cpu(agi->agi_root));
1215 args.mod = args.minleft = args.alignment = args.total = args.wasdel = 1215 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1216 args.isfl = args.userdata = args.minalignslop = 0; 1216 args.isfl = args.userdata = args.minalignslop = 0;
@@ -1233,7 +1233,7 @@ xfs_inobt_newroot(
1233 */ 1233 */
1234 agi->agi_root = cpu_to_be32(args.agbno); 1234 agi->agi_root = cpu_to_be32(args.agbno);
1235 be32_add_cpu(&agi->agi_level, 1); 1235 be32_add_cpu(&agi->agi_level, 1);
1236 xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, 1236 xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
1237 XFS_AGI_ROOT | XFS_AGI_LEVEL); 1237 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1238 /* 1238 /*
1239 * At the previous root level there are now two blocks: the old 1239 * At the previous root level there are now two blocks: the old
@@ -1376,7 +1376,7 @@ xfs_inobt_rshift(
1376 * Set up the right neighbor as "right". 1376 * Set up the right neighbor as "right".
1377 */ 1377 */
1378 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1378 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1379 cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 1379 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1380 0, &rbp, XFS_INO_BTREE_REF))) 1380 0, &rbp, XFS_INO_BTREE_REF)))
1381 return error; 1381 return error;
1382 right = XFS_BUF_TO_INOBT_BLOCK(rbp); 1382 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
@@ -1492,7 +1492,7 @@ xfs_inobt_split(
1492 * Allocate the new block. 1492 * Allocate the new block.
1493 * If we can't do it, we're toast. Give up. 1493 * If we can't do it, we're toast. Give up.
1494 */ 1494 */
1495 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno); 1495 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
1496 args.mod = args.minleft = args.alignment = args.total = args.wasdel = 1496 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1497 args.isfl = args.userdata = args.minalignslop = 0; 1497 args.isfl = args.userdata = args.minalignslop = 0;
1498 args.minlen = args.maxlen = args.prod = 1; 1498 args.minlen = args.maxlen = args.prod = 1;
@@ -1725,7 +1725,7 @@ xfs_inobt_decrement(
1725 1725
1726 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 1726 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1727 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1727 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1728 cur->bc_private.i.agno, agbno, 0, &bp, 1728 cur->bc_private.a.agno, agbno, 0, &bp,
1729 XFS_INO_BTREE_REF))) 1729 XFS_INO_BTREE_REF)))
1730 return error; 1730 return error;
1731 lev--; 1731 lev--;
@@ -1897,7 +1897,7 @@ xfs_inobt_increment(
1897 1897
1898 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); 1898 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1899 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, 1899 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1900 cur->bc_private.i.agno, agbno, 0, &bp, 1900 cur->bc_private.a.agno, agbno, 0, &bp,
1901 XFS_INO_BTREE_REF))) 1901 XFS_INO_BTREE_REF)))
1902 return error; 1902 return error;
1903 lev--; 1903 lev--;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b07604b94d9f..e229e9e001c2 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -216,7 +216,14 @@ finish_inode:
216 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 216 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
217 init_waitqueue_head(&ip->i_ipin_wait); 217 init_waitqueue_head(&ip->i_ipin_wait);
218 atomic_set(&ip->i_pincount, 0); 218 atomic_set(&ip->i_pincount, 0);
219 initnsema(&ip->i_flock, 1, "xfsfino"); 219
220 /*
221 * Because we want to use a counting completion, complete
222 * the flush completion once to allow a single access to
223 * the flush completion without blocking.
224 */
225 init_completion(&ip->i_flush);
226 complete(&ip->i_flush);
220 227
221 if (lock_flags) 228 if (lock_flags)
222 xfs_ilock(ip, lock_flags); 229 xfs_ilock(ip, lock_flags);
@@ -288,10 +295,17 @@ finish_inode:
288 *ipp = ip; 295 *ipp = ip;
289 296
290 /* 297 /*
298 * Set up the XFS inode with the Linux inode.
299 */
300 ip->i_vnode = inode;
301 inode->i_private = ip;
302
303 /*
291 * If we have a real type for an on-disk inode, we can set ops(&unlock) 304 * If we have a real type for an on-disk inode, we can set ops(&unlock)
292 * now. If it's a new inode being created, xfs_ialloc will handle it. 305 * now. If it's a new inode being created, xfs_ialloc will handle it.
293 */ 306 */
294 xfs_initialize_vnode(mp, inode, ip); 307 if (ip->i_d.di_mode != 0)
308 xfs_setup_inode(ip);
295 return 0; 309 return 0;
296} 310}
297 311
@@ -411,10 +425,11 @@ xfs_iput(xfs_inode_t *ip,
411 * Special iput for brand-new inodes that are still locked 425 * Special iput for brand-new inodes that are still locked
412 */ 426 */
413void 427void
414xfs_iput_new(xfs_inode_t *ip, 428xfs_iput_new(
415 uint lock_flags) 429 xfs_inode_t *ip,
430 uint lock_flags)
416{ 431{
417 struct inode *inode = ip->i_vnode; 432 struct inode *inode = VFS_I(ip);
418 433
419 xfs_itrace_entry(ip); 434 xfs_itrace_entry(ip);
420 435
@@ -775,26 +790,3 @@ xfs_isilocked(
775} 790}
776#endif 791#endif
777 792
778/*
779 * The following three routines simply manage the i_flock
780 * semaphore embedded in the inode. This semaphore synchronizes
781 * processes attempting to flush the in-core inode back to disk.
782 */
783void
784xfs_iflock(xfs_inode_t *ip)
785{
786 psema(&(ip->i_flock), PINOD|PLTWAIT);
787}
788
789int
790xfs_iflock_nowait(xfs_inode_t *ip)
791{
792 return (cpsema(&(ip->i_flock)));
793}
794
795void
796xfs_ifunlock(xfs_inode_t *ip)
797{
798 ASSERT(issemalocked(&(ip->i_flock)));
799 vsema(&(ip->i_flock));
800}
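The initnsema()/psema()/vsema() flush semaphore above becomes a completion seeded with one complete() call, so the first wait_for_completion() returns immediately ("unlocked") and every later waiter blocks until the holder completes again. A self-contained sketch of that idiom — the real helpers are the xfs_iflock()/xfs_ifunlock() inlines added in the xfs_inode.h hunk further down:

#include <linux/completion.h>

struct flush_lock_sketch {
	struct completion done;
};

static void flush_lock_init(struct flush_lock_sketch *fl)
{
	init_completion(&fl->done);
	complete(&fl->done);		/* seed count = 1: starts unlocked */
}

static void flush_lock_acquire(struct flush_lock_sketch *fl)
{
	wait_for_completion(&fl->done);	/* consumes the count: now locked */
}

static void flush_lock_release(struct flush_lock_sketch *fl)
{
	complete(&fl->done);		/* restores the count: now unlocked */
}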
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bedc66163176..00e80df9dd9d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -580,8 +580,8 @@ xfs_iformat_extents(
580 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); 580 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
581 for (i = 0; i < nex; i++, dp++) { 581 for (i = 0; i < nex; i++, dp++) {
582 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 582 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
583 ep->l0 = be64_to_cpu(get_unaligned(&dp->l0)); 583 ep->l0 = get_unaligned_be64(&dp->l0);
584 ep->l1 = be64_to_cpu(get_unaligned(&dp->l1)); 584 ep->l1 = get_unaligned_be64(&dp->l1);
585 } 585 }
586 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); 586 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
587 if (whichfork != XFS_DATA_FORK || 587 if (whichfork != XFS_DATA_FORK ||
@@ -835,22 +835,22 @@ xfs_iread(
835 * Do this before xfs_iformat in case it adds entries. 835 * Do this before xfs_iformat in case it adds entries.
836 */ 836 */
837#ifdef XFS_INODE_TRACE 837#ifdef XFS_INODE_TRACE
838 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); 838 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
839#endif 839#endif
840#ifdef XFS_BMAP_TRACE 840#ifdef XFS_BMAP_TRACE
841 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); 841 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
842#endif 842#endif
843#ifdef XFS_BMBT_TRACE 843#ifdef XFS_BMBT_TRACE
844 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); 844 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
845#endif 845#endif
846#ifdef XFS_RW_TRACE 846#ifdef XFS_RW_TRACE
847 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); 847 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
848#endif 848#endif
849#ifdef XFS_ILOCK_TRACE 849#ifdef XFS_ILOCK_TRACE
850 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); 850 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
851#endif 851#endif
852#ifdef XFS_DIR2_TRACE 852#ifdef XFS_DIR2_TRACE
853 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); 853 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
854#endif 854#endif
855 855
856 /* 856 /*
@@ -1046,9 +1046,9 @@ xfs_ialloc(
1046{ 1046{
1047 xfs_ino_t ino; 1047 xfs_ino_t ino;
1048 xfs_inode_t *ip; 1048 xfs_inode_t *ip;
1049 bhv_vnode_t *vp;
1050 uint flags; 1049 uint flags;
1051 int error; 1050 int error;
1051 timespec_t tv;
1052 1052
1053 /* 1053 /*
1054 * Call the space management code to pick 1054 * Call the space management code to pick
@@ -1077,13 +1077,12 @@ xfs_ialloc(
1077 } 1077 }
1078 ASSERT(ip != NULL); 1078 ASSERT(ip != NULL);
1079 1079
1080 vp = XFS_ITOV(ip);
1081 ip->i_d.di_mode = (__uint16_t)mode; 1080 ip->i_d.di_mode = (__uint16_t)mode;
1082 ip->i_d.di_onlink = 0; 1081 ip->i_d.di_onlink = 0;
1083 ip->i_d.di_nlink = nlink; 1082 ip->i_d.di_nlink = nlink;
1084 ASSERT(ip->i_d.di_nlink == nlink); 1083 ASSERT(ip->i_d.di_nlink == nlink);
1085 ip->i_d.di_uid = current_fsuid(cr); 1084 ip->i_d.di_uid = current_fsuid();
1086 ip->i_d.di_gid = current_fsgid(cr); 1085 ip->i_d.di_gid = current_fsgid();
1087 ip->i_d.di_projid = prid; 1086 ip->i_d.di_projid = prid;
1088 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1087 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
1089 1088
@@ -1130,7 +1129,13 @@ xfs_ialloc(
1130 ip->i_size = 0; 1129 ip->i_size = 0;
1131 ip->i_d.di_nextents = 0; 1130 ip->i_d.di_nextents = 0;
1132 ASSERT(ip->i_d.di_nblocks == 0); 1131 ASSERT(ip->i_d.di_nblocks == 0);
1133 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); 1132
1133 nanotime(&tv);
1134 ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
1135 ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
1136 ip->i_d.di_atime = ip->i_d.di_mtime;
1137 ip->i_d.di_ctime = ip->i_d.di_mtime;
1138
1134 /* 1139 /*
1135 * di_gen will have been taken care of in xfs_iread. 1140 * di_gen will have been taken care of in xfs_iread.
1136 */ 1141 */
@@ -1220,7 +1225,7 @@ xfs_ialloc(
1220 xfs_trans_log_inode(tp, ip, flags); 1225 xfs_trans_log_inode(tp, ip, flags);
1221 1226
1222 /* now that we have an i_mode we can setup inode ops and unlock */ 1227 /* now that we have an i_mode we can setup inode ops and unlock */
1223 xfs_initialize_vnode(tp->t_mountp, vp, ip); 1228 xfs_setup_inode(ip);
1224 1229
1225 *ipp = ip; 1230 *ipp = ip;
1226 return 0; 1231 return 0;
@@ -1399,7 +1404,6 @@ xfs_itruncate_start(
1399 xfs_fsize_t last_byte; 1404 xfs_fsize_t last_byte;
1400 xfs_off_t toss_start; 1405 xfs_off_t toss_start;
1401 xfs_mount_t *mp; 1406 xfs_mount_t *mp;
1402 bhv_vnode_t *vp;
1403 int error = 0; 1407 int error = 0;
1404 1408
1405 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1409 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
@@ -1408,7 +1412,6 @@ xfs_itruncate_start(
1408 (flags == XFS_ITRUNC_MAYBE)); 1412 (flags == XFS_ITRUNC_MAYBE));
1409 1413
1410 mp = ip->i_mount; 1414 mp = ip->i_mount;
1411 vp = XFS_ITOV(ip);
1412 1415
1413 /* wait for the completion of any pending DIOs */ 1416 /* wait for the completion of any pending DIOs */
1414 if (new_size < ip->i_size) 1417 if (new_size < ip->i_size)
@@ -1457,7 +1460,7 @@ xfs_itruncate_start(
1457 1460
1458#ifdef DEBUG 1461#ifdef DEBUG
1459 if (new_size == 0) { 1462 if (new_size == 0) {
1460 ASSERT(VN_CACHED(vp) == 0); 1463 ASSERT(VN_CACHED(VFS_I(ip)) == 0);
1461 } 1464 }
1462#endif 1465#endif
1463 return error; 1466 return error;
@@ -2630,7 +2633,6 @@ xfs_idestroy(
2630 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 2633 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
2631 mrfree(&ip->i_lock); 2634 mrfree(&ip->i_lock);
2632 mrfree(&ip->i_iolock); 2635 mrfree(&ip->i_iolock);
2633 freesema(&ip->i_flock);
2634 2636
2635#ifdef XFS_INODE_TRACE 2637#ifdef XFS_INODE_TRACE
2636 ktrace_free(ip->i_trace); 2638 ktrace_free(ip->i_trace);
@@ -3048,10 +3050,10 @@ cluster_corrupt_out:
3048/* 3050/*
3049 * xfs_iflush() will write a modified inode's changes out to the 3051 * xfs_iflush() will write a modified inode's changes out to the
3050 * inode's on disk home. The caller must have the inode lock held 3052 * inode's on disk home. The caller must have the inode lock held
3051 * in at least shared mode and the inode flush semaphore must be 3053 * in at least shared mode and the inode flush completion must be
3052 * held as well. The inode lock will still be held upon return from 3054 * active as well. The inode lock will still be held upon return from
3053 * the call and the caller is free to unlock it. 3055 * the call and the caller is free to unlock it.
3054 * The inode flush lock will be unlocked when the inode reaches the disk. 3056 * The inode flush will be completed when the inode reaches the disk.
3055 * The flags indicate how the inode's buffer should be written out. 3057 * The flags indicate how the inode's buffer should be written out.
3056 */ 3058 */
3057int 3059int
@@ -3070,7 +3072,7 @@ xfs_iflush(
3070 XFS_STATS_INC(xs_iflush_count); 3072 XFS_STATS_INC(xs_iflush_count);
3071 3073
3072 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3074 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3073 ASSERT(issemalocked(&(ip->i_flock))); 3075 ASSERT(!completion_done(&ip->i_flush));
3074 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3076 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3075 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3077 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3076 3078
@@ -3233,7 +3235,7 @@ xfs_iflush_int(
3233#endif 3235#endif
3234 3236
3235 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3237 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3236 ASSERT(issemalocked(&(ip->i_flock))); 3238 ASSERT(!completion_done(&ip->i_flush));
3237 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3239 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3238 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3240 ip->i_d.di_nextents > ip->i_df.if_ext_max);
3239 3241
@@ -3465,7 +3467,6 @@ xfs_iflush_all(
3465 xfs_mount_t *mp) 3467 xfs_mount_t *mp)
3466{ 3468{
3467 xfs_inode_t *ip; 3469 xfs_inode_t *ip;
3468 bhv_vnode_t *vp;
3469 3470
3470 again: 3471 again:
3471 XFS_MOUNT_ILOCK(mp); 3472 XFS_MOUNT_ILOCK(mp);
@@ -3480,14 +3481,13 @@ xfs_iflush_all(
3480 continue; 3481 continue;
3481 } 3482 }
3482 3483
3483 vp = XFS_ITOV_NULL(ip); 3484 if (!VFS_I(ip)) {
3484 if (!vp) {
3485 XFS_MOUNT_IUNLOCK(mp); 3485 XFS_MOUNT_IUNLOCK(mp);
3486 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); 3486 xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
3487 goto again; 3487 goto again;
3488 } 3488 }
3489 3489
3490 ASSERT(vn_count(vp) == 0); 3490 ASSERT(vn_count(VFS_I(ip)) == 0);
3491 3491
3492 ip = ip->i_mnext; 3492 ip = ip->i_mnext;
3493 } while (ip != mp->m_inodes); 3493 } while (ip != mp->m_inodes);
@@ -3707,7 +3707,7 @@ xfs_iext_add_indirect_multi(
3707 * (all extents past */ 3707 * (all extents past */
3708 if (nex2) { 3708 if (nex2) {
3709 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 3709 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
3710 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); 3710 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
3711 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 3711 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
3712 erp->er_extcount -= nex2; 3712 erp->er_extcount -= nex2;
3713 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 3713 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
@@ -4007,8 +4007,7 @@ xfs_iext_realloc_direct(
4007 ifp->if_u1.if_extents = 4007 ifp->if_u1.if_extents =
4008 kmem_realloc(ifp->if_u1.if_extents, 4008 kmem_realloc(ifp->if_u1.if_extents,
4009 rnew_size, 4009 rnew_size,
4010 ifp->if_real_bytes, 4010 ifp->if_real_bytes, KM_NOFS);
4011 KM_SLEEP);
4012 } 4011 }
4013 if (rnew_size > ifp->if_real_bytes) { 4012 if (rnew_size > ifp->if_real_bytes) {
4014 memset(&ifp->if_u1.if_extents[ifp->if_bytes / 4013 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
@@ -4067,7 +4066,7 @@ xfs_iext_inline_to_direct(
4067 xfs_ifork_t *ifp, /* inode fork pointer */ 4066 xfs_ifork_t *ifp, /* inode fork pointer */
4068 int new_size) /* number of extents in file */ 4067 int new_size) /* number of extents in file */
4069{ 4068{
4070 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP); 4069 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
4071 memset(ifp->if_u1.if_extents, 0, new_size); 4070 memset(ifp->if_u1.if_extents, 0, new_size);
4072 if (ifp->if_bytes) { 4071 if (ifp->if_bytes) {
4073 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 4072 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
@@ -4099,7 +4098,7 @@ xfs_iext_realloc_indirect(
4099 } else { 4098 } else {
4100 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 4099 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
4101 kmem_realloc(ifp->if_u1.if_ext_irec, 4100 kmem_realloc(ifp->if_u1.if_ext_irec,
4102 new_size, size, KM_SLEEP); 4101 new_size, size, KM_NOFS);
4103 } 4102 }
4104} 4103}
4105 4104
@@ -4341,11 +4340,10 @@ xfs_iext_irec_init(
4341 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4340 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4342 ASSERT(nextents <= XFS_LINEAR_EXTS); 4341 ASSERT(nextents <= XFS_LINEAR_EXTS);
4343 4342
4344 erp = (xfs_ext_irec_t *) 4343 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
4345 kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);
4346 4344
4347 if (nextents == 0) { 4345 if (nextents == 0) {
4348 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); 4346 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
4349 } else if (!ifp->if_real_bytes) { 4347 } else if (!ifp->if_real_bytes) {
4350 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 4348 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
4351 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 4349 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
@@ -4393,7 +4391,7 @@ xfs_iext_irec_new(
4393 4391
4394 /* Initialize new extent record */ 4392 /* Initialize new extent record */
4395 erp = ifp->if_u1.if_ext_irec; 4393 erp = ifp->if_u1.if_ext_irec;
4396 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); 4394 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
4397 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4395 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4398 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 4396 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
4399 erp[erp_idx].er_extcount = 0; 4397 erp[erp_idx].er_extcount = 0;
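xfs_iformat_extents() above collapses be64_to_cpu(get_unaligned(...)) into get_unaligned_be64(), which both tolerates the unaligned packing of on-disk extent records and byte-swaps from the big-endian disk format in one call. A minimal sketch:

#include <asm/unaligned.h>
#include <linux/types.h>

/* Read one 64-bit big-endian word from a possibly unaligned on-disk
 * record; safe even on architectures that trap on unaligned loads. */
static u64 read_disk_u64(const void *p)
{
	return get_unaligned_be64(p);
}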
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 17a04b6321ed..1420c49674d7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -87,8 +87,7 @@ typedef struct xfs_ifork {
87 * Flags for xfs_ichgtime(). 87 * Flags for xfs_ichgtime().
88 */ 88 */
89#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 89#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
90#define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */ 90#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
91#define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */
92 91
93/* 92/*
94 * Per-fork incore inode flags. 93 * Per-fork incore inode flags.
@@ -204,7 +203,7 @@ typedef struct xfs_inode {
204 struct xfs_inode *i_mprev; /* ptr to prev inode */ 203 struct xfs_inode *i_mprev; /* ptr to prev inode */
205 struct xfs_mount *i_mount; /* fs mount struct ptr */ 204 struct xfs_mount *i_mount; /* fs mount struct ptr */
206 struct list_head i_reclaim; /* reclaim list */ 205 struct list_head i_reclaim; /* reclaim list */
207 bhv_vnode_t *i_vnode; /* vnode backpointer */ 206 struct inode *i_vnode; /* vnode backpointer */
208 struct xfs_dquot *i_udquot; /* user dquot */ 207 struct xfs_dquot *i_udquot; /* user dquot */
209 struct xfs_dquot *i_gdquot; /* group dquot */ 208 struct xfs_dquot *i_gdquot; /* group dquot */
210 209
@@ -223,7 +222,7 @@ typedef struct xfs_inode {
223 struct xfs_inode_log_item *i_itemp; /* logging information */ 222 struct xfs_inode_log_item *i_itemp; /* logging information */
224 mrlock_t i_lock; /* inode lock */ 223 mrlock_t i_lock; /* inode lock */
225 mrlock_t i_iolock; /* inode IO lock */ 224 mrlock_t i_iolock; /* inode IO lock */
226 sema_t i_flock; /* inode flush lock */ 225 struct completion i_flush; /* inode flush completion q */
227 atomic_t i_pincount; /* inode pin count */ 226 atomic_t i_pincount; /* inode pin count */
228 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ 227 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
229 spinlock_t i_flags_lock; /* inode i_flags lock */ 228 spinlock_t i_flags_lock; /* inode i_flags lock */
@@ -263,6 +262,18 @@ typedef struct xfs_inode {
263#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ 262#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
264 (ip)->i_size : (ip)->i_d.di_size; 263 (ip)->i_size : (ip)->i_d.di_size;
265 264
265/* Convert from vfs inode to xfs inode */
266static inline struct xfs_inode *XFS_I(struct inode *inode)
267{
268 return (struct xfs_inode *)inode->i_private;
269}
270
271 /* Convert from xfs inode to vfs inode */
272static inline struct inode *VFS_I(struct xfs_inode *ip)
273{
274 return (struct inode *)ip->i_vnode;
275}
276
266/* 277/*
267 * i_flags helper functions 278 * i_flags helper functions
268 */ 279 */
@@ -439,9 +450,6 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
439#define XFS_ITRUNC_DEFINITE 0x1 450#define XFS_ITRUNC_DEFINITE 0x1
440#define XFS_ITRUNC_MAYBE 0x2 451#define XFS_ITRUNC_MAYBE 0x2
441 452
442#define XFS_ITOV(ip) ((ip)->i_vnode)
443#define XFS_ITOV_NULL(ip) ((ip)->i_vnode)
444
445/* 453/*
446 * For multiple groups support: if S_ISGID bit is set in the parent 454 * For multiple groups support: if S_ISGID bit is set in the parent
447 * directory, group of new file is set to that of the parent, and 455 * directory, group of new file is set to that of the parent, and
@@ -473,11 +481,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint);
473void xfs_iunlock(xfs_inode_t *, uint); 481void xfs_iunlock(xfs_inode_t *, uint);
474void xfs_ilock_demote(xfs_inode_t *, uint); 482void xfs_ilock_demote(xfs_inode_t *, uint);
475int xfs_isilocked(xfs_inode_t *, uint); 483int xfs_isilocked(xfs_inode_t *, uint);
476void xfs_iflock(xfs_inode_t *);
477int xfs_iflock_nowait(xfs_inode_t *);
478uint xfs_ilock_map_shared(xfs_inode_t *); 484uint xfs_ilock_map_shared(xfs_inode_t *);
479void xfs_iunlock_map_shared(xfs_inode_t *, uint); 485void xfs_iunlock_map_shared(xfs_inode_t *, uint);
480void xfs_ifunlock(xfs_inode_t *);
481void xfs_ireclaim(xfs_inode_t *); 486void xfs_ireclaim(xfs_inode_t *);
482int xfs_finish_reclaim(xfs_inode_t *, int, int); 487int xfs_finish_reclaim(xfs_inode_t *, int, int);
483int xfs_finish_reclaim_all(struct xfs_mount *, int); 488int xfs_finish_reclaim_all(struct xfs_mount *, int);
@@ -522,6 +527,7 @@ void xfs_iflush_all(struct xfs_mount *);
522void xfs_ichgtime(xfs_inode_t *, int); 527void xfs_ichgtime(xfs_inode_t *, int);
523xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 528xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
524void xfs_lock_inodes(xfs_inode_t **, int, uint); 529void xfs_lock_inodes(xfs_inode_t **, int, uint);
530void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
525 531
526void xfs_synchronize_atime(xfs_inode_t *); 532void xfs_synchronize_atime(xfs_inode_t *);
527void xfs_mark_inode_dirty_sync(xfs_inode_t *); 533void xfs_mark_inode_dirty_sync(xfs_inode_t *);
@@ -570,6 +576,26 @@ extern struct kmem_zone *xfs_ifork_zone;
570extern struct kmem_zone *xfs_inode_zone; 576extern struct kmem_zone *xfs_inode_zone;
571extern struct kmem_zone *xfs_ili_zone; 577extern struct kmem_zone *xfs_ili_zone;
572 578
579/*
580 * Manage the i_flush queue embedded in the inode. This completion
581 * queue synchronizes processes attempting to flush the in-core
582 * inode back to disk.
583 */
584static inline void xfs_iflock(xfs_inode_t *ip)
585{
586 wait_for_completion(&ip->i_flush);
587}
588
589static inline int xfs_iflock_nowait(xfs_inode_t *ip)
590{
591 return try_wait_for_completion(&ip->i_flush);
592}
593
594static inline void xfs_ifunlock(xfs_inode_t *ip)
595{
596 complete(&ip->i_flush);
597}
598
573#endif /* __KERNEL__ */ 599#endif /* __KERNEL__ */
574 600
575#endif /* __XFS_INODE_H__ */ 601#endif /* __XFS_INODE_H__ */
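The XFS_I()/VFS_I() inlines above replace the old XFS_ITOV()/XFS_ITOV_NULL() macros: the Linux inode carries the XFS inode in ->i_private (set in the xfs_iget.c hunk earlier) and the XFS inode keeps an ->i_vnode backpointer, so each conversion is a single pointer load. A standalone sketch of the pairing, with hypothetical types:

struct vfs_inode_sketch {
	void *i_private;			/* -> filesystem-private inode */
};

struct xfs_inode_sketch {
	struct vfs_inode_sketch *i_vnode;	/* backpointer to VFS inode */
};

static inline struct xfs_inode_sketch *
to_xfs(struct vfs_inode_sketch *inode)
{
	return inode->i_private;
}

static inline struct vfs_inode_sketch *
to_vfs(struct xfs_inode_sketch *ip)
{
	return ip->i_vnode;
}

Later kernels eventually embed the VFS inode inside the XFS inode and use container_of() instead, but at this point in the series the two objects are still allocated separately.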
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 0eee08a32c26..97c7452e2620 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -779,11 +779,10 @@ xfs_inode_item_pushbuf(
779 ASSERT(iip->ili_push_owner == current_pid()); 779 ASSERT(iip->ili_push_owner == current_pid());
780 780
781 /* 781 /*
782 * If flushlock isn't locked anymore, chances are that the 782 * If a flush is not in progress anymore, chances are that the
783 * inode flush completed and the inode was taken off the AIL. 783 * inode was taken off the AIL. So, just get out.
784 * So, just get out.
785 */ 784 */
786 if (!issemalocked(&(ip->i_flock)) || 785 if (completion_done(&ip->i_flush) ||
787 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 786 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
788 iip->ili_pushbuf_flag = 0; 787 iip->ili_pushbuf_flag = 0;
789 xfs_iunlock(ip, XFS_ILOCK_SHARED); 788 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -805,7 +804,7 @@ xfs_inode_item_pushbuf(
805 * If not, we can flush it async. 804 * If not, we can flush it async.
806 */ 805 */
807 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && 806 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
808 issemalocked(&(ip->i_flock))); 807 !completion_done(&ip->i_flush));
809 iip->ili_pushbuf_flag = 0; 808 iip->ili_pushbuf_flag = 0;
810 xfs_iunlock(ip, XFS_ILOCK_SHARED); 809 xfs_iunlock(ip, XFS_ILOCK_SHARED);
811 xfs_buftrace("INODE ITEM PUSH", bp); 810 xfs_buftrace("INODE ITEM PUSH", bp);
@@ -858,7 +857,7 @@ xfs_inode_item_push(
858 ip = iip->ili_inode; 857 ip = iip->ili_inode;
859 858
860 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 859 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
861 ASSERT(issemalocked(&(ip->i_flock))); 860 ASSERT(!completion_done(&ip->i_flush));
862 /* 861 /*
863 * Since we were able to lock the inode's flush lock and 862 * Since we were able to lock the inode's flush lock and
864 * we found it on the AIL, the inode must be dirty. This 863 * we found it on the AIL, the inode must be dirty. This
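xfs_inode_item_pushbuf() above needs a non-destructive "is a flush in progress?" test to replace issemalocked(). completion_done() only peeks at the completion's count, whereas a successful try_wait_for_completion() — used by xfs_iflock_nowait() — consumes it and therefore actually takes the flush lock. A short sketch of the distinction:

#include <linux/completion.h>
#include <linux/types.h>

/* True while some thread holds the flush "lock" (count == 0).
 * Peeks only: unlike try_wait_for_completion(), it never consumes
 * the count, so the observer does not become the lock holder. */
static bool flush_in_progress(struct completion *flush)
{
	return !completion_done(flush);
}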
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 9a3ef9dcaeb9..cf6754a3c5b3 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -59,7 +59,6 @@ xfs_bulkstat_one_iget(
59{ 59{
60 xfs_icdinode_t *dic; /* dinode core info pointer */ 60 xfs_icdinode_t *dic; /* dinode core info pointer */
61 xfs_inode_t *ip; /* incore inode pointer */ 61 xfs_inode_t *ip; /* incore inode pointer */
62 bhv_vnode_t *vp;
63 int error; 62 int error;
64 63
65 error = xfs_iget(mp, NULL, ino, 64 error = xfs_iget(mp, NULL, ino,
@@ -72,7 +71,6 @@ xfs_bulkstat_one_iget(
72 ASSERT(ip != NULL); 71 ASSERT(ip != NULL);
73 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 72 ASSERT(ip->i_blkno != (xfs_daddr_t)0);
74 73
75 vp = XFS_ITOV(ip);
76 dic = &ip->i_d; 74 dic = &ip->i_d;
77 75
78 /* xfs_iget returns the following without needing 76 /* xfs_iget returns the following without needing
@@ -85,7 +83,7 @@ xfs_bulkstat_one_iget(
85 buf->bs_uid = dic->di_uid; 83 buf->bs_uid = dic->di_uid;
86 buf->bs_gid = dic->di_gid; 84 buf->bs_gid = dic->di_gid;
87 buf->bs_size = dic->di_size; 85 buf->bs_size = dic->di_size;
88 vn_atime_to_bstime(vp, &buf->bs_atime); 86 vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime);
89 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; 87 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
90 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; 88 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
91 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; 89 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 91b00a5686cd..ccba14eb9dbe 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -160,7 +160,7 @@ void
160xlog_trace_iclog(xlog_in_core_t *iclog, uint state) 160xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
161{ 161{
162 if (!iclog->ic_trace) 162 if (!iclog->ic_trace)
163 iclog->ic_trace = ktrace_alloc(256, KM_SLEEP); 163 iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
164 ktrace_enter(iclog->ic_trace, 164 ktrace_enter(iclog->ic_trace,
165 (void *)((unsigned long)state), 165 (void *)((unsigned long)state),
166 (void *)((unsigned long)current_pid()), 166 (void *)((unsigned long)current_pid()),
@@ -336,15 +336,12 @@ xfs_log_done(xfs_mount_t *mp,
336 } else { 336 } else {
337 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 337 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
338 xlog_regrant_reserve_log_space(log, ticket); 338 xlog_regrant_reserve_log_space(log, ticket);
339 } 339 /* If this ticket was a permanent reservation and we aren't
340 340 * trying to release it, reset the inited flags; so next time
341 /* If this ticket was a permanent reservation and we aren't 341 * we write, a start record will be written out.
342 * trying to release it, reset the inited flags; so next time 342 */
343 * we write, a start record will be written out.
344 */
345 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) &&
346 (flags & XFS_LOG_REL_PERM_RESERV) == 0)
347 ticket->t_flags |= XLOG_TIC_INITED; 343 ticket->t_flags |= XLOG_TIC_INITED;
344 }
348 345
349 return lsn; 346 return lsn;
350} /* xfs_log_done */ 347} /* xfs_log_done */
@@ -357,11 +354,11 @@ xfs_log_done(xfs_mount_t *mp,
357 * Asynchronous forces are implemented by setting the WANT_SYNC 354 * Asynchronous forces are implemented by setting the WANT_SYNC
358 * bit in the appropriate in-core log and then returning. 355 * bit in the appropriate in-core log and then returning.
359 * 356 *
360 * Synchronous forces are implemented with a semaphore. All callers 357 * Synchronous forces are implemented with a signal variable. All callers
361 * to force a given lsn to disk will wait on a semaphore attached to the 358 * to force a given lsn to disk will wait on the sv attached to the
362 * specific in-core log. When the given in-core log finally completes its 359 * specific in-core log. When the given in-core log finally completes its
363 * write to disk, that thread will wake up all threads waiting on the 360 * write to disk, that thread will wake up all threads waiting on the
364 * semaphore. 361 * sv.
365 */ 362 */
366int 363int
367_xfs_log_force( 364_xfs_log_force(
@@ -588,12 +585,12 @@ error:
588 * mp - ubiquitous xfs mount point structure 585 * mp - ubiquitous xfs mount point structure
589 */ 586 */
590int 587int
591xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) 588xfs_log_mount_finish(xfs_mount_t *mp)
592{ 589{
593 int error; 590 int error;
594 591
595 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 592 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
596 error = xlog_recover_finish(mp->m_log, mfsi_flags); 593 error = xlog_recover_finish(mp->m_log);
597 else { 594 else {
598 error = 0; 595 error = 0;
599 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 596 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -707,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
707 if (!(iclog->ic_state == XLOG_STATE_ACTIVE || 704 if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
708 iclog->ic_state == XLOG_STATE_DIRTY)) { 705 iclog->ic_state == XLOG_STATE_DIRTY)) {
709 if (!XLOG_FORCED_SHUTDOWN(log)) { 706 if (!XLOG_FORCED_SHUTDOWN(log)) {
710 sv_wait(&iclog->ic_forcesema, PMEM, 707 sv_wait(&iclog->ic_force_wait, PMEM,
711 &log->l_icloglock, s); 708 &log->l_icloglock, s);
712 } else { 709 } else {
713 spin_unlock(&log->l_icloglock); 710 spin_unlock(&log->l_icloglock);
@@ -748,7 +745,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
748 || iclog->ic_state == XLOG_STATE_DIRTY 745 || iclog->ic_state == XLOG_STATE_DIRTY
749 || iclog->ic_state == XLOG_STATE_IOERROR) ) { 746 || iclog->ic_state == XLOG_STATE_IOERROR) ) {
750 747
751 sv_wait(&iclog->ic_forcesema, PMEM, 748 sv_wait(&iclog->ic_force_wait, PMEM,
752 &log->l_icloglock, s); 749 &log->l_icloglock, s);
753 } else { 750 } else {
754 spin_unlock(&log->l_icloglock); 751 spin_unlock(&log->l_icloglock);
@@ -838,7 +835,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
838 break; 835 break;
839 tail_lsn = 0; 836 tail_lsn = 0;
840 free_bytes -= tic->t_unit_res; 837 free_bytes -= tic->t_unit_res;
841 sv_signal(&tic->t_sema); 838 sv_signal(&tic->t_wait);
842 tic = tic->t_next; 839 tic = tic->t_next;
843 } while (tic != log->l_write_headq); 840 } while (tic != log->l_write_headq);
844 } 841 }
@@ -859,7 +856,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
859 break; 856 break;
860 tail_lsn = 0; 857 tail_lsn = 0;
861 free_bytes -= need_bytes; 858 free_bytes -= need_bytes;
862 sv_signal(&tic->t_sema); 859 sv_signal(&tic->t_wait);
863 tic = tic->t_next; 860 tic = tic->t_next;
864 } while (tic != log->l_reserve_headq); 861 } while (tic != log->l_reserve_headq);
865 } 862 }
@@ -1285,8 +1282,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1285 1282
1286 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1283 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1287 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1284 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
1288 sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); 1285 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1289 sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); 1286 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1290 1287
1291 iclogp = &iclog->ic_next; 1288 iclogp = &iclog->ic_next;
1292 } 1289 }
@@ -1565,8 +1562,8 @@ xlog_dealloc_log(xlog_t *log)
1565 1562
1566 iclog = log->l_iclog; 1563 iclog = log->l_iclog;
1567 for (i=0; i<log->l_iclog_bufs; i++) { 1564 for (i=0; i<log->l_iclog_bufs; i++) {
1568 sv_destroy(&iclog->ic_forcesema); 1565 sv_destroy(&iclog->ic_force_wait);
1569 sv_destroy(&iclog->ic_writesema); 1566 sv_destroy(&iclog->ic_write_wait);
1570 xfs_buf_free(iclog->ic_bp); 1567 xfs_buf_free(iclog->ic_bp);
1571#ifdef XFS_LOG_TRACE 1568#ifdef XFS_LOG_TRACE
1572 if (iclog->ic_trace != NULL) { 1569 if (iclog->ic_trace != NULL) {
@@ -1976,7 +1973,7 @@ xlog_write(xfs_mount_t * mp,
1976/* Clean iclogs starting from the head. This ordering must be 1973/* Clean iclogs starting from the head. This ordering must be
1977 * maintained, so an iclog doesn't become ACTIVE beyond one that 1974 * maintained, so an iclog doesn't become ACTIVE beyond one that
1978 * is SYNCING. This is also required to maintain the notion that we use 1975 * is SYNCING. This is also required to maintain the notion that we use
1979 * a counting semaphore to hold off would-be writers to the log when every 1976 * an ordered wait queue to hold off would-be writers to the log when every
1980 * iclog is trying to sync to disk. 1977 * iclog is trying to sync to disk.
1981 * 1978 *
1982 * State Change: DIRTY -> ACTIVE 1979 * State Change: DIRTY -> ACTIVE
@@ -2240,7 +2237,7 @@ xlog_state_do_callback(
2240 xlog_state_clean_log(log); 2237 xlog_state_clean_log(log);
2241 2238
2242 /* wake up threads waiting in xfs_log_force() */ 2239 /* wake up threads waiting in xfs_log_force() */
2243 sv_broadcast(&iclog->ic_forcesema); 2240 sv_broadcast(&iclog->ic_force_wait);
2244 2241
2245 iclog = iclog->ic_next; 2242 iclog = iclog->ic_next;
2246 } while (first_iclog != iclog); 2243 } while (first_iclog != iclog);
@@ -2302,8 +2299,7 @@ xlog_state_do_callback(
2302 * the second completion goes through. 2299 * the second completion goes through.
2303 * 2300 *
2304 * Callbacks could take time, so they are done outside the scope of the 2301 * Callbacks could take time, so they are done outside the scope of the
2305 * global state machine log lock. Assume that the calls to cvsema won't 2302 * global state machine log lock.
2306 * take a long time. At least we know it won't sleep.
2307 */ 2303 */
2308STATIC void 2304STATIC void
2309xlog_state_done_syncing( 2305xlog_state_done_syncing(
@@ -2339,7 +2335,7 @@ xlog_state_done_syncing(
2339 * iclog buffer, we wake them all, one will get to do the 2335 * iclog buffer, we wake them all, one will get to do the
2340 * I/O, the others get to wait for the result. 2336 * I/O, the others get to wait for the result.
2341 */ 2337 */
2342 sv_broadcast(&iclog->ic_writesema); 2338 sv_broadcast(&iclog->ic_write_wait);
2343 spin_unlock(&log->l_icloglock); 2339 spin_unlock(&log->l_icloglock);
2344 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ 2340 xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2345} /* xlog_state_done_syncing */ 2341} /* xlog_state_done_syncing */
@@ -2347,11 +2343,9 @@ xlog_state_done_syncing(
2347 2343
2348/* 2344/*
2349 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must 2345 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
2350 * sleep. The flush semaphore is set to the number of in-core buffers and 2346 * sleep. We wait on the flush queue on the head iclog as that should be
2351 * decremented around disk syncing. Therefore, if all buffers are syncing, 2347 * the first iclog to complete flushing. Hence if all iclogs are syncing,
2352 * this semaphore will cause new writes to sleep until a sync completes. 2348 * we will wait here and all new writes will sleep until a sync completes.
2353 * Otherwise, this code just does p() followed by v(). This approximates
2354 * a sleep/wakeup except we can't race.
2355 * 2349 *
2356 * The in-core logs are used in a circular fashion. They are not used 2350 * The in-core logs are used in a circular fashion. They are not used
2357 * out-of-order even when an iclog past the head is free. 2351 * out-of-order even when an iclog past the head is free.
@@ -2508,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log,
2508 goto error_return; 2502 goto error_return;
2509 2503
2510 XFS_STATS_INC(xs_sleep_logspace); 2504 XFS_STATS_INC(xs_sleep_logspace);
2511 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2505 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2512 /* 2506 /*
2513 * If we got an error, and the filesystem is shutting down, 2507 * If we got an error, and the filesystem is shutting down,
2514 * we'll catch it down below. So just continue... 2508 * we'll catch it down below. So just continue...
@@ -2534,7 +2528,7 @@ redo:
2534 xlog_trace_loggrant(log, tic, 2528 xlog_trace_loggrant(log, tic,
2535 "xlog_grant_log_space: sleep 2"); 2529 "xlog_grant_log_space: sleep 2");
2536 XFS_STATS_INC(xs_sleep_logspace); 2530 XFS_STATS_INC(xs_sleep_logspace);
2537 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2531 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2538 2532
2539 if (XLOG_FORCED_SHUTDOWN(log)) { 2533 if (XLOG_FORCED_SHUTDOWN(log)) {
2540 spin_lock(&log->l_grant_lock); 2534 spin_lock(&log->l_grant_lock);
@@ -2633,7 +2627,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2633 if (free_bytes < ntic->t_unit_res) 2627 if (free_bytes < ntic->t_unit_res)
2634 break; 2628 break;
2635 free_bytes -= ntic->t_unit_res; 2629 free_bytes -= ntic->t_unit_res;
2636 sv_signal(&ntic->t_sema); 2630 sv_signal(&ntic->t_wait);
2637 ntic = ntic->t_next; 2631 ntic = ntic->t_next;
2638 } while (ntic != log->l_write_headq); 2632 } while (ntic != log->l_write_headq);
2639 2633
@@ -2644,7 +2638,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2644 xlog_trace_loggrant(log, tic, 2638 xlog_trace_loggrant(log, tic,
2645 "xlog_regrant_write_log_space: sleep 1"); 2639 "xlog_regrant_write_log_space: sleep 1");
2646 XFS_STATS_INC(xs_sleep_logspace); 2640 XFS_STATS_INC(xs_sleep_logspace);
2647 sv_wait(&tic->t_sema, PINOD|PLTWAIT, 2641 sv_wait(&tic->t_wait, PINOD|PLTWAIT,
2648 &log->l_grant_lock, s); 2642 &log->l_grant_lock, s);
2649 2643
2650 /* If we're shutting down, this tic is already 2644 /* If we're shutting down, this tic is already
@@ -2673,7 +2667,7 @@ redo:
2673 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2667 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2674 xlog_ins_ticketq(&log->l_write_headq, tic); 2668 xlog_ins_ticketq(&log->l_write_headq, tic);
2675 XFS_STATS_INC(xs_sleep_logspace); 2669 XFS_STATS_INC(xs_sleep_logspace);
2676 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2670 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2677 2671
2678 /* If we're shutting down, this tic is already off the queue */ 2672 /* If we're shutting down, this tic is already off the queue */
2679 if (XLOG_FORCED_SHUTDOWN(log)) { 2673 if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2916,7 +2910,7 @@ xlog_state_switch_iclogs(xlog_t *log,
2916 * 2. the current iclog is dirty, and the previous iclog is in the 2910 * 2. the current iclog is dirty, and the previous iclog is in the
2917 * active or dirty state. 2911 * active or dirty state.
2918 * 2912 *
2919 * We may sleep (call psema) if: 2913 * We may sleep if:
2920 * 2914 *
2921 * 1. the current iclog is not in the active nor dirty state. 2915 * 1. the current iclog is not in the active nor dirty state.
2922 * 2. the current iclog is dirty, and the previous iclog is not in the 2916 * 2. the current iclog is dirty, and the previous iclog is not in the
@@ -3013,7 +3007,7 @@ maybe_sleep:
3013 return XFS_ERROR(EIO); 3007 return XFS_ERROR(EIO);
3014 } 3008 }
3015 XFS_STATS_INC(xs_log_force_sleep); 3009 XFS_STATS_INC(xs_log_force_sleep);
3016 sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); 3010 sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
3017 /* 3011 /*
3018 * No need to grab the log lock here since we're 3012 * No need to grab the log lock here since we're
3019 * only deciding whether or not to return EIO 3013 * only deciding whether or not to return EIO
@@ -3096,7 +3090,7 @@ try_again:
3096 XLOG_STATE_SYNCING))) { 3090 XLOG_STATE_SYNCING))) {
3097 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3091 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3098 XFS_STATS_INC(xs_log_force_sleep); 3092 XFS_STATS_INC(xs_log_force_sleep);
3099 sv_wait(&iclog->ic_prev->ic_writesema, PSWP, 3093 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
3100 &log->l_icloglock, s); 3094 &log->l_icloglock, s);
3101 *log_flushed = 1; 3095 *log_flushed = 1;
3102 already_slept = 1; 3096 already_slept = 1;
@@ -3116,7 +3110,7 @@ try_again:
3116 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3110 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3117 3111
3118 /* 3112 /*
3119 * Don't wait on the forcesema if we know that we've 3113 * Don't wait on completion if we know that we've
3120 * gotten a log write error. 3114 * gotten a log write error.
3121 */ 3115 */
3122 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3116 if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3124,7 +3118,7 @@ try_again:
3124 return XFS_ERROR(EIO); 3118 return XFS_ERROR(EIO);
3125 } 3119 }
3126 XFS_STATS_INC(xs_log_force_sleep); 3120 XFS_STATS_INC(xs_log_force_sleep);
3127 sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); 3121 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3128 /* 3122 /*
3129 * No need to grab the log lock here since we're 3123 * No need to grab the log lock here since we're
3130 * only deciding whether or not to return EIO 3124 * only deciding whether or not to return EIO
@@ -3180,7 +3174,7 @@ STATIC void
3180xlog_ticket_put(xlog_t *log, 3174xlog_ticket_put(xlog_t *log,
3181 xlog_ticket_t *ticket) 3175 xlog_ticket_t *ticket)
3182{ 3176{
3183 sv_destroy(&ticket->t_sema); 3177 sv_destroy(&ticket->t_wait);
3184 kmem_zone_free(xfs_log_ticket_zone, ticket); 3178 kmem_zone_free(xfs_log_ticket_zone, ticket);
3185} /* xlog_ticket_put */ 3179} /* xlog_ticket_put */
3186 3180
@@ -3270,7 +3264,7 @@ xlog_ticket_get(xlog_t *log,
3270 tic->t_trans_type = 0; 3264 tic->t_trans_type = 0;
3271 if (xflags & XFS_LOG_PERM_RESERV) 3265 if (xflags & XFS_LOG_PERM_RESERV)
3272 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3266 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3273 sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); 3267 sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
3274 3268
3275 xlog_tic_reset_res(tic); 3269 xlog_tic_reset_res(tic);
3276 3270
@@ -3557,14 +3551,14 @@ xfs_log_force_umount(
3557 */ 3551 */
3558 if ((tic = log->l_reserve_headq)) { 3552 if ((tic = log->l_reserve_headq)) {
3559 do { 3553 do {
3560 sv_signal(&tic->t_sema); 3554 sv_signal(&tic->t_wait);
3561 tic = tic->t_next; 3555 tic = tic->t_next;
3562 } while (tic != log->l_reserve_headq); 3556 } while (tic != log->l_reserve_headq);
3563 } 3557 }
3564 3558
3565 if ((tic = log->l_write_headq)) { 3559 if ((tic = log->l_write_headq)) {
3566 do { 3560 do {
3567 sv_signal(&tic->t_sema); 3561 sv_signal(&tic->t_wait);
3568 tic = tic->t_next; 3562 tic = tic->t_next;
3569 } while (tic != log->l_write_headq); 3563 } while (tic != log->l_write_headq);
3570 } 3564 }
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d1d678ecb63e..d47b91f10822 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -149,7 +149,7 @@ int xfs_log_mount(struct xfs_mount *mp,
149 struct xfs_buftarg *log_target, 149 struct xfs_buftarg *log_target,
150 xfs_daddr_t start_block, 150 xfs_daddr_t start_block,
151 int num_bblocks); 151 int num_bblocks);
152int xfs_log_mount_finish(struct xfs_mount *mp, int); 152int xfs_log_mount_finish(struct xfs_mount *mp);
153void xfs_log_move_tail(struct xfs_mount *mp, 153void xfs_log_move_tail(struct xfs_mount *mp,
154 xfs_lsn_t tail_lsn); 154 xfs_lsn_t tail_lsn);
155int xfs_log_notify(struct xfs_mount *mp, 155int xfs_log_notify(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 6245913196b4..c8a5b22ee3e3 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -241,7 +241,7 @@ typedef struct xlog_res {
241} xlog_res_t; 241} xlog_res_t;
242 242
243typedef struct xlog_ticket { 243typedef struct xlog_ticket {
244 sv_t t_sema; /* sleep on this semaphore : 20 */ 244 sv_t t_wait; /* ticket wait queue : 20 */
245 struct xlog_ticket *t_next; /* :4|8 */ 245 struct xlog_ticket *t_next; /* :4|8 */
246 struct xlog_ticket *t_prev; /* :4|8 */ 246 struct xlog_ticket *t_prev; /* :4|8 */
247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 247 xlog_tid_t t_tid; /* transaction identifier : 4 */
@@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header {
314 * xlog_rec_header_t into the reserved space. 314 * xlog_rec_header_t into the reserved space.
315 * - ic_data follows, so a write to disk can start at the beginning of 315 * - ic_data follows, so a write to disk can start at the beginning of
316 * the iclog. 316 * the iclog.
317 * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. 317 * - ic_force_wait is used to implement synchronous forcing of the iclog to disk.
318 * - ic_next is the pointer to the next iclog in the ring. 318 * - ic_next is the pointer to the next iclog in the ring.
319 * - ic_bp is a pointer to the buffer used to write this incore log to disk. 319 * - ic_bp is a pointer to the buffer used to write this incore log to disk.
320 * - ic_log is a pointer back to the global log structure. 320 * - ic_log is a pointer back to the global log structure.
@@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header {
339 * and move everything else out to subsequent cachelines. 339 * and move everything else out to subsequent cachelines.
340 */ 340 */
341typedef struct xlog_iclog_fields { 341typedef struct xlog_iclog_fields {
342 sv_t ic_forcesema; 342 sv_t ic_force_wait;
343 sv_t ic_writesema; 343 sv_t ic_write_wait;
344 struct xlog_in_core *ic_next; 344 struct xlog_in_core *ic_next;
345 struct xlog_in_core *ic_prev; 345 struct xlog_in_core *ic_prev;
346 struct xfs_buf *ic_bp; 346 struct xfs_buf *ic_bp;
@@ -377,8 +377,8 @@ typedef struct xlog_in_core {
377/* 377/*
378 * Defines to save our code from this glop. 378 * Defines to save our code from this glop.
379 */ 379 */
380#define ic_forcesema hic_fields.ic_forcesema 380#define ic_force_wait hic_fields.ic_force_wait
381#define ic_writesema hic_fields.ic_writesema 381#define ic_write_wait hic_fields.ic_write_wait
382#define ic_next hic_fields.ic_next 382#define ic_next hic_fields.ic_next
383#define ic_prev hic_fields.ic_prev 383#define ic_prev hic_fields.ic_prev
384#define ic_bp hic_fields.ic_bp 384#define ic_bp hic_fields.ic_bp
@@ -468,7 +468,7 @@ extern int xlog_find_tail(xlog_t *log,
468 xfs_daddr_t *head_blk, 468 xfs_daddr_t *head_blk,
469 xfs_daddr_t *tail_blk); 469 xfs_daddr_t *tail_blk);
470extern int xlog_recover(xlog_t *log); 470extern int xlog_recover(xlog_t *log);
471extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); 471extern int xlog_recover_finish(xlog_t *log);
472extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 472extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
473extern void xlog_recover_process_iunlinks(xlog_t *log); 473extern void xlog_recover_process_iunlinks(xlog_t *log);
474 474
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9eb722ec744e..82d46ce69d5f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3940,8 +3940,7 @@ xlog_recover(
3940 */ 3940 */
3941int 3941int
3942xlog_recover_finish( 3942xlog_recover_finish(
3943 xlog_t *log, 3943 xlog_t *log)
3944 int mfsi_flags)
3945{ 3944{
3946 /* 3945 /*
3947 * Now we're ready to do the transactions needed for the 3946 * Now we're ready to do the transactions needed for the
@@ -3969,9 +3968,7 @@ xlog_recover_finish(
3969 xfs_log_force(log->l_mp, (xfs_lsn_t)0, 3968 xfs_log_force(log->l_mp, (xfs_lsn_t)0,
3970 (XFS_LOG_FORCE | XFS_LOG_SYNC)); 3969 (XFS_LOG_FORCE | XFS_LOG_SYNC));
3971 3970
3972 if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { 3971 xlog_recover_process_iunlinks(log);
3973 xlog_recover_process_iunlinks(log);
3974 }
3975 3972
3976 xlog_recover_check_summary(log); 3973 xlog_recover_check_summary(log);
3977 3974
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6c5d1325e7f6..a4503f5e9497 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -128,7 +128,7 @@ static const struct {
128 * initialized. 128 * initialized.
129 */ 129 */
130STATIC void 130STATIC void
131xfs_mount_free( 131xfs_free_perag(
132 xfs_mount_t *mp) 132 xfs_mount_t *mp)
133{ 133{
134 if (mp->m_perag) { 134 if (mp->m_perag) {
@@ -139,20 +139,6 @@ xfs_mount_free(
139 kmem_free(mp->m_perag[agno].pagb_list); 139 kmem_free(mp->m_perag[agno].pagb_list);
140 kmem_free(mp->m_perag); 140 kmem_free(mp->m_perag);
141 } 141 }
142
143 spinlock_destroy(&mp->m_ail_lock);
144 spinlock_destroy(&mp->m_sb_lock);
145 mutex_destroy(&mp->m_ilock);
146 mutex_destroy(&mp->m_growlock);
147 if (mp->m_quotainfo)
148 XFS_QM_DONE(mp);
149
150 if (mp->m_fsname != NULL)
151 kmem_free(mp->m_fsname);
152 if (mp->m_rtname != NULL)
153 kmem_free(mp->m_rtname);
154 if (mp->m_logname != NULL)
155 kmem_free(mp->m_logname);
156} 142}
157 143
158/* 144/*
@@ -704,11 +690,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
704 * Update alignment values based on mount options and sb values 690 * Update alignment values based on mount options and sb values
705 */ 691 */
706STATIC int 692STATIC int
707xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) 693xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags)
708{ 694{
709 xfs_sb_t *sbp = &(mp->m_sb); 695 xfs_sb_t *sbp = &(mp->m_sb);
710 696
711 if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { 697 if (mp->m_dalign) {
712 /* 698 /*
713 * If stripe unit and stripe width are not multiples 699 * If stripe unit and stripe width are not multiples
714 * of the fs blocksize turn off alignment. 700 * of the fs blocksize turn off alignment.
@@ -864,7 +850,7 @@ xfs_set_inoalignment(xfs_mount_t *mp)
864 * Check that the data (and log if separate) are an ok size. 850 * Check that the data (and log if separate) are an ok size.
865 */ 851 */
866STATIC int 852STATIC int
867xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) 853xfs_check_sizes(xfs_mount_t *mp)
868{ 854{
869 xfs_buf_t *bp; 855 xfs_buf_t *bp;
870 xfs_daddr_t d; 856 xfs_daddr_t d;
@@ -887,8 +873,7 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
887 return error; 873 return error;
888 } 874 }
889 875
890 if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && 876 if (mp->m_logdev_targp != mp->m_ddev_targp) {
891 mp->m_logdev_targp != mp->m_ddev_targp) {
892 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 877 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
893 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 878 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
894 cmn_err(CE_WARN, "XFS: size check 3 failed"); 879 cmn_err(CE_WARN, "XFS: size check 3 failed");
@@ -923,15 +908,13 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
923 */ 908 */
924int 909int
925xfs_mountfs( 910xfs_mountfs(
926 xfs_mount_t *mp, 911 xfs_mount_t *mp)
927 int mfsi_flags)
928{ 912{
929 xfs_sb_t *sbp = &(mp->m_sb); 913 xfs_sb_t *sbp = &(mp->m_sb);
930 xfs_inode_t *rip; 914 xfs_inode_t *rip;
931 __uint64_t resblks; 915 __uint64_t resblks;
932 __int64_t update_flags = 0LL; 916 __int64_t update_flags = 0LL;
933 uint quotamount, quotaflags; 917 uint quotamount, quotaflags;
934 int agno;
935 int uuid_mounted = 0; 918 int uuid_mounted = 0;
936 int error = 0; 919 int error = 0;
937 920
@@ -985,7 +968,7 @@ xfs_mountfs(
985 * allocator alignment is within an ag, therefore ag has 968 * allocator alignment is within an ag, therefore ag has
986 * to be aligned at stripe boundary. 969 * to be aligned at stripe boundary.
987 */ 970 */
988 error = xfs_update_alignment(mp, mfsi_flags, &update_flags); 971 error = xfs_update_alignment(mp, &update_flags);
989 if (error) 972 if (error)
990 goto error1; 973 goto error1;
991 974
@@ -1004,8 +987,7 @@ xfs_mountfs(
1004 * since a single partition filesystem is identical to a single 987 * since a single partition filesystem is identical to a single
1005 * partition volume/filesystem. 988 * partition volume/filesystem.
1006 */ 989 */
1007 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 990 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
1008 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
1009 if (xfs_uuid_mount(mp)) { 991 if (xfs_uuid_mount(mp)) {
1010 error = XFS_ERROR(EINVAL); 992 error = XFS_ERROR(EINVAL);
1011 goto error1; 993 goto error1;
@@ -1033,7 +1015,7 @@ xfs_mountfs(
1033 /* 1015 /*
1034 * Check that the data (and log if separate) are an ok size. 1016 * Check that the data (and log if separate) are an ok size.
1035 */ 1017 */
1036 error = xfs_check_sizes(mp, mfsi_flags); 1018 error = xfs_check_sizes(mp);
1037 if (error) 1019 if (error)
1038 goto error1; 1020 goto error1;
1039 1021
@@ -1047,13 +1029,6 @@ xfs_mountfs(
1047 } 1029 }
1048 1030
1049 /* 1031 /*
1050 * For client case we are done now
1051 */
1052 if (mfsi_flags & XFS_MFSI_CLIENT) {
1053 return 0;
1054 }
1055
1056 /*
1057 * Copies the low order bits of the timestamp and the randomly 1032 * Copies the low order bits of the timestamp and the randomly
1058 * set "sequence" number out of a UUID. 1033 * set "sequence" number out of a UUID.
1059 */ 1034 */
@@ -1077,8 +1052,10 @@ xfs_mountfs(
1077 * Allocate and initialize the per-ag data. 1052 * Allocate and initialize the per-ag data.
1078 */ 1053 */
1079 init_rwsem(&mp->m_peraglock); 1054 init_rwsem(&mp->m_peraglock);
1080 mp->m_perag = 1055 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),
1081 kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); 1056 KM_MAYFAIL);
1057 if (!mp->m_perag)
1058 goto error1;
1082 1059
1083 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); 1060 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1084 1061
@@ -1190,7 +1167,7 @@ xfs_mountfs(
1190 * delayed until after the root and real-time bitmap inodes 1167 * delayed until after the root and real-time bitmap inodes
1191 * were consistently read in. 1168 * were consistently read in.
1192 */ 1169 */
1193 error = xfs_log_mount_finish(mp, mfsi_flags); 1170 error = xfs_log_mount_finish(mp);
1194 if (error) { 1171 if (error) {
1195 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1172 cmn_err(CE_WARN, "XFS: log mount finish failed");
1196 goto error4; 1173 goto error4;
@@ -1199,7 +1176,7 @@ xfs_mountfs(
1199 /* 1176 /*
1200 * Complete the quota initialisation, post-log-replay component. 1177 * Complete the quota initialisation, post-log-replay component.
1201 */ 1178 */
1202 error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); 1179 error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
1203 if (error) 1180 if (error)
1204 goto error4; 1181 goto error4;
1205 1182
@@ -1233,12 +1210,7 @@ xfs_mountfs(
1233 error3: 1210 error3:
1234 xfs_log_unmount_dealloc(mp); 1211 xfs_log_unmount_dealloc(mp);
1235 error2: 1212 error2:
1236 for (agno = 0; agno < sbp->sb_agcount; agno++) 1213 xfs_free_perag(mp);
1237 if (mp->m_perag[agno].pagb_list)
1238 kmem_free(mp->m_perag[agno].pagb_list);
1239 kmem_free(mp->m_perag);
1240 mp->m_perag = NULL;
1241 /* FALLTHROUGH */
1242 error1: 1214 error1:
1243 if (uuid_mounted) 1215 if (uuid_mounted)
1244 uuid_table_remove(&mp->m_sb.sb_uuid); 1216 uuid_table_remove(&mp->m_sb.sb_uuid);
@@ -1246,16 +1218,17 @@ xfs_mountfs(
1246} 1218}
1247 1219
1248/* 1220/*
1249 * xfs_unmountfs
1250 *
1251 * This flushes out the inodes, dquots and the superblock, unmounts the 1221 * This flushes out the inodes, dquots and the superblock, unmounts the
1252 * log and makes sure that incore structures are freed. 1222 * log and makes sure that incore structures are freed.
1253 */ 1223 */
1254int 1224void
1255xfs_unmountfs(xfs_mount_t *mp) 1225xfs_unmountfs(
1226 struct xfs_mount *mp)
1256{ 1227{
1257 __uint64_t resblks; 1228 __uint64_t resblks;
1258 int error = 0; 1229 int error;
1230
1231 IRELE(mp->m_rootip);
1259 1232
1260 /* 1233 /*
1261 * We can potentially deadlock here if we have an inode cluster 1234 * We can potentially deadlock here if we have an inode cluster
@@ -1312,8 +1285,6 @@ xfs_unmountfs(xfs_mount_t *mp)
1312 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1285 xfs_unmountfs_wait(mp); /* wait for async bufs */
1313 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1286 xfs_log_unmount(mp); /* Done! No more fs ops. */
1314 1287
1315 xfs_freesb(mp);
1316
1317 /* 1288 /*
1318 * All inodes from this mount point should be freed. 1289 * All inodes from this mount point should be freed.
1319 */ 1290 */
@@ -1322,11 +1293,12 @@ xfs_unmountfs(xfs_mount_t *mp)
1322 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1293 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1323 uuid_table_remove(&mp->m_sb.sb_uuid); 1294 uuid_table_remove(&mp->m_sb.sb_uuid);
1324 1295
1325#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1296#if defined(DEBUG)
1326 xfs_errortag_clearall(mp, 0); 1297 xfs_errortag_clearall(mp, 0);
1327#endif 1298#endif
1328 xfs_mount_free(mp); 1299 xfs_free_perag(mp);
1329 return 0; 1300 if (mp->m_quotainfo)
1301 XFS_QM_DONE(mp);
1330} 1302}
1331 1303
1332STATIC void 1304STATIC void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5269bd6e3df0..f3c1024b1241 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -114,7 +114,7 @@ struct xfs_dqtrxops;
114struct xfs_quotainfo; 114struct xfs_quotainfo;
115 115
116typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); 116typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
117typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint, int); 117typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
118typedef int (*xfs_qmunmount_t)(struct xfs_mount *); 118typedef int (*xfs_qmunmount_t)(struct xfs_mount *);
119typedef void (*xfs_qmdone_t)(struct xfs_mount *); 119typedef void (*xfs_qmdone_t)(struct xfs_mount *);
120typedef void (*xfs_dqrele_t)(struct xfs_dquot *); 120typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
@@ -158,8 +158,8 @@ typedef struct xfs_qmops {
158 158
159#define XFS_QM_INIT(mp, mnt, fl) \ 159#define XFS_QM_INIT(mp, mnt, fl) \
160 (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) 160 (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
161#define XFS_QM_MOUNT(mp, mnt, fl, mfsi_flags) \ 161#define XFS_QM_MOUNT(mp, mnt, fl) \
162 (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl, mfsi_flags) 162 (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
163#define XFS_QM_UNMOUNT(mp) \ 163#define XFS_QM_UNMOUNT(mp) \
164 (*(mp)->m_qm_ops->xfs_qmunmount)(mp) 164 (*(mp)->m_qm_ops->xfs_qmunmount)(mp)
165#define XFS_QM_DONE(mp) \ 165#define XFS_QM_DONE(mp) \
@@ -442,13 +442,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
442/* 442/*
443 * Flags for xfs_mountfs 443 * Flags for xfs_mountfs
444 */ 444 */
445#define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */
446#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */
447/* XFS_MFSI_RRINODES */
448#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */
449 /* log recovery */
450#define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */
451/* XFS_MFSI_CONVERT_SUNIT */
452#define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ 445#define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */
453 446
454#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) 447#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d)
@@ -517,10 +510,10 @@ typedef struct xfs_mod_sb {
517 510
518extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 511extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
519extern int xfs_log_sbcount(xfs_mount_t *, uint); 512extern int xfs_log_sbcount(xfs_mount_t *, uint);
520extern int xfs_mountfs(xfs_mount_t *mp, int); 513extern int xfs_mountfs(xfs_mount_t *mp);
521extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 514extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
522 515
523extern int xfs_unmountfs(xfs_mount_t *); 516extern void xfs_unmountfs(xfs_mount_t *);
524extern int xfs_unmountfs_writesb(xfs_mount_t *); 517extern int xfs_unmountfs_writesb(xfs_mount_t *);
525extern int xfs_unmount_flush(xfs_mount_t *, int); 518extern int xfs_unmount_flush(xfs_mount_t *, int);
526extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 519extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index bf87a5913504..e2f68de16159 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -74,18 +74,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,
74 */ 74 */
75 75
76/* 76/*
77 * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
78 */
79STATIC int
80xfs_lowbit32(
81 __uint32_t v)
82{
83 if (v)
84 return ffs(v) - 1;
85 return -1;
86}
87
88/*
89 * Allocate space to the bitmap or summary file, and zero it, for growfs. 77 * Allocate space to the bitmap or summary file, and zero it, for growfs.
90 */ 78 */
91STATIC int /* error */ 79STATIC int /* error */
@@ -450,6 +438,7 @@ xfs_rtallocate_extent_near(
450 } 438 }
451 bbno = XFS_BITTOBLOCK(mp, bno); 439 bbno = XFS_BITTOBLOCK(mp, bno);
452 i = 0; 440 i = 0;
441 ASSERT(minlen != 0);
453 log2len = xfs_highbit32(minlen); 442 log2len = xfs_highbit32(minlen);
454 /* 443 /*
455 * Loop over all bitmap blocks (bbno + i is current block). 444 * Loop over all bitmap blocks (bbno + i is current block).
@@ -618,6 +607,8 @@ xfs_rtallocate_extent_size(
618 xfs_suminfo_t sum; /* summary information for extents */ 607 xfs_suminfo_t sum; /* summary information for extents */
619 608
620 ASSERT(minlen % prod == 0 && maxlen % prod == 0); 609 ASSERT(minlen % prod == 0 && maxlen % prod == 0);
610 ASSERT(maxlen != 0);
611
621 /* 612 /*
622 * Loop over all the levels starting with maxlen. 613 * Loop over all the levels starting with maxlen.
623 * At each level, look at all the bitmap blocks, to see if there 614 * At each level, look at all the bitmap blocks, to see if there
@@ -675,6 +666,9 @@ xfs_rtallocate_extent_size(
675 *rtblock = NULLRTBLOCK; 666 *rtblock = NULLRTBLOCK;
676 return 0; 667 return 0;
677 } 668 }
669 ASSERT(minlen != 0);
670 ASSERT(maxlen != 0);
671
678 /* 672 /*
679 * Loop over sizes, from maxlen down to minlen. 673 * Loop over sizes, from maxlen down to minlen.
680 * This time, when we do the allocations, allow smaller ones 674 * This time, when we do the allocations, allow smaller ones
@@ -1961,6 +1955,7 @@ xfs_growfs_rt(
1961 nsbp->sb_blocksize * nsbp->sb_rextsize); 1955 nsbp->sb_blocksize * nsbp->sb_rextsize);
1962 nsbp->sb_rextents = nsbp->sb_rblocks; 1956 nsbp->sb_rextents = nsbp->sb_rblocks;
1963 do_div(nsbp->sb_rextents, nsbp->sb_rextsize); 1957 do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
1958 ASSERT(nsbp->sb_rextents != 0);
1964 nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); 1959 nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
1965 nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; 1960 nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
1966 nrsumsize = 1961 nrsumsize =
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index b0f31c09a76d..3a82576dde9a 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -314,7 +314,7 @@ xfs_bioerror_relse(
314 * ASYNC buffers. 314 * ASYNC buffers.
315 */ 315 */
316 XFS_BUF_ERROR(bp, EIO); 316 XFS_BUF_ERROR(bp, EIO);
317 XFS_BUF_V_IODONESEMA(bp); 317 XFS_BUF_FINISH_IOWAIT(bp);
318 } else { 318 } else {
319 xfs_buf_relse(bp); 319 xfs_buf_relse(bp);
320 } 320 }
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index e4ebddd3c500..4e1c22a23be5 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -43,6 +43,7 @@
43#include "xfs_quota.h" 43#include "xfs_quota.h"
44#include "xfs_trans_priv.h" 44#include "xfs_trans_priv.h"
45#include "xfs_trans_space.h" 45#include "xfs_trans_space.h"
46#include "xfs_inode_item.h"
46 47
47 48
48STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *); 49STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *);
@@ -253,7 +254,7 @@ _xfs_trans_alloc(
253 tp->t_mountp = mp; 254 tp->t_mountp = mp;
254 tp->t_items_free = XFS_LIC_NUM_SLOTS; 255 tp->t_items_free = XFS_LIC_NUM_SLOTS;
255 tp->t_busy_free = XFS_LBC_NUM_SLOTS; 256 tp->t_busy_free = XFS_LBC_NUM_SLOTS;
256 XFS_LIC_INIT(&(tp->t_items)); 257 xfs_lic_init(&(tp->t_items));
257 XFS_LBC_INIT(&(tp->t_busy)); 258 XFS_LBC_INIT(&(tp->t_busy));
258 return tp; 259 return tp;
259} 260}
@@ -282,7 +283,7 @@ xfs_trans_dup(
282 ntp->t_mountp = tp->t_mountp; 283 ntp->t_mountp = tp->t_mountp;
283 ntp->t_items_free = XFS_LIC_NUM_SLOTS; 284 ntp->t_items_free = XFS_LIC_NUM_SLOTS;
284 ntp->t_busy_free = XFS_LBC_NUM_SLOTS; 285 ntp->t_busy_free = XFS_LBC_NUM_SLOTS;
285 XFS_LIC_INIT(&(ntp->t_items)); 286 xfs_lic_init(&(ntp->t_items));
286 XFS_LBC_INIT(&(ntp->t_busy)); 287 XFS_LBC_INIT(&(ntp->t_busy));
287 288
288 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 289 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1169,7 +1170,7 @@ xfs_trans_cancel(
1169 while (licp != NULL) { 1170 while (licp != NULL) {
1170 lidp = licp->lic_descs; 1171 lidp = licp->lic_descs;
1171 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1172 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1172 if (XFS_LIC_ISFREE(licp, i)) { 1173 if (xfs_lic_isfree(licp, i)) {
1173 continue; 1174 continue;
1174 } 1175 }
1175 1176
@@ -1216,6 +1217,68 @@ xfs_trans_free(
1216 kmem_zone_free(xfs_trans_zone, tp); 1217 kmem_zone_free(xfs_trans_zone, tp);
1217} 1218}
1218 1219
1220/*
1221 * Roll from one trans in the sequence of PERMANENT transactions to
1222 * the next: permanent transactions are only flushed out when
1223 * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon
1224 * as possible to let chunks of it go to the log. So we commit the
1225 * chunk we've been working on and get a new transaction to continue.
1226 */
1227int
1228xfs_trans_roll(
1229 struct xfs_trans **tpp,
1230 struct xfs_inode *dp)
1231{
1232 struct xfs_trans *trans;
1233 unsigned int logres, count;
1234 int error;
1235
1236 /*
1237 * Ensure that the inode is always logged.
1238 */
1239 trans = *tpp;
1240 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
1241
1242 /*
1243 * Copy the critical parameters from one trans to the next.
1244 */
1245 logres = trans->t_log_res;
1246 count = trans->t_log_count;
1247 *tpp = xfs_trans_dup(trans);
1248
1249 /*
1250 * Commit the current transaction.
1251 * If this commit failed, then it'd just unlock those items that
1252 * are not marked ihold. That also means that a filesystem shutdown
1253 * is in progress. The caller takes the responsibility to cancel
1254 * the duplicate transaction that gets returned.
1255 */
1256 error = xfs_trans_commit(trans, 0);
1257 if (error)
1258 return (error);
1259
1260 trans = *tpp;
1261
1262 /*
1263 * Reserve space in the log for the next transaction.
1264 * This also pushes items in the "AIL", the list of logged items,
1265 * out to disk if they are taking up space at the tail of the log
1266 * that we want to use. This requires that either nothing be locked
1267 * across this call, or that anything that is locked be logged in
1268 * the prior and the next transactions.
1269 */
1270 error = xfs_trans_reserve(trans, 0, logres, 0,
1271 XFS_TRANS_PERM_LOG_RES, count);
1272 /*
1273 * Ensure that the inode is in the new transaction and locked.
1274 */
1275 if (error)
1276 return error;
1277
1278 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
1279 xfs_trans_ihold(trans, dp);
1280 return 0;
1281}
1219 1282
1220/* 1283/*
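For illustration, a minimal sketch of the caller-side pattern xfs_trans_roll() enables, assuming kernel context; more_extents_to_free() and free_one_extent() are hypothetical stand-ins for the caller's real per-chunk work, not functions from this patch:

/* Illustrative sketch only -- not part of this patch. */
STATIC int
example_rolling_update(
	struct xfs_trans	**tpp,	/* active permanent transaction */
	struct xfs_inode	*ip)	/* joined to *tpp, ILOCK held */
{
	int			error;

	while (more_extents_to_free(ip)) {	/* hypothetical helper */
		free_one_extent(*tpp, ip);	/* hypothetical helper */

		/*
		 * Commit this chunk and continue in a duplicate
		 * transaction with the same log reservation; the inode
		 * stays locked and is re-joined by xfs_trans_roll().
		 */
		error = xfs_trans_roll(tpp, ip);
		if (error)
			return error;	/* caller cancels *tpp */
	}
	return 0;
}

Each committed chunk releases its log space, so a long-running operation never pins the tail of the log behind one huge transaction.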
1221 * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item(). 1284 * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
@@ -1253,7 +1316,7 @@ xfs_trans_committed(
1253 * Special case the chunk embedded in the transaction. 1316 * Special case the chunk embedded in the transaction.
1254 */ 1317 */
1255 licp = &(tp->t_items); 1318 licp = &(tp->t_items);
1256 if (!(XFS_LIC_ARE_ALL_FREE(licp))) { 1319 if (!(xfs_lic_are_all_free(licp))) {
1257 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); 1320 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1258 } 1321 }
1259 1322
@@ -1262,7 +1325,7 @@ xfs_trans_committed(
1262 */ 1325 */
1263 licp = licp->lic_next; 1326 licp = licp->lic_next;
1264 while (licp != NULL) { 1327 while (licp != NULL) {
1265 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 1328 ASSERT(!xfs_lic_are_all_free(licp));
1266 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); 1329 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1267 next_licp = licp->lic_next; 1330 next_licp = licp->lic_next;
1268 kmem_free(licp); 1331 kmem_free(licp);
@@ -1325,7 +1388,7 @@ xfs_trans_chunk_committed(
1325 1388
1326 lidp = licp->lic_descs; 1389 lidp = licp->lic_descs;
1327 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1390 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1328 if (XFS_LIC_ISFREE(licp, i)) { 1391 if (xfs_lic_isfree(licp, i)) {
1329 continue; 1392 continue;
1330 } 1393 }
1331 1394
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 0804207c7391..74c80bd2b0ec 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -210,62 +210,52 @@ typedef struct xfs_log_item_chunk {
210 * lic_unused to the right value (0 matches all free). The 210 * lic_unused to the right value (0 matches all free). The
211 * lic_descs.lid_index values are set up as each desc is allocated. 211 * lic_descs.lid_index values are set up as each desc is allocated.
212 */ 212 */
213#define XFS_LIC_INIT(cp) xfs_lic_init(cp)
214static inline void xfs_lic_init(xfs_log_item_chunk_t *cp) 213static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
215{ 214{
216 cp->lic_free = XFS_LIC_FREEMASK; 215 cp->lic_free = XFS_LIC_FREEMASK;
217} 216}
218 217
219#define XFS_LIC_INIT_SLOT(cp,slot) xfs_lic_init_slot(cp, slot)
220static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot) 218static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
221{ 219{
222 cp->lic_descs[slot].lid_index = (unsigned char)(slot); 220 cp->lic_descs[slot].lid_index = (unsigned char)(slot);
223} 221}
224 222
225#define XFS_LIC_VACANCY(cp) xfs_lic_vacancy(cp)
226static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp) 223static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
227{ 224{
228 return cp->lic_free & XFS_LIC_FREEMASK; 225 return cp->lic_free & XFS_LIC_FREEMASK;
229} 226}
230 227
231#define XFS_LIC_ALL_FREE(cp) xfs_lic_all_free(cp)
232static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp) 228static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
233{ 229{
234 cp->lic_free = XFS_LIC_FREEMASK; 230 cp->lic_free = XFS_LIC_FREEMASK;
235} 231}
236 232
237#define XFS_LIC_ARE_ALL_FREE(cp) xfs_lic_are_all_free(cp)
238static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp) 233static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
239{ 234{
240 return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK); 235 return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
241} 236}
242 237
243#define XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot)
244static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot) 238static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
245{ 239{
246 return (cp->lic_free & (1 << slot)); 240 return (cp->lic_free & (1 << slot));
247} 241}
248 242
249#define XFS_LIC_CLAIM(cp,slot) xfs_lic_claim(cp,slot)
250static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot) 243static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
251{ 244{
252 cp->lic_free &= ~(1 << slot); 245 cp->lic_free &= ~(1 << slot);
253} 246}
254 247
255#define XFS_LIC_RELSE(cp,slot) xfs_lic_relse(cp,slot)
256static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot) 248static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
257{ 249{
258 cp->lic_free |= 1 << slot; 250 cp->lic_free |= 1 << slot;
259} 251}
260 252
261#define XFS_LIC_SLOT(cp,slot) xfs_lic_slot(cp,slot)
262static inline xfs_log_item_desc_t * 253static inline xfs_log_item_desc_t *
263xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot) 254xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
264{ 255{
265 return &(cp->lic_descs[slot]); 256 return &(cp->lic_descs[slot]);
266} 257}
267 258
268#define XFS_LIC_DESC_TO_SLOT(dp) xfs_lic_desc_to_slot(dp)
269static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) 259static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
270{ 260{
271 return (uint)dp->lid_index; 261 return (uint)dp->lid_index;
@@ -278,7 +268,6 @@ static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
278 * All of this yields the address of the chunk, which is 268 * All of this yields the address of the chunk, which is
279 * cast to a chunk pointer. 269 * cast to a chunk pointer.
280 */ 270 */
281#define XFS_LIC_DESC_TO_CHUNK(dp) xfs_lic_desc_to_chunk(dp)
282static inline xfs_log_item_chunk_t * 271static inline xfs_log_item_chunk_t *
283xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) 272xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
284{ 273{
@@ -986,6 +975,7 @@ int _xfs_trans_commit(xfs_trans_t *,
986 int *); 975 int *);
987#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) 976#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
988void xfs_trans_cancel(xfs_trans_t *, int); 977void xfs_trans_cancel(xfs_trans_t *, int);
978int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
989int xfs_trans_ail_init(struct xfs_mount *); 979int xfs_trans_ail_init(struct xfs_mount *);
990void xfs_trans_ail_destroy(struct xfs_mount *); 980void xfs_trans_ail_destroy(struct xfs_mount *);
991void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); 981void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
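With the XFS_LIC_* macro wrappers removed, the surviving inlines are the whole slot-tracking API. Below is a small userspace sketch of the lic_free bitmask semantics they implement (a set bit marks a free slot); the struct and the 15-slot mask are assumed stand-ins, not the kernel definitions:

#include <assert.h>

#define FREEMASK	0x7fff		/* stand-in for XFS_LIC_FREEMASK, 15 slots assumed */

struct chunk {
	unsigned short	free;		/* stand-in for lic_free */
};

int main(void)
{
	struct chunk c = { .free = FREEMASK };		/* xfs_lic_init(): all slots free */

	assert((c.free & FREEMASK) == FREEMASK);	/* xfs_lic_are_all_free() */
	c.free &= ~(1 << 3);				/* xfs_lic_claim(cp, 3) */
	assert(!(c.free & (1 << 3)));			/* !xfs_lic_isfree(cp, 3) */
	c.free |= 1 << 3;				/* xfs_lic_relse(cp, 3) */
	assert((c.free & FREEMASK) == FREEMASK);	/* back to all free */
	return 0;
}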
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index cb0c5839154b..4e855b5ced66 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -1021,16 +1021,16 @@ xfs_trans_buf_item_match(
1021 bp = NULL; 1021 bp = NULL;
1022 len = BBTOB(len); 1022 len = BBTOB(len);
1023 licp = &tp->t_items; 1023 licp = &tp->t_items;
1024 if (!XFS_LIC_ARE_ALL_FREE(licp)) { 1024 if (!xfs_lic_are_all_free(licp)) {
1025 for (i = 0; i < licp->lic_unused; i++) { 1025 for (i = 0; i < licp->lic_unused; i++) {
1026 /* 1026 /*
1027 * Skip unoccupied slots. 1027 * Skip unoccupied slots.
1028 */ 1028 */
1029 if (XFS_LIC_ISFREE(licp, i)) { 1029 if (xfs_lic_isfree(licp, i)) {
1030 continue; 1030 continue;
1031 } 1031 }
1032 1032
1033 lidp = XFS_LIC_SLOT(licp, i); 1033 lidp = xfs_lic_slot(licp, i);
1034 blip = (xfs_buf_log_item_t *)lidp->lid_item; 1034 blip = (xfs_buf_log_item_t *)lidp->lid_item;
1035 if (blip->bli_item.li_type != XFS_LI_BUF) { 1035 if (blip->bli_item.li_type != XFS_LI_BUF) {
1036 continue; 1036 continue;
@@ -1074,7 +1074,7 @@ xfs_trans_buf_item_match_all(
1074 bp = NULL; 1074 bp = NULL;
1075 len = BBTOB(len); 1075 len = BBTOB(len);
1076 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { 1076 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
1077 if (XFS_LIC_ARE_ALL_FREE(licp)) { 1077 if (xfs_lic_are_all_free(licp)) {
1078 ASSERT(licp == &tp->t_items); 1078 ASSERT(licp == &tp->t_items);
1079 ASSERT(licp->lic_next == NULL); 1079 ASSERT(licp->lic_next == NULL);
1080 return NULL; 1080 return NULL;
@@ -1083,11 +1083,11 @@ xfs_trans_buf_item_match_all(
1083 /* 1083 /*
1084 * Skip unoccupied slots. 1084 * Skip unoccupied slots.
1085 */ 1085 */
1086 if (XFS_LIC_ISFREE(licp, i)) { 1086 if (xfs_lic_isfree(licp, i)) {
1087 continue; 1087 continue;
1088 } 1088 }
1089 1089
1090 lidp = XFS_LIC_SLOT(licp, i); 1090 lidp = xfs_lic_slot(licp, i);
1091 blip = (xfs_buf_log_item_t *)lidp->lid_item; 1091 blip = (xfs_buf_log_item_t *)lidp->lid_item;
1092 if (blip->bli_item.li_type != XFS_LI_BUF) { 1092 if (blip->bli_item.li_type != XFS_LI_BUF) {
1093 continue; 1093 continue;
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index db5c83595526..3c666e8317f8 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -53,11 +53,11 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
53 * Initialize the chunk, and then 53 * Initialize the chunk, and then
54 * claim the first slot in the newly allocated chunk. 54 * claim the first slot in the newly allocated chunk.
55 */ 55 */
56 XFS_LIC_INIT(licp); 56 xfs_lic_init(licp);
57 XFS_LIC_CLAIM(licp, 0); 57 xfs_lic_claim(licp, 0);
58 licp->lic_unused = 1; 58 licp->lic_unused = 1;
59 XFS_LIC_INIT_SLOT(licp, 0); 59 xfs_lic_init_slot(licp, 0);
60 lidp = XFS_LIC_SLOT(licp, 0); 60 lidp = xfs_lic_slot(licp, 0);
61 61
62 /* 62 /*
63 * Link in the new chunk and update the free count. 63 * Link in the new chunk and update the free count.
@@ -88,14 +88,14 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
88 */ 88 */
89 licp = &tp->t_items; 89 licp = &tp->t_items;
90 while (licp != NULL) { 90 while (licp != NULL) {
91 if (XFS_LIC_VACANCY(licp)) { 91 if (xfs_lic_vacancy(licp)) {
92 if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { 92 if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
93 i = licp->lic_unused; 93 i = licp->lic_unused;
94 ASSERT(XFS_LIC_ISFREE(licp, i)); 94 ASSERT(xfs_lic_isfree(licp, i));
95 break; 95 break;
96 } 96 }
97 for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { 97 for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
98 if (XFS_LIC_ISFREE(licp, i)) 98 if (xfs_lic_isfree(licp, i))
99 break; 99 break;
100 } 100 }
101 ASSERT(i <= XFS_LIC_MAX_SLOT); 101 ASSERT(i <= XFS_LIC_MAX_SLOT);
@@ -108,12 +108,12 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
108 * If we find a free descriptor, claim it, 108 * If we find a free descriptor, claim it,
109 * initialize it, and return it. 109 * initialize it, and return it.
110 */ 110 */
111 XFS_LIC_CLAIM(licp, i); 111 xfs_lic_claim(licp, i);
112 if (licp->lic_unused <= i) { 112 if (licp->lic_unused <= i) {
113 licp->lic_unused = i + 1; 113 licp->lic_unused = i + 1;
114 XFS_LIC_INIT_SLOT(licp, i); 114 xfs_lic_init_slot(licp, i);
115 } 115 }
116 lidp = XFS_LIC_SLOT(licp, i); 116 lidp = xfs_lic_slot(licp, i);
117 tp->t_items_free--; 117 tp->t_items_free--;
118 lidp->lid_item = lip; 118 lidp->lid_item = lip;
119 lidp->lid_flags = 0; 119 lidp->lid_flags = 0;
@@ -136,9 +136,9 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
136 xfs_log_item_chunk_t *licp; 136 xfs_log_item_chunk_t *licp;
137 xfs_log_item_chunk_t **licpp; 137 xfs_log_item_chunk_t **licpp;
138 138
139 slot = XFS_LIC_DESC_TO_SLOT(lidp); 139 slot = xfs_lic_desc_to_slot(lidp);
140 licp = XFS_LIC_DESC_TO_CHUNK(lidp); 140 licp = xfs_lic_desc_to_chunk(lidp);
141 XFS_LIC_RELSE(licp, slot); 141 xfs_lic_relse(licp, slot);
142 lidp->lid_item->li_desc = NULL; 142 lidp->lid_item->li_desc = NULL;
143 tp->t_items_free++; 143 tp->t_items_free++;
144 144
@@ -154,7 +154,7 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
154 * Also decrement the transaction structure's count of free items 154 * Also decrement the transaction structure's count of free items
155 * by the number in a chunk since we are freeing an empty chunk. 155 * by the number in a chunk since we are freeing an empty chunk.
156 */ 156 */
157 if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) { 157 if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
158 licpp = &(tp->t_items.lic_next); 158 licpp = &(tp->t_items.lic_next);
159 while (*licpp != licp) { 159 while (*licpp != licp) {
160 ASSERT(*licpp != NULL); 160 ASSERT(*licpp != NULL);
@@ -207,20 +207,20 @@ xfs_trans_first_item(xfs_trans_t *tp)
207 /* 207 /*
208 * If it's not in the first chunk, skip to the second. 208 * If it's not in the first chunk, skip to the second.
209 */ 209 */
210 if (XFS_LIC_ARE_ALL_FREE(licp)) { 210 if (xfs_lic_are_all_free(licp)) {
211 licp = licp->lic_next; 211 licp = licp->lic_next;
212 } 212 }
213 213
214 /* 214 /*
215 * Return the first non-free descriptor in the chunk. 215 * Return the first non-free descriptor in the chunk.
216 */ 216 */
217 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 217 ASSERT(!xfs_lic_are_all_free(licp));
218 for (i = 0; i < licp->lic_unused; i++) { 218 for (i = 0; i < licp->lic_unused; i++) {
219 if (XFS_LIC_ISFREE(licp, i)) { 219 if (xfs_lic_isfree(licp, i)) {
220 continue; 220 continue;
221 } 221 }
222 222
223 return XFS_LIC_SLOT(licp, i); 223 return xfs_lic_slot(licp, i);
224 } 224 }
225 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); 225 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
226 return NULL; 226 return NULL;
@@ -242,18 +242,18 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
242 xfs_log_item_chunk_t *licp; 242 xfs_log_item_chunk_t *licp;
243 int i; 243 int i;
244 244
245 licp = XFS_LIC_DESC_TO_CHUNK(lidp); 245 licp = xfs_lic_desc_to_chunk(lidp);
246 246
247 /* 247 /*
248 * First search the rest of the chunk. The for loop keeps us 248 * First search the rest of the chunk. The for loop keeps us
249 * from referencing things beyond the end of the chunk. 249 * from referencing things beyond the end of the chunk.
250 */ 250 */
251 for (i = (int)XFS_LIC_DESC_TO_SLOT(lidp) + 1; i < licp->lic_unused; i++) { 251 for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) {
252 if (XFS_LIC_ISFREE(licp, i)) { 252 if (xfs_lic_isfree(licp, i)) {
253 continue; 253 continue;
254 } 254 }
255 255
256 return XFS_LIC_SLOT(licp, i); 256 return xfs_lic_slot(licp, i);
257 } 257 }
258 258
259 /* 259 /*
@@ -266,13 +266,13 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
266 } 266 }
267 267
268 licp = licp->lic_next; 268 licp = licp->lic_next;
269 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 269 ASSERT(!xfs_lic_are_all_free(licp));
270 for (i = 0; i < licp->lic_unused; i++) { 270 for (i = 0; i < licp->lic_unused; i++) {
271 if (XFS_LIC_ISFREE(licp, i)) { 271 if (xfs_lic_isfree(licp, i)) {
272 continue; 272 continue;
273 } 273 }
274 274
275 return XFS_LIC_SLOT(licp, i); 275 return xfs_lic_slot(licp, i);
276 } 276 }
277 ASSERT(0); 277 ASSERT(0);
278 /* NOTREACHED */ 278 /* NOTREACHED */
@@ -300,9 +300,9 @@ xfs_trans_free_items(
300 /* 300 /*
301 * Special case the embedded chunk so we don't free it below. 301 * Special case the embedded chunk so we don't free it below.
302 */ 302 */
303 if (!XFS_LIC_ARE_ALL_FREE(licp)) { 303 if (!xfs_lic_are_all_free(licp)) {
304 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 304 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
305 XFS_LIC_ALL_FREE(licp); 305 xfs_lic_all_free(licp);
306 licp->lic_unused = 0; 306 licp->lic_unused = 0;
307 } 307 }
308 licp = licp->lic_next; 308 licp = licp->lic_next;
@@ -311,7 +311,7 @@ xfs_trans_free_items(
311 * Unlock each item in each chunk and free the chunks. 311 * Unlock each item in each chunk and free the chunks.
312 */ 312 */
313 while (licp != NULL) { 313 while (licp != NULL) {
314 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 314 ASSERT(!xfs_lic_are_all_free(licp));
315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
316 next_licp = licp->lic_next; 316 next_licp = licp->lic_next;
317 kmem_free(licp); 317 kmem_free(licp);
@@ -347,7 +347,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
347 /* 347 /*
348 * Special case the embedded chunk so we don't free. 348 * Special case the embedded chunk so we don't free.
349 */ 349 */
350 if (!XFS_LIC_ARE_ALL_FREE(licp)) { 350 if (!xfs_lic_are_all_free(licp)) {
351 freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); 351 freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
352 } 352 }
353 licpp = &(tp->t_items.lic_next); 353 licpp = &(tp->t_items.lic_next);
@@ -358,10 +358,10 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
358 * and free empty chunks. 358 * and free empty chunks.
359 */ 359 */
360 while (licp != NULL) { 360 while (licp != NULL) {
361 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 361 ASSERT(!xfs_lic_are_all_free(licp));
362 freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); 362 freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
363 next_licp = licp->lic_next; 363 next_licp = licp->lic_next;
364 if (XFS_LIC_ARE_ALL_FREE(licp)) { 364 if (xfs_lic_are_all_free(licp)) {
365 *licpp = next_licp; 365 *licpp = next_licp;
366 kmem_free(licp); 366 kmem_free(licp);
367 freed -= XFS_LIC_NUM_SLOTS; 367 freed -= XFS_LIC_NUM_SLOTS;
@@ -402,7 +402,7 @@ xfs_trans_unlock_chunk(
402 freed = 0; 402 freed = 0;
403 lidp = licp->lic_descs; 403 lidp = licp->lic_descs;
404 for (i = 0; i < licp->lic_unused; i++, lidp++) { 404 for (i = 0; i < licp->lic_unused; i++, lidp++) {
405 if (XFS_LIC_ISFREE(licp, i)) { 405 if (xfs_lic_isfree(licp, i)) {
406 continue; 406 continue;
407 } 407 }
408 lip = lidp->lid_item; 408 lip = lidp->lid_item;
@@ -421,7 +421,7 @@ xfs_trans_unlock_chunk(
421 */ 421 */
422 if (!(freeing_chunk) && 422 if (!(freeing_chunk) &&
423 (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { 423 (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
424 XFS_LIC_RELSE(licp, i); 424 xfs_lic_relse(licp, i);
425 freed++; 425 freed++;
426 } 426 }
427 } 427 }
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 98e5f110ba5f..35d4d414bcc2 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -237,7 +237,7 @@ xfs_droplink(
237 237
238 ASSERT (ip->i_d.di_nlink > 0); 238 ASSERT (ip->i_d.di_nlink > 0);
239 ip->i_d.di_nlink--; 239 ip->i_d.di_nlink--;
240 drop_nlink(ip->i_vnode); 240 drop_nlink(VFS_I(ip));
241 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 241 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
242 242
243 error = 0; 243 error = 0;
@@ -301,7 +301,7 @@ xfs_bumplink(
301 301
302 ASSERT(ip->i_d.di_nlink > 0); 302 ASSERT(ip->i_d.di_nlink > 0);
303 ip->i_d.di_nlink++; 303 ip->i_d.di_nlink++;
304 inc_nlink(ip->i_vnode); 304 inc_nlink(VFS_I(ip));
305 if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && 305 if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) &&
306 (ip->i_d.di_nlink > XFS_MAXLINK_1)) { 306 (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
307 /* 307 /*
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f316cb85d8e2..ef321225d269 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,9 +18,6 @@
18#ifndef __XFS_UTILS_H__ 18#ifndef __XFS_UTILS_H__
19#define __XFS_UTILS_H__ 19#define __XFS_UTILS_H__
20 20
21#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
23
24extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); 21extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
25extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 22extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
26 xfs_dev_t, cred_t *, prid_t, int, 23 xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 4a9a43315a86..439dd3939dda 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -128,7 +128,6 @@ xfs_unmount_flush(
128 xfs_inode_t *rip = mp->m_rootip; 128 xfs_inode_t *rip = mp->m_rootip;
129 xfs_inode_t *rbmip; 129 xfs_inode_t *rbmip;
130 xfs_inode_t *rsumip = NULL; 130 xfs_inode_t *rsumip = NULL;
131 bhv_vnode_t *rvp = XFS_ITOV(rip);
132 int error; 131 int error;
133 132
134 xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 133 xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -146,7 +145,7 @@ xfs_unmount_flush(
146 if (error == EFSCORRUPTED) 145 if (error == EFSCORRUPTED)
147 goto fscorrupt_out; 146 goto fscorrupt_out;
148 147
149 ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); 148 ASSERT(vn_count(VFS_I(rbmip)) == 1);
150 149
151 rsumip = mp->m_rsumip; 150 rsumip = mp->m_rsumip;
152 xfs_ilock(rsumip, XFS_ILOCK_EXCL); 151 xfs_ilock(rsumip, XFS_ILOCK_EXCL);
@@ -157,7 +156,7 @@ xfs_unmount_flush(
157 if (error == EFSCORRUPTED) 156 if (error == EFSCORRUPTED)
158 goto fscorrupt_out; 157 goto fscorrupt_out;
159 158
160 ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); 159 ASSERT(vn_count(VFS_I(rsumip)) == 1);
161 } 160 }
162 161
163 /* 162 /*
@@ -167,7 +166,7 @@ xfs_unmount_flush(
167 if (error == EFSCORRUPTED) 166 if (error == EFSCORRUPTED)
168 goto fscorrupt_out2; 167 goto fscorrupt_out2;
169 168
170 if (vn_count(rvp) != 1 && !relocation) { 169 if (vn_count(VFS_I(rip)) != 1 && !relocation) {
171 xfs_iunlock(rip, XFS_ILOCK_EXCL); 170 xfs_iunlock(rip, XFS_ILOCK_EXCL);
172 return XFS_ERROR(EBUSY); 171 return XFS_ERROR(EBUSY);
173 } 172 }
@@ -284,7 +283,7 @@ xfs_sync_inodes(
284 int *bypassed) 283 int *bypassed)
285{ 284{
286 xfs_inode_t *ip = NULL; 285 xfs_inode_t *ip = NULL;
287 bhv_vnode_t *vp = NULL; 286 struct inode *vp = NULL;
288 int error; 287 int error;
289 int last_error; 288 int last_error;
290 uint64_t fflag; 289 uint64_t fflag;
@@ -404,7 +403,7 @@ xfs_sync_inodes(
404 continue; 403 continue;
405 } 404 }
406 405
407 vp = XFS_ITOV_NULL(ip); 406 vp = VFS_I(ip);
408 407
409 /* 408 /*
410 * If the vnode is gone then this is being torn down, 409 * If the vnode is gone then this is being torn down,
@@ -479,7 +478,7 @@ xfs_sync_inodes(
479 IPOINTER_INSERT(ip, mp); 478 IPOINTER_INSERT(ip, mp);
480 xfs_ilock(ip, lock_flags); 479 xfs_ilock(ip, lock_flags);
481 480
482 ASSERT(vp == XFS_ITOV(ip)); 481 ASSERT(vp == VFS_I(ip));
483 ASSERT(ip->i_mount == mp); 482 ASSERT(ip->i_mount == mp);
484 483
485 vnode_refed = B_TRUE; 484 vnode_refed = B_TRUE;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 76a1166af822..aa238c8fbd7a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -83,7 +83,7 @@ xfs_setattr(
83 cred_t *credp) 83 cred_t *credp)
84{ 84{
85 xfs_mount_t *mp = ip->i_mount; 85 xfs_mount_t *mp = ip->i_mount;
86 struct inode *inode = XFS_ITOV(ip); 86 struct inode *inode = VFS_I(ip);
87 int mask = iattr->ia_valid; 87 int mask = iattr->ia_valid;
88 xfs_trans_t *tp; 88 xfs_trans_t *tp;
89 int code; 89 int code;
@@ -182,7 +182,7 @@ xfs_setattr(
182 xfs_ilock(ip, lock_flags); 182 xfs_ilock(ip, lock_flags);
183 183
184 /* boolean: are we the file owner? */ 184 /* boolean: are we the file owner? */
185 file_owner = (current_fsuid(credp) == ip->i_d.di_uid); 185 file_owner = (current_fsuid() == ip->i_d.di_uid);
186 186
187 /* 187 /*
188 * Change various properties of a file. 188 * Change various properties of a file.
@@ -513,7 +513,6 @@ xfs_setattr(
513 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 513 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
514 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 514 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
515 ip->i_update_core = 1; 515 ip->i_update_core = 1;
516 timeflags &= ~XFS_ICHGTIME_ACC;
517 } 516 }
518 if (mask & ATTR_MTIME) { 517 if (mask & ATTR_MTIME) {
519 inode->i_mtime = iattr->ia_mtime; 518 inode->i_mtime = iattr->ia_mtime;
@@ -714,7 +713,7 @@ xfs_fsync(
714 return XFS_ERROR(EIO); 713 return XFS_ERROR(EIO);
715 714
716 /* capture size updates in I/O completion before writing the inode. */ 715 /* capture size updates in I/O completion before writing the inode. */
717 error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); 716 error = filemap_fdatawait(VFS_I(ip)->i_mapping);
718 if (error) 717 if (error)
719 return XFS_ERROR(error); 718 return XFS_ERROR(error);
720 719
@@ -1160,7 +1159,6 @@ int
1160xfs_release( 1159xfs_release(
1161 xfs_inode_t *ip) 1160 xfs_inode_t *ip)
1162{ 1161{
1163 bhv_vnode_t *vp = XFS_ITOV(ip);
1164 xfs_mount_t *mp = ip->i_mount; 1162 xfs_mount_t *mp = ip->i_mount;
1165 int error; 1163 int error;
1166 1164
@@ -1195,13 +1193,13 @@ xfs_release(
1195 * be exposed to that problem. 1193 * be exposed to that problem.
1196 */ 1194 */
1197 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 1195 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1198 if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) 1196 if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
1199 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); 1197 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
1200 } 1198 }
1201 1199
1202 if (ip->i_d.di_nlink != 0) { 1200 if (ip->i_d.di_nlink != 0) {
1203 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1201 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1204 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || 1202 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
1205 ip->i_delayed_blks > 0)) && 1203 ip->i_delayed_blks > 0)) &&
1206 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1204 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1207 (!(ip->i_d.di_flags & 1205 (!(ip->i_d.di_flags &
@@ -1227,7 +1225,6 @@ int
1227xfs_inactive( 1225xfs_inactive(
1228 xfs_inode_t *ip) 1226 xfs_inode_t *ip)
1229{ 1227{
1230 bhv_vnode_t *vp = XFS_ITOV(ip);
1231 xfs_bmap_free_t free_list; 1228 xfs_bmap_free_t free_list;
1232 xfs_fsblock_t first_block; 1229 xfs_fsblock_t first_block;
1233 int committed; 1230 int committed;
@@ -1242,7 +1239,7 @@ xfs_inactive(
1242 * If the inode is already free, then there can be nothing 1239 * If the inode is already free, then there can be nothing
1243 * to clean up here. 1240 * to clean up here.
1244 */ 1241 */
1245 if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { 1242 if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) {
1246 ASSERT(ip->i_df.if_real_bytes == 0); 1243 ASSERT(ip->i_df.if_real_bytes == 0);
1247 ASSERT(ip->i_df.if_broot_bytes == 0); 1244 ASSERT(ip->i_df.if_broot_bytes == 0);
1248 return VN_INACTIVE_CACHE; 1245 return VN_INACTIVE_CACHE;
@@ -1272,7 +1269,7 @@ xfs_inactive(
1272 1269
1273 if (ip->i_d.di_nlink != 0) { 1270 if (ip->i_d.di_nlink != 0) {
1274 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1271 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1275 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || 1272 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
1276 ip->i_delayed_blks > 0)) && 1273 ip->i_delayed_blks > 0)) &&
1277 (ip->i_df.if_flags & XFS_IFEXTENTS) && 1274 (ip->i_df.if_flags & XFS_IFEXTENTS) &&
1278 (!(ip->i_d.di_flags & 1275 (!(ip->i_d.di_flags &
@@ -1536,7 +1533,7 @@ xfs_create(
1536 * Make sure that we have allocated dquot(s) on disk. 1533 * Make sure that we have allocated dquot(s) on disk.
1537 */ 1534 */
1538 error = XFS_QM_DQVOPALLOC(mp, dp, 1535 error = XFS_QM_DQVOPALLOC(mp, dp,
1539 current_fsuid(credp), current_fsgid(credp), prid, 1536 current_fsuid(), current_fsgid(), prid,
1540 XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); 1537 XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
1541 if (error) 1538 if (error)
1542 goto std_return; 1539 goto std_return;
@@ -1708,111 +1705,6 @@ std_return:
1708} 1705}
1709 1706
1710#ifdef DEBUG 1707#ifdef DEBUG
1711/*
1712 * Some counters to see if (and how often) we are hitting some deadlock
1713 * prevention code paths.
1714 */
1715
1716int xfs_rm_locks;
1717int xfs_rm_lock_delays;
1718int xfs_rm_attempts;
1719#endif
1720
1721/*
1722 * The following routine will lock the inodes associated with the
1723 * directory and the named entry in the directory. The locks are
1724 * acquired in increasing inode number.
1725 *
1726 * If the entry is "..", then only the directory is locked. The
1727 * vnode ref count will still include that from the .. entry in
1728 * this case.
1729 *
1730 * There is a deadlock we need to worry about. If the locked directory is
1731 * in the AIL, it might be blocking up the log. The next inode we lock
1732 * could already be locked by another thread waiting for log space (e.g.
1733 * a permanent log reservation with a long running transaction (see
1734 * xfs_itruncate_finish)). To solve this, we must check if the directory
1735 * is in the AIL and use lock_nowait. If we can't lock, we need to
1736 * drop the inode lock on the directory and try again. xfs_iunlock will
1737 * potentially push the tail if we were holding up the log.
1738 */
1739STATIC int
1740xfs_lock_dir_and_entry(
1741 xfs_inode_t *dp,
1742 xfs_inode_t *ip) /* inode of entry 'name' */
1743{
1744 int attempts;
1745 xfs_ino_t e_inum;
1746 xfs_inode_t *ips[2];
1747 xfs_log_item_t *lp;
1748
1749#ifdef DEBUG
1750 xfs_rm_locks++;
1751#endif
1752 attempts = 0;
1753
1754again:
1755 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1756
1757 e_inum = ip->i_ino;
1758
1759 xfs_itrace_ref(ip);
1760
1761 /*
1762 * We want to lock in increasing inum. Since we've already
1763 * acquired the lock on the directory, we may need to release
1764 * it if the inum of the entry turns out to be less.
1765 */
1766 if (e_inum > dp->i_ino) {
1767 /*
1768 * We are already in the right order, so just
1769 * lock on the inode of the entry.
1770 * We need to use nowait if dp is in the AIL.
1771 */
1772
1773 lp = (xfs_log_item_t *)dp->i_itemp;
1774 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1775 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
1776 attempts++;
1777#ifdef DEBUG
1778 xfs_rm_attempts++;
1779#endif
1780
1781 /*
1782 * Unlock dp and try again.
1783 * xfs_iunlock will try to push the tail
1784 * if the inode is in the AIL.
1785 */
1786
1787 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1788
1789 if ((attempts % 5) == 0) {
1790 delay(1); /* Don't just spin the CPU */
1791#ifdef DEBUG
1792 xfs_rm_lock_delays++;
1793#endif
1794 }
1795 goto again;
1796 }
1797 } else {
1798 xfs_ilock(ip, XFS_ILOCK_EXCL);
1799 }
1800 } else if (e_inum < dp->i_ino) {
1801 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1802
1803 ips[0] = ip;
1804 ips[1] = dp;
1805 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
1806 }
1807 /* else e_inum == dp->i_ino */
1808 /* This can happen if we're asked to lock /x/..
1809 * the entry is "..", which is also the parent directory.
1810 */
1811
1812 return 0;
1813}
1814
1815#ifdef DEBUG
1816int xfs_locked_n; 1708int xfs_locked_n;
1817int xfs_small_retries; 1709int xfs_small_retries;
1818int xfs_middle_retries; 1710int xfs_middle_retries;
@@ -1946,6 +1838,45 @@ again:
1946#endif 1838#endif
1947} 1839}
1948 1840
1841void
1842xfs_lock_two_inodes(
1843 xfs_inode_t *ip0,
1844 xfs_inode_t *ip1,
1845 uint lock_mode)
1846{
1847 xfs_inode_t *temp;
1848 int attempts = 0;
1849 xfs_log_item_t *lp;
1850
1851 ASSERT(ip0->i_ino != ip1->i_ino);
1852
1853 if (ip0->i_ino > ip1->i_ino) {
1854 temp = ip0;
1855 ip0 = ip1;
1856 ip1 = temp;
1857 }
1858
1859 again:
1860 xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
1861
1862 /*
1863 * If the first lock we have locked is in the AIL, we must TRY to get
1864 * the second lock. If we can't get it, we must release the first one
1865 * and try again.
1866 */
1867 lp = (xfs_log_item_t *)ip0->i_itemp;
1868 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
1869 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
1870 xfs_iunlock(ip0, lock_mode);
1871 if ((++attempts % 5) == 0)
1872 delay(1); /* Don't just spin the CPU */
1873 goto again;
1874 }
1875 } else {
1876 xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
1877 }
1878}
1879
1949int 1880int
1950xfs_remove( 1881xfs_remove(
1951 xfs_inode_t *dp, 1882 xfs_inode_t *dp,
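xfs_lock_two_inodes() sorts the pair by inode number before locking, so every caller takes the two locks in the same global order. A short sketch of the calling convention, assuming kernel context; the unlock sequence is shown only for illustration:

/* Illustrative sketch only -- not part of this patch. */
xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);	/* argument order does not matter */
/* ... joint update of dp and ip under a transaction ... */
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);

Since both threads in a would-be ABBA race now attempt the lower-numbered inode first, one simply waits instead of deadlocking; the AIL trylock inside the helper covers the log-pinning case the removed comment described.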
@@ -2018,9 +1949,7 @@ xfs_remove(
2018 goto out_trans_cancel; 1949 goto out_trans_cancel;
2019 } 1950 }
2020 1951
2021 error = xfs_lock_dir_and_entry(dp, ip); 1952 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
2022 if (error)
2023 goto out_trans_cancel;
2024 1953
2025 /* 1954 /*
2026 * At this point, we've gotten both the directory and the entry 1955 * At this point, we've gotten both the directory and the entry
@@ -2047,9 +1976,6 @@ xfs_remove(
2047 } 1976 }
2048 } 1977 }
2049 1978
2050 /*
2051 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2052 */
2053 XFS_BMAP_INIT(&free_list, &first_block); 1979 XFS_BMAP_INIT(&free_list, &first_block);
2054 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 1980 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2055 &first_block, &free_list, resblks); 1981 &first_block, &free_list, resblks);
@@ -2155,7 +2081,6 @@ xfs_link(
2155{ 2081{
2156 xfs_mount_t *mp = tdp->i_mount; 2082 xfs_mount_t *mp = tdp->i_mount;
2157 xfs_trans_t *tp; 2083 xfs_trans_t *tp;
2158 xfs_inode_t *ips[2];
2159 int error; 2084 int error;
2160 xfs_bmap_free_t free_list; 2085 xfs_bmap_free_t free_list;
2161 xfs_fsblock_t first_block; 2086 xfs_fsblock_t first_block;
@@ -2203,15 +2128,7 @@ xfs_link(
2203 goto error_return; 2128 goto error_return;
2204 } 2129 }
2205 2130
2206 if (sip->i_ino < tdp->i_ino) { 2131 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
2207 ips[0] = sip;
2208 ips[1] = tdp;
2209 } else {
2210 ips[0] = tdp;
2211 ips[1] = sip;
2212 }
2213
2214 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2215 2132
2216 /* 2133 /*
2217 * Increment vnode ref counts since xfs_trans_commit & 2134 * Increment vnode ref counts since xfs_trans_commit &
@@ -2352,7 +2269,7 @@ xfs_mkdir(
2352 * Make sure that we have allocated dquot(s) on disk. 2269 * Make sure that we have allocated dquot(s) on disk.
2353 */ 2270 */
2354 error = XFS_QM_DQVOPALLOC(mp, dp, 2271 error = XFS_QM_DQVOPALLOC(mp, dp,
2355 current_fsuid(credp), current_fsgid(credp), prid, 2272 current_fsuid(), current_fsgid(), prid,
2356 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2273 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2357 if (error) 2274 if (error)
2358 goto std_return; 2275 goto std_return;
@@ -2578,7 +2495,7 @@ xfs_symlink(
2578 * Make sure that we have allocated dquot(s) on disk. 2495 * Make sure that we have allocated dquot(s) on disk.
2579 */ 2496 */
2580 error = XFS_QM_DQVOPALLOC(mp, dp, 2497 error = XFS_QM_DQVOPALLOC(mp, dp,
2581 current_fsuid(credp), current_fsgid(credp), prid, 2498 current_fsuid(), current_fsgid(), prid,
2582 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2499 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2583 if (error) 2500 if (error)
2584 goto std_return; 2501 goto std_return;
@@ -2873,14 +2790,13 @@ int
2873xfs_reclaim( 2790xfs_reclaim(
2874 xfs_inode_t *ip) 2791 xfs_inode_t *ip)
2875{ 2792{
2876 bhv_vnode_t *vp = XFS_ITOV(ip);
2877 2793
2878 xfs_itrace_entry(ip); 2794 xfs_itrace_entry(ip);
2879 2795
2880 ASSERT(!VN_MAPPED(vp)); 2796 ASSERT(!VN_MAPPED(VFS_I(ip)));
2881 2797
2882 /* bad inode, get out here ASAP */ 2798 /* bad inode, get out here ASAP */
2883 if (VN_BAD(vp)) { 2799 if (VN_BAD(VFS_I(ip))) {
2884 xfs_ireclaim(ip); 2800 xfs_ireclaim(ip);
2885 return 0; 2801 return 0;
2886 } 2802 }
@@ -2917,7 +2833,7 @@ xfs_reclaim(
2917 XFS_MOUNT_ILOCK(mp); 2833 XFS_MOUNT_ILOCK(mp);
2918 spin_lock(&ip->i_flags_lock); 2834 spin_lock(&ip->i_flags_lock);
2919 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 2835 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2920 vn_to_inode(vp)->i_private = NULL; 2836 VFS_I(ip)->i_private = NULL;
2921 ip->i_vnode = NULL; 2837 ip->i_vnode = NULL;
2922 spin_unlock(&ip->i_flags_lock); 2838 spin_unlock(&ip->i_flags_lock);
2923 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); 2839 list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
@@ -2933,7 +2849,7 @@ xfs_finish_reclaim(
2933 int sync_mode) 2849 int sync_mode)
2934{ 2850{
2935 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); 2851 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
2936 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 2852 struct inode *vp = VFS_I(ip);
2937 2853
2938 if (vp && VN_BAD(vp)) 2854 if (vp && VN_BAD(vp))
2939 goto reclaim; 2855 goto reclaim;
@@ -3321,7 +3237,6 @@ xfs_free_file_space(
3321 xfs_off_t len, 3237 xfs_off_t len,
3322 int attr_flags) 3238 int attr_flags)
3323{ 3239{
3324 bhv_vnode_t *vp;
3325 int committed; 3240 int committed;
3326 int done; 3241 int done;
3327 xfs_off_t end_dmi_offset; 3242 xfs_off_t end_dmi_offset;
@@ -3341,7 +3256,6 @@ xfs_free_file_space(
3341 xfs_trans_t *tp; 3256 xfs_trans_t *tp;
3342 int need_iolock = 1; 3257 int need_iolock = 1;
3343 3258
3344 vp = XFS_ITOV(ip);
3345 mp = ip->i_mount; 3259 mp = ip->i_mount;
3346 3260
3347 xfs_itrace_entry(ip); 3261 xfs_itrace_entry(ip);
@@ -3378,7 +3292,7 @@ xfs_free_file_space(
3378 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 3292 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
3379 ioffset = offset & ~(rounding - 1); 3293 ioffset = offset & ~(rounding - 1);
3380 3294
3381 if (VN_CACHED(vp) != 0) { 3295 if (VN_CACHED(VFS_I(ip)) != 0) {
3382 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); 3296 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
3383 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); 3297 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
3384 if (error) 3298 if (error)