aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/afs/flock.c2
-rw-r--r--fs/afs/proc.c8
-rw-r--r--fs/aio.c57
-rw-r--r--fs/anon_inodes.c68
-rw-r--r--fs/autofs/dirhash.c2
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/binfmt_elf.c44
-rw-r--r--fs/binfmt_elf_fdpic.c56
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/inode.c28
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/buffer.c57
-rw-r--r--fs/cifs/cifs_dfs_ref.c4
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/compat.c7
-rw-r--r--fs/devpts/inode.c3
-rw-r--r--fs/dlm/debug_fs.c12
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/mmap.c2
-rw-r--r--fs/eventfd.c67
-rw-r--r--fs/exec.c11
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/xip.c2
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/gfs2/rgrp.c2
-rw-r--r--fs/hugetlbfs/inode.c25
-rw-r--r--fs/inode.c35
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/clntproc.c2
-rw-r--r--fs/lockd/host.c4
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/minix/dir.c22
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/ncpfs/ioctl.c2
-rw-r--r--fs/nfs/callback_xdr.c2
-rw-r--r--fs/nfs/client.c14
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/export.c4
-rw-r--r--fs/nfsd/nfs3xdr.c75
-rw-r--r--fs/nfsd/nfs4acl.c4
-rw-r--r--fs/nfsd/nfs4callback.c263
-rw-r--r--fs/nfsd/nfs4proc.c89
-rw-r--r--fs/nfsd/nfs4state.c685
-rw-r--r--fs/nfsd/nfs4xdr.c42
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nfsd/nfsfh.c158
-rw-r--r--fs/nfsd/nfssvc.c54
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/gcinode.c2
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c6
-rw-r--r--fs/nilfs2/nilfs.h10
-rw-r--r--fs/nilfs2/super.c4
-rw-r--r--fs/ntfs/file.c42
-rw-r--r--fs/ntfs/layout.h2
-rw-r--r--fs/ntfs/malloc.h2
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/alloc.c1342
-rw-r--r--fs/ocfs2/alloc.h101
-rw-r--r--fs/ocfs2/aops.c37
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/buffer_head_io.c47
-rw-r--r--fs/ocfs2/buffer_head_io.h8
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/cluster/netdebug.c4
-rw-r--r--fs/ocfs2/dir.c107
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c2
-rw-r--r--fs/ocfs2/dlm/dlmthread.c6
-rw-r--r--fs/ocfs2/dlmglue.c105
-rw-r--r--fs/ocfs2/dlmglue.h6
-rw-r--r--fs/ocfs2/extent_map.c33
-rw-r--r--fs/ocfs2/extent_map.h8
-rw-r--r--fs/ocfs2/file.c151
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/inode.c86
-rw-r--r--fs/ocfs2/inode.h20
-rw-r--r--fs/ocfs2/ioctl.c14
-rw-r--r--fs/ocfs2/journal.c82
-rw-r--r--fs/ocfs2/journal.h94
-rw-r--r--fs/ocfs2/localalloc.c12
-rw-r--r--fs/ocfs2/namei.c341
-rw-r--r--fs/ocfs2/namei.h6
-rw-r--r--fs/ocfs2/ocfs2.h52
-rw-r--r--fs/ocfs2/ocfs2_fs.h107
-rw-r--r--fs/ocfs2/ocfs2_lockid.h5
-rw-r--r--fs/ocfs2/quota.h2
-rw-r--r--fs/ocfs2/quota_global.c9
-rw-r--r--fs/ocfs2/quota_local.c26
-rw-r--r--fs/ocfs2/refcounttree.c4313
-rw-r--r--fs/ocfs2/refcounttree.h106
-rw-r--r--fs/ocfs2/resize.c16
-rw-r--r--fs/ocfs2/slot_map.c10
-rw-r--r--fs/ocfs2/suballoc.c35
-rw-r--r--fs/ocfs2/super.c15
-rw-r--r--fs/ocfs2/uptodate.c265
-rw-r--r--fs/ocfs2/uptodate.h51
-rw-r--r--fs/ocfs2/xattr.c2056
-rw-r--r--fs/ocfs2/xattr.h15
-rw-r--r--fs/omfs/dir.c2
-rw-r--r--fs/omfs/file.c4
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/omfs/omfs.h6
-rw-r--r--fs/open.c5
-rw-r--r--fs/partitions/check.c2
-rw-r--r--fs/proc/array.c85
-rw-r--r--fs/proc/base.c67
-rw-r--r--fs/proc/kcore.c335
-rw-r--r--fs/proc/meminfo.c4
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/page.c5
-rw-r--r--fs/proc/task_mmu.c57
-rw-r--r--fs/qnx4/Kconfig11
-rw-r--r--fs/qnx4/Makefile2
-rw-r--r--fs/qnx4/bitmap.c81
-rw-r--r--fs/qnx4/dir.c5
-rw-r--r--fs/qnx4/file.c40
-rw-r--r--fs/qnx4/inode.c84
-rw-r--r--fs/qnx4/namei.c105
-rw-r--r--fs/qnx4/qnx4.h8
-rw-r--r--fs/qnx4/truncate.c34
-rw-r--r--fs/quota/dquot.c4
-rw-r--r--fs/ramfs/inode.c4
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/select.c14
-rw-r--r--fs/smbfs/proc.c2
-rw-r--r--fs/squashfs/super.c4
-rw-r--r--fs/super.c2
-rw-r--r--fs/sync.c1
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/commit.c2
-rw-r--r--fs/ubifs/debug.c112
-rw-r--r--fs/ubifs/debug.h5
-rw-r--r--fs/ubifs/file.c62
-rw-r--r--fs/ubifs/gc.c2
-rw-r--r--fs/ubifs/io.c29
-rw-r--r--fs/ubifs/journal.c13
-rw-r--r--fs/ubifs/key.h35
-rw-r--r--fs/ubifs/log.c17
-rw-r--r--fs/ubifs/lprops.c43
-rw-r--r--fs/ubifs/master.c20
-rw-r--r--fs/ubifs/orphan.c7
-rw-r--r--fs/ubifs/recovery.c4
-rw-r--r--fs/ubifs/replay.c6
-rw-r--r--fs/ubifs/scan.c32
-rw-r--r--fs/ubifs/super.c33
-rw-r--r--fs/ubifs/tnc.c76
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/ubifs/ubifs-media.h7
-rw-r--r--fs/ubifs/ubifs.h13
-rw-r--r--fs/ubifs/xattr.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h2
-rw-r--r--fs/xfs/xfs_fs.h2
171 files changed, 10470 insertions, 2815 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 455aa207e67e..d4bf8caad8d0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -109,6 +109,7 @@ source "fs/sysfs/Kconfig"
109 109
110config TMPFS 110config TMPFS
111 bool "Virtual memory file system support (former shm fs)" 111 bool "Virtual memory file system support (former shm fs)"
112 depends on SHMEM
112 help 113 help
113 Tmpfs is a file system which keeps all files in virtual memory. 114 Tmpfs is a file system which keeps all files in virtual memory.
114 115
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3ff8bdd18fb3..0931bc1325eb 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -21,7 +21,7 @@ static void afs_fl_release_private(struct file_lock *fl);
21static struct workqueue_struct *afs_lock_manager; 21static struct workqueue_struct *afs_lock_manager;
22static DEFINE_MUTEX(afs_lock_manager_mutex); 22static DEFINE_MUTEX(afs_lock_manager_mutex);
23 23
24static struct file_lock_operations afs_lock_ops = { 24static const struct file_lock_operations afs_lock_ops = {
25 .fl_copy_lock = afs_fl_copy_lock, 25 .fl_copy_lock = afs_fl_copy_lock,
26 .fl_release_private = afs_fl_release_private, 26 .fl_release_private = afs_fl_release_private,
27}; 27};
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 8630615e57fe..852739d262a9 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -28,7 +28,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v);
28static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, 28static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
29 size_t size, loff_t *_pos); 29 size_t size, loff_t *_pos);
30 30
31static struct seq_operations afs_proc_cells_ops = { 31static const struct seq_operations afs_proc_cells_ops = {
32 .start = afs_proc_cells_start, 32 .start = afs_proc_cells_start,
33 .next = afs_proc_cells_next, 33 .next = afs_proc_cells_next,
34 .stop = afs_proc_cells_stop, 34 .stop = afs_proc_cells_stop,
@@ -70,7 +70,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
70static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v); 70static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v);
71static int afs_proc_cell_volumes_show(struct seq_file *m, void *v); 71static int afs_proc_cell_volumes_show(struct seq_file *m, void *v);
72 72
73static struct seq_operations afs_proc_cell_volumes_ops = { 73static const struct seq_operations afs_proc_cell_volumes_ops = {
74 .start = afs_proc_cell_volumes_start, 74 .start = afs_proc_cell_volumes_start,
75 .next = afs_proc_cell_volumes_next, 75 .next = afs_proc_cell_volumes_next,
76 .stop = afs_proc_cell_volumes_stop, 76 .stop = afs_proc_cell_volumes_stop,
@@ -95,7 +95,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
95static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v); 95static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v);
96static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v); 96static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v);
97 97
98static struct seq_operations afs_proc_cell_vlservers_ops = { 98static const struct seq_operations afs_proc_cell_vlservers_ops = {
99 .start = afs_proc_cell_vlservers_start, 99 .start = afs_proc_cell_vlservers_start,
100 .next = afs_proc_cell_vlservers_next, 100 .next = afs_proc_cell_vlservers_next,
101 .stop = afs_proc_cell_vlservers_stop, 101 .stop = afs_proc_cell_vlservers_stop,
@@ -119,7 +119,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
119static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); 119static void afs_proc_cell_servers_stop(struct seq_file *p, void *v);
120static int afs_proc_cell_servers_show(struct seq_file *m, void *v); 120static int afs_proc_cell_servers_show(struct seq_file *m, void *v);
121 121
122static struct seq_operations afs_proc_cell_servers_ops = { 122static const struct seq_operations afs_proc_cell_servers_ops = {
123 .start = afs_proc_cell_servers_start, 123 .start = afs_proc_cell_servers_start,
124 .next = afs_proc_cell_servers_next, 124 .next = afs_proc_cell_servers_next,
125 .stop = afs_proc_cell_servers_stop, 125 .stop = afs_proc_cell_servers_stop,
diff --git a/fs/aio.c b/fs/aio.c
index d065b2c3273e..02a2c9340573 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -24,6 +24,7 @@
24#include <linux/file.h> 24#include <linux/file.h>
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <linux/mman.h> 26#include <linux/mman.h>
27#include <linux/mmu_context.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
28#include <linux/timer.h> 29#include <linux/timer.h>
29#include <linux/aio.h> 30#include <linux/aio.h>
@@ -34,7 +35,6 @@
34 35
35#include <asm/kmap_types.h> 36#include <asm/kmap_types.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <asm/mmu_context.h>
38 38
39#if DEBUG > 1 39#if DEBUG > 1
40#define dprintk printk 40#define dprintk printk
@@ -78,6 +78,7 @@ static int __init aio_setup(void)
78 78
79 return 0; 79 return 0;
80} 80}
81__initcall(aio_setup);
81 82
82static void aio_free_ring(struct kioctx *ctx) 83static void aio_free_ring(struct kioctx *ctx)
83{ 84{
@@ -380,6 +381,7 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
380 __set_current_state(TASK_RUNNING); 381 __set_current_state(TASK_RUNNING);
381 return iocb->ki_user_data; 382 return iocb->ki_user_data;
382} 383}
384EXPORT_SYMBOL(wait_on_sync_kiocb);
383 385
384/* exit_aio: called when the last user of mm goes away. At this point, 386/* exit_aio: called when the last user of mm goes away. At this point,
385 * there is no way for any new requests to be submited or any of the 387 * there is no way for any new requests to be submited or any of the
@@ -573,6 +575,7 @@ int aio_put_req(struct kiocb *req)
573 spin_unlock_irq(&ctx->ctx_lock); 575 spin_unlock_irq(&ctx->ctx_lock);
574 return ret; 576 return ret;
575} 577}
578EXPORT_SYMBOL(aio_put_req);
576 579
577static struct kioctx *lookup_ioctx(unsigned long ctx_id) 580static struct kioctx *lookup_ioctx(unsigned long ctx_id)
578{ 581{
@@ -595,51 +598,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
595} 598}
596 599
597/* 600/*
598 * use_mm
599 * Makes the calling kernel thread take on the specified
600 * mm context.
601 * Called by the retry thread execute retries within the
602 * iocb issuer's mm context, so that copy_from/to_user
603 * operations work seamlessly for aio.
604 * (Note: this routine is intended to be called only
605 * from a kernel thread context)
606 */
607static void use_mm(struct mm_struct *mm)
608{
609 struct mm_struct *active_mm;
610 struct task_struct *tsk = current;
611
612 task_lock(tsk);
613 active_mm = tsk->active_mm;
614 atomic_inc(&mm->mm_count);
615 tsk->mm = mm;
616 tsk->active_mm = mm;
617 switch_mm(active_mm, mm, tsk);
618 task_unlock(tsk);
619
620 mmdrop(active_mm);
621}
622
623/*
624 * unuse_mm
625 * Reverses the effect of use_mm, i.e. releases the
626 * specified mm context which was earlier taken on
627 * by the calling kernel thread
628 * (Note: this routine is intended to be called only
629 * from a kernel thread context)
630 */
631static void unuse_mm(struct mm_struct *mm)
632{
633 struct task_struct *tsk = current;
634
635 task_lock(tsk);
636 tsk->mm = NULL;
637 /* active_mm is still 'mm' */
638 enter_lazy_tlb(mm, tsk);
639 task_unlock(tsk);
640}
641
642/*
643 * Queue up a kiocb to be retried. Assumes that the kiocb 601 * Queue up a kiocb to be retried. Assumes that the kiocb
644 * has already been marked as kicked, and places it on 602 * has already been marked as kicked, and places it on
645 * the retry run list for the corresponding ioctx, if it 603 * the retry run list for the corresponding ioctx, if it
@@ -1037,6 +995,7 @@ put_rq:
1037 spin_unlock_irqrestore(&ctx->ctx_lock, flags); 995 spin_unlock_irqrestore(&ctx->ctx_lock, flags);
1038 return ret; 996 return ret;
1039} 997}
998EXPORT_SYMBOL(aio_complete);
1040 999
1041/* aio_read_evt 1000/* aio_read_evt
1042 * Pull an event off of the ioctx's event ring. Returns the number of 1001 * Pull an event off of the ioctx's event ring. Returns the number of
@@ -1825,9 +1784,3 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
1825 asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout); 1784 asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout);
1826 return ret; 1785 return ret;
1827} 1786}
1828
1829__initcall(aio_setup);
1830
1831EXPORT_SYMBOL(aio_complete);
1832EXPORT_SYMBOL(aio_put_req);
1833EXPORT_SYMBOL(wait_on_sync_kiocb);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 47d4a01c5393..d11c51fc2a3f 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -77,28 +77,24 @@ static const struct address_space_operations anon_aops = {
77 * 77 *
78 * Creates a new file by hooking it on a single inode. This is useful for files 78 * Creates a new file by hooking it on a single inode. This is useful for files
79 * that do not need to have a full-fledged inode in order to operate correctly. 79 * that do not need to have a full-fledged inode in order to operate correctly.
80 * All the files created with anon_inode_getfd() will share a single inode, 80 * All the files created with anon_inode_getfile() will share a single inode,
81 * hence saving memory and avoiding code duplication for the file/inode/dentry 81 * hence saving memory and avoiding code duplication for the file/inode/dentry
82 * setup. Returns new descriptor or -error. 82 * setup. Returns the newly created file* or an error pointer.
83 */ 83 */
84int anon_inode_getfd(const char *name, const struct file_operations *fops, 84struct file *anon_inode_getfile(const char *name,
85 void *priv, int flags) 85 const struct file_operations *fops,
86 void *priv, int flags)
86{ 87{
87 struct qstr this; 88 struct qstr this;
88 struct dentry *dentry; 89 struct dentry *dentry;
89 struct file *file; 90 struct file *file;
90 int error, fd; 91 int error;
91 92
92 if (IS_ERR(anon_inode_inode)) 93 if (IS_ERR(anon_inode_inode))
93 return -ENODEV; 94 return ERR_PTR(-ENODEV);
94 95
95 if (fops->owner && !try_module_get(fops->owner)) 96 if (fops->owner && !try_module_get(fops->owner))
96 return -ENOENT; 97 return ERR_PTR(-ENOENT);
97
98 error = get_unused_fd_flags(flags);
99 if (error < 0)
100 goto err_module;
101 fd = error;
102 98
103 /* 99 /*
104 * Link the inode to a directory entry by creating a unique name 100 * Link the inode to a directory entry by creating a unique name
@@ -110,7 +106,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
110 this.hash = 0; 106 this.hash = 0;
111 dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); 107 dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
112 if (!dentry) 108 if (!dentry)
113 goto err_put_unused_fd; 109 goto err_module;
114 110
115 /* 111 /*
116 * We know the anon_inode inode count is always greater than zero, 112 * We know the anon_inode inode count is always greater than zero,
@@ -136,16 +132,54 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
136 file->f_version = 0; 132 file->f_version = 0;
137 file->private_data = priv; 133 file->private_data = priv;
138 134
135 return file;
136
137err_dput:
138 dput(dentry);
139err_module:
140 module_put(fops->owner);
141 return ERR_PTR(error);
142}
143EXPORT_SYMBOL_GPL(anon_inode_getfile);
144
145/**
146 * anon_inode_getfd - creates a new file instance by hooking it up to an
147 * anonymous inode, and a dentry that describe the "class"
148 * of the file
149 *
150 * @name: [in] name of the "class" of the new file
151 * @fops: [in] file operations for the new file
152 * @priv: [in] private data for the new file (will be file's private_data)
153 * @flags: [in] flags
154 *
155 * Creates a new file by hooking it on a single inode. This is useful for files
156 * that do not need to have a full-fledged inode in order to operate correctly.
157 * All the files created with anon_inode_getfd() will share a single inode,
158 * hence saving memory and avoiding code duplication for the file/inode/dentry
159 * setup. Returns new descriptor or an error code.
160 */
161int anon_inode_getfd(const char *name, const struct file_operations *fops,
162 void *priv, int flags)
163{
164 int error, fd;
165 struct file *file;
166
167 error = get_unused_fd_flags(flags);
168 if (error < 0)
169 return error;
170 fd = error;
171
172 file = anon_inode_getfile(name, fops, priv, flags);
173 if (IS_ERR(file)) {
174 error = PTR_ERR(file);
175 goto err_put_unused_fd;
176 }
139 fd_install(fd, file); 177 fd_install(fd, file);
140 178
141 return fd; 179 return fd;
142 180
143err_dput:
144 dput(dentry);
145err_put_unused_fd: 181err_put_unused_fd:
146 put_unused_fd(fd); 182 put_unused_fd(fd);
147err_module:
148 module_put(fops->owner);
149 return error; 183 return error;
150} 184}
151EXPORT_SYMBOL_GPL(anon_inode_getfd); 185EXPORT_SYMBOL_GPL(anon_inode_getfd);
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 2316e944a109..e947915109e5 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -90,7 +90,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
90 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); 90 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
91 continue; 91 continue;
92 } 92 }
93 while (d_mountpoint(path.dentry) && follow_down(&path)); 93 while (d_mountpoint(path.dentry) && follow_down(&path))
94 ; 94 ;
95 umount_ok = may_umount(path.mnt); 95 umount_ok = may_umount(path.mnt);
96 path_put(&path); 96 path_put(&path);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 615d5496fe0f..dd376c124e71 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -842,7 +842,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
842 sb->s_magic = BEFS_SUPER_MAGIC; 842 sb->s_magic = BEFS_SUPER_MAGIC;
843 /* Set real blocksize of fs */ 843 /* Set real blocksize of fs */
844 sb_set_blocksize(sb, (ulong) befs_sb->block_size); 844 sb_set_blocksize(sb, (ulong) befs_sb->block_size);
845 sb->s_op = (struct super_operations *) &befs_sops; 845 sb->s_op = &befs_sops;
846 root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); 846 root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir)));
847 if (IS_ERR(root)) { 847 if (IS_ERR(root)) {
848 ret = PTR_ERR(root); 848 ret = PTR_ERR(root);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7c1e65d54872..442d94fe255c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1280,9 +1280,6 @@ static int writenote(struct memelfnote *men, struct file *file,
1280#define DUMP_WRITE(addr, nr) \ 1280#define DUMP_WRITE(addr, nr) \
1281 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1281 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1282 goto end_coredump; 1282 goto end_coredump;
1283#define DUMP_SEEK(off) \
1284 if (!dump_seek(file, (off))) \
1285 goto end_coredump;
1286 1283
1287static void fill_elf_header(struct elfhdr *elf, int segs, 1284static void fill_elf_header(struct elfhdr *elf, int segs,
1288 u16 machine, u32 flags, u8 osabi) 1285 u16 machine, u32 flags, u8 osabi)
@@ -2016,7 +2013,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2016 goto end_coredump; 2013 goto end_coredump;
2017 2014
2018 /* Align to page */ 2015 /* Align to page */
2019 DUMP_SEEK(dataoff - foffset); 2016 if (!dump_seek(file, dataoff - foffset))
2017 goto end_coredump;
2020 2018
2021 for (vma = first_vma(current, gate_vma); vma != NULL; 2019 for (vma = first_vma(current, gate_vma); vma != NULL;
2022 vma = next_vma(vma, gate_vma)) { 2020 vma = next_vma(vma, gate_vma)) {
@@ -2027,33 +2025,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2027 2025
2028 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { 2026 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2029 struct page *page; 2027 struct page *page;
2030 struct vm_area_struct *tmp_vma; 2028 int stop;
2031 2029
2032 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 2030 page = get_dump_page(addr);
2033 &page, &tmp_vma) <= 0) { 2031 if (page) {
2034 DUMP_SEEK(PAGE_SIZE); 2032 void *kaddr = kmap(page);
2035 } else { 2033 stop = ((size += PAGE_SIZE) > limit) ||
2036 if (page == ZERO_PAGE(0)) { 2034 !dump_write(file, kaddr, PAGE_SIZE);
2037 if (!dump_seek(file, PAGE_SIZE)) { 2035 kunmap(page);
2038 page_cache_release(page);
2039 goto end_coredump;
2040 }
2041 } else {
2042 void *kaddr;
2043 flush_cache_page(tmp_vma, addr,
2044 page_to_pfn(page));
2045 kaddr = kmap(page);
2046 if ((size += PAGE_SIZE) > limit ||
2047 !dump_write(file, kaddr,
2048 PAGE_SIZE)) {
2049 kunmap(page);
2050 page_cache_release(page);
2051 goto end_coredump;
2052 }
2053 kunmap(page);
2054 }
2055 page_cache_release(page); 2036 page_cache_release(page);
2056 } 2037 } else
2038 stop = !dump_seek(file, PAGE_SIZE);
2039 if (stop)
2040 goto end_coredump;
2057 } 2041 }
2058 } 2042 }
2059 2043
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 20fbeced472b..76285471073e 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1325,9 +1325,6 @@ static int writenote(struct memelfnote *men, struct file *file)
1325#define DUMP_WRITE(addr, nr) \ 1325#define DUMP_WRITE(addr, nr) \
1326 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1326 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1327 goto end_coredump; 1327 goto end_coredump;
1328#define DUMP_SEEK(off) \
1329 if (!dump_seek(file, (off))) \
1330 goto end_coredump;
1331 1328
1332static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) 1329static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
1333{ 1330{
@@ -1518,6 +1515,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1518 unsigned long *limit, unsigned long mm_flags) 1515 unsigned long *limit, unsigned long mm_flags)
1519{ 1516{
1520 struct vm_area_struct *vma; 1517 struct vm_area_struct *vma;
1518 int err = 0;
1521 1519
1522 for (vma = current->mm->mmap; vma; vma = vma->vm_next) { 1520 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1523 unsigned long addr; 1521 unsigned long addr;
@@ -1525,43 +1523,26 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1525 if (!maydump(vma, mm_flags)) 1523 if (!maydump(vma, mm_flags))
1526 continue; 1524 continue;
1527 1525
1528 for (addr = vma->vm_start; 1526 for (addr = vma->vm_start; addr < vma->vm_end;
1529 addr < vma->vm_end; 1527 addr += PAGE_SIZE) {
1530 addr += PAGE_SIZE 1528 struct page *page = get_dump_page(addr);
1531 ) { 1529 if (page) {
1532 struct vm_area_struct *vma; 1530 void *kaddr = kmap(page);
1533 struct page *page; 1531 *size += PAGE_SIZE;
1534 1532 if (*size > *limit)
1535 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 1533 err = -EFBIG;
1536 &page, &vma) <= 0) { 1534 else if (!dump_write(file, kaddr, PAGE_SIZE))
1537 DUMP_SEEK(file->f_pos + PAGE_SIZE); 1535 err = -EIO;
1538 }
1539 else if (page == ZERO_PAGE(0)) {
1540 page_cache_release(page);
1541 DUMP_SEEK(file->f_pos + PAGE_SIZE);
1542 }
1543 else {
1544 void *kaddr;
1545
1546 flush_cache_page(vma, addr, page_to_pfn(page));
1547 kaddr = kmap(page);
1548 if ((*size += PAGE_SIZE) > *limit ||
1549 !dump_write(file, kaddr, PAGE_SIZE)
1550 ) {
1551 kunmap(page);
1552 page_cache_release(page);
1553 return -EIO;
1554 }
1555 kunmap(page); 1536 kunmap(page);
1556 page_cache_release(page); 1537 page_cache_release(page);
1557 } 1538 } else if (!dump_seek(file, file->f_pos + PAGE_SIZE))
1539 err = -EFBIG;
1540 if (err)
1541 goto out;
1558 } 1542 }
1559 } 1543 }
1560 1544out:
1561 return 0; 1545 return err;
1562
1563end_coredump:
1564 return -EFBIG;
1565} 1546}
1566#endif 1547#endif
1567 1548
@@ -1802,7 +1783,8 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1802 goto end_coredump; 1783 goto end_coredump;
1803 } 1784 }
1804 1785
1805 DUMP_SEEK(dataoff); 1786 if (!dump_seek(file, dataoff))
1787 goto end_coredump;
1806 1788
1807 if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) 1789 if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
1808 goto end_coredump; 1790 goto end_coredump;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 71e7e03ac343..5d1ed50bd46c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1114,7 +1114,7 @@ EXPORT_SYMBOL(revalidate_disk);
1114int check_disk_change(struct block_device *bdev) 1114int check_disk_change(struct block_device *bdev)
1115{ 1115{
1116 struct gendisk *disk = bdev->bd_disk; 1116 struct gendisk *disk = bdev->bd_disk;
1117 struct block_device_operations * bdops = disk->fops; 1117 const struct block_device_operations *bdops = disk->fops;
1118 1118
1119 if (!bdops->media_changed) 1119 if (!bdops->media_changed)
1120 return 0; 1120 return 0;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8b8192790011..6c4173146bb7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -772,7 +772,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
772 } 772 }
773} 773}
774 774
775static struct address_space_operations btree_aops = { 775static const struct address_space_operations btree_aops = {
776 .readpage = btree_readpage, 776 .readpage = btree_readpage,
777 .writepage = btree_writepage, 777 .writepage = btree_writepage,
778 .writepages = btree_writepages, 778 .writepages = btree_writepages,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 59cba180fe83..9096fd0ca3ca 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -55,13 +55,13 @@ struct btrfs_iget_args {
55 struct btrfs_root *root; 55 struct btrfs_root *root;
56}; 56};
57 57
58static struct inode_operations btrfs_dir_inode_operations; 58static const struct inode_operations btrfs_dir_inode_operations;
59static struct inode_operations btrfs_symlink_inode_operations; 59static const struct inode_operations btrfs_symlink_inode_operations;
60static struct inode_operations btrfs_dir_ro_inode_operations; 60static const struct inode_operations btrfs_dir_ro_inode_operations;
61static struct inode_operations btrfs_special_inode_operations; 61static const struct inode_operations btrfs_special_inode_operations;
62static struct inode_operations btrfs_file_inode_operations; 62static const struct inode_operations btrfs_file_inode_operations;
63static struct address_space_operations btrfs_aops; 63static const struct address_space_operations btrfs_aops;
64static struct address_space_operations btrfs_symlink_aops; 64static const struct address_space_operations btrfs_symlink_aops;
65static struct file_operations btrfs_dir_file_operations; 65static struct file_operations btrfs_dir_file_operations;
66static struct extent_io_ops btrfs_extent_io_ops; 66static struct extent_io_ops btrfs_extent_io_ops;
67 67
@@ -5201,7 +5201,7 @@ static int btrfs_permission(struct inode *inode, int mask)
5201 return generic_permission(inode, mask, btrfs_check_acl); 5201 return generic_permission(inode, mask, btrfs_check_acl);
5202} 5202}
5203 5203
5204static struct inode_operations btrfs_dir_inode_operations = { 5204static const struct inode_operations btrfs_dir_inode_operations = {
5205 .getattr = btrfs_getattr, 5205 .getattr = btrfs_getattr,
5206 .lookup = btrfs_lookup, 5206 .lookup = btrfs_lookup,
5207 .create = btrfs_create, 5207 .create = btrfs_create,
@@ -5219,7 +5219,7 @@ static struct inode_operations btrfs_dir_inode_operations = {
5219 .removexattr = btrfs_removexattr, 5219 .removexattr = btrfs_removexattr,
5220 .permission = btrfs_permission, 5220 .permission = btrfs_permission,
5221}; 5221};
5222static struct inode_operations btrfs_dir_ro_inode_operations = { 5222static const struct inode_operations btrfs_dir_ro_inode_operations = {
5223 .lookup = btrfs_lookup, 5223 .lookup = btrfs_lookup,
5224 .permission = btrfs_permission, 5224 .permission = btrfs_permission,
5225}; 5225};
@@ -5259,7 +5259,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
5259 * 5259 *
5260 * For now we're avoiding this by dropping bmap. 5260 * For now we're avoiding this by dropping bmap.
5261 */ 5261 */
5262static struct address_space_operations btrfs_aops = { 5262static const struct address_space_operations btrfs_aops = {
5263 .readpage = btrfs_readpage, 5263 .readpage = btrfs_readpage,
5264 .writepage = btrfs_writepage, 5264 .writepage = btrfs_writepage,
5265 .writepages = btrfs_writepages, 5265 .writepages = btrfs_writepages,
@@ -5271,14 +5271,14 @@ static struct address_space_operations btrfs_aops = {
5271 .set_page_dirty = btrfs_set_page_dirty, 5271 .set_page_dirty = btrfs_set_page_dirty,
5272}; 5272};
5273 5273
5274static struct address_space_operations btrfs_symlink_aops = { 5274static const struct address_space_operations btrfs_symlink_aops = {
5275 .readpage = btrfs_readpage, 5275 .readpage = btrfs_readpage,
5276 .writepage = btrfs_writepage, 5276 .writepage = btrfs_writepage,
5277 .invalidatepage = btrfs_invalidatepage, 5277 .invalidatepage = btrfs_invalidatepage,
5278 .releasepage = btrfs_releasepage, 5278 .releasepage = btrfs_releasepage,
5279}; 5279};
5280 5280
5281static struct inode_operations btrfs_file_inode_operations = { 5281static const struct inode_operations btrfs_file_inode_operations = {
5282 .truncate = btrfs_truncate, 5282 .truncate = btrfs_truncate,
5283 .getattr = btrfs_getattr, 5283 .getattr = btrfs_getattr,
5284 .setattr = btrfs_setattr, 5284 .setattr = btrfs_setattr,
@@ -5290,7 +5290,7 @@ static struct inode_operations btrfs_file_inode_operations = {
5290 .fallocate = btrfs_fallocate, 5290 .fallocate = btrfs_fallocate,
5291 .fiemap = btrfs_fiemap, 5291 .fiemap = btrfs_fiemap,
5292}; 5292};
5293static struct inode_operations btrfs_special_inode_operations = { 5293static const struct inode_operations btrfs_special_inode_operations = {
5294 .getattr = btrfs_getattr, 5294 .getattr = btrfs_getattr,
5295 .setattr = btrfs_setattr, 5295 .setattr = btrfs_setattr,
5296 .permission = btrfs_permission, 5296 .permission = btrfs_permission,
@@ -5299,7 +5299,7 @@ static struct inode_operations btrfs_special_inode_operations = {
5299 .listxattr = btrfs_listxattr, 5299 .listxattr = btrfs_listxattr,
5300 .removexattr = btrfs_removexattr, 5300 .removexattr = btrfs_removexattr,
5301}; 5301};
5302static struct inode_operations btrfs_symlink_inode_operations = { 5302static const struct inode_operations btrfs_symlink_inode_operations = {
5303 .readlink = generic_readlink, 5303 .readlink = generic_readlink,
5304 .follow_link = page_follow_link_light, 5304 .follow_link = page_follow_link_light,
5305 .put_link = page_put_link, 5305 .put_link = page_put_link,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6d6d06cb6dfc..2db17cd66fc5 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -51,7 +51,7 @@
51#include "export.h" 51#include "export.h"
52#include "compression.h" 52#include "compression.h"
53 53
54static struct super_operations btrfs_super_ops; 54static const struct super_operations btrfs_super_ops;
55 55
56static void btrfs_put_super(struct super_block *sb) 56static void btrfs_put_super(struct super_block *sb)
57{ 57{
@@ -675,7 +675,7 @@ static int btrfs_unfreeze(struct super_block *sb)
675 return 0; 675 return 0;
676} 676}
677 677
678static struct super_operations btrfs_super_ops = { 678static const struct super_operations btrfs_super_ops = {
679 .delete_inode = btrfs_delete_inode, 679 .delete_inode = btrfs_delete_inode,
680 .put_super = btrfs_put_super, 680 .put_super = btrfs_put_super,
681 .sync_fs = btrfs_sync_fs, 681 .sync_fs = btrfs_sync_fs,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d91b0de7c502..30c0d45c1b5e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2605,7 +2605,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2605 extent); 2605 extent);
2606 cs = btrfs_file_extent_offset(src, extent); 2606 cs = btrfs_file_extent_offset(src, extent);
2607 cl = btrfs_file_extent_num_bytes(src, 2607 cl = btrfs_file_extent_num_bytes(src,
2608 extent);; 2608 extent);
2609 if (btrfs_file_extent_compression(src, 2609 if (btrfs_file_extent_compression(src,
2610 extent)) { 2610 extent)) {
2611 cs = 0; 2611 cs = 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index 90a98865b0cc..209f7f15f5f8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -52,6 +52,7 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
52 bh->b_end_io = handler; 52 bh->b_end_io = handler;
53 bh->b_private = private; 53 bh->b_private = private;
54} 54}
55EXPORT_SYMBOL(init_buffer);
55 56
56static int sync_buffer(void *word) 57static int sync_buffer(void *word)
57{ 58{
@@ -80,6 +81,7 @@ void unlock_buffer(struct buffer_head *bh)
80 smp_mb__after_clear_bit(); 81 smp_mb__after_clear_bit();
81 wake_up_bit(&bh->b_state, BH_Lock); 82 wake_up_bit(&bh->b_state, BH_Lock);
82} 83}
84EXPORT_SYMBOL(unlock_buffer);
83 85
84/* 86/*
85 * Block until a buffer comes unlocked. This doesn't stop it 87 * Block until a buffer comes unlocked. This doesn't stop it
@@ -90,6 +92,7 @@ void __wait_on_buffer(struct buffer_head * bh)
90{ 92{
91 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); 93 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
92} 94}
95EXPORT_SYMBOL(__wait_on_buffer);
93 96
94static void 97static void
95__clear_page_buffers(struct page *page) 98__clear_page_buffers(struct page *page)
@@ -144,6 +147,7 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
144 __end_buffer_read_notouch(bh, uptodate); 147 __end_buffer_read_notouch(bh, uptodate);
145 put_bh(bh); 148 put_bh(bh);
146} 149}
150EXPORT_SYMBOL(end_buffer_read_sync);
147 151
148void end_buffer_write_sync(struct buffer_head *bh, int uptodate) 152void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
149{ 153{
@@ -164,6 +168,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
164 unlock_buffer(bh); 168 unlock_buffer(bh);
165 put_bh(bh); 169 put_bh(bh);
166} 170}
171EXPORT_SYMBOL(end_buffer_write_sync);
167 172
168/* 173/*
169 * Various filesystems appear to want __find_get_block to be non-blocking. 174 * Various filesystems appear to want __find_get_block to be non-blocking.
@@ -272,6 +277,7 @@ void invalidate_bdev(struct block_device *bdev)
272 invalidate_bh_lrus(); 277 invalidate_bh_lrus();
273 invalidate_mapping_pages(mapping, 0, -1); 278 invalidate_mapping_pages(mapping, 0, -1);
274} 279}
280EXPORT_SYMBOL(invalidate_bdev);
275 281
276/* 282/*
277 * Kick pdflush then try to free up some ZONE_NORMAL memory. 283 * Kick pdflush then try to free up some ZONE_NORMAL memory.
@@ -410,6 +416,7 @@ still_busy:
410 local_irq_restore(flags); 416 local_irq_restore(flags);
411 return; 417 return;
412} 418}
419EXPORT_SYMBOL(end_buffer_async_write);
413 420
414/* 421/*
415 * If a page's buffers are under async readin (end_buffer_async_read 422 * If a page's buffers are under async readin (end_buffer_async_read
@@ -438,8 +445,8 @@ static void mark_buffer_async_read(struct buffer_head *bh)
438 set_buffer_async_read(bh); 445 set_buffer_async_read(bh);
439} 446}
440 447
441void mark_buffer_async_write_endio(struct buffer_head *bh, 448static void mark_buffer_async_write_endio(struct buffer_head *bh,
442 bh_end_io_t *handler) 449 bh_end_io_t *handler)
443{ 450{
444 bh->b_end_io = handler; 451 bh->b_end_io = handler;
445 set_buffer_async_write(bh); 452 set_buffer_async_write(bh);
@@ -553,7 +560,7 @@ repeat:
553 return err; 560 return err;
554} 561}
555 562
556void do_thaw_all(struct work_struct *work) 563static void do_thaw_all(struct work_struct *work)
557{ 564{
558 struct super_block *sb; 565 struct super_block *sb;
559 char b[BDEVNAME_SIZE]; 566 char b[BDEVNAME_SIZE];
@@ -1172,6 +1179,7 @@ void mark_buffer_dirty(struct buffer_head *bh)
1172 } 1179 }
1173 } 1180 }
1174} 1181}
1182EXPORT_SYMBOL(mark_buffer_dirty);
1175 1183
1176/* 1184/*
1177 * Decrement a buffer_head's reference count. If all buffers against a page 1185 * Decrement a buffer_head's reference count. If all buffers against a page
@@ -1188,6 +1196,7 @@ void __brelse(struct buffer_head * buf)
1188 } 1196 }
1189 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); 1197 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1190} 1198}
1199EXPORT_SYMBOL(__brelse);
1191 1200
1192/* 1201/*
1193 * bforget() is like brelse(), except it discards any 1202 * bforget() is like brelse(), except it discards any
@@ -1206,6 +1215,7 @@ void __bforget(struct buffer_head *bh)
1206 } 1215 }
1207 __brelse(bh); 1216 __brelse(bh);
1208} 1217}
1218EXPORT_SYMBOL(__bforget);
1209 1219
1210static struct buffer_head *__bread_slow(struct buffer_head *bh) 1220static struct buffer_head *__bread_slow(struct buffer_head *bh)
1211{ 1221{
@@ -2218,6 +2228,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
2218 } 2228 }
2219 return 0; 2229 return 0;
2220} 2230}
2231EXPORT_SYMBOL(block_read_full_page);
2221 2232
2222/* utility function for filesystems that need to do work on expanding 2233/* utility function for filesystems that need to do work on expanding
2223 * truncates. Uses filesystem pagecache writes to allow the filesystem to 2234 * truncates. Uses filesystem pagecache writes to allow the filesystem to
@@ -2252,6 +2263,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
2252out: 2263out:
2253 return err; 2264 return err;
2254} 2265}
2266EXPORT_SYMBOL(generic_cont_expand_simple);
2255 2267
2256static int cont_expand_zero(struct file *file, struct address_space *mapping, 2268static int cont_expand_zero(struct file *file, struct address_space *mapping,
2257 loff_t pos, loff_t *bytes) 2269 loff_t pos, loff_t *bytes)
@@ -2352,6 +2364,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
2352out: 2364out:
2353 return err; 2365 return err;
2354} 2366}
2367EXPORT_SYMBOL(cont_write_begin);
2355 2368
2356int block_prepare_write(struct page *page, unsigned from, unsigned to, 2369int block_prepare_write(struct page *page, unsigned from, unsigned to,
2357 get_block_t *get_block) 2370 get_block_t *get_block)
@@ -2362,6 +2375,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
2362 ClearPageUptodate(page); 2375 ClearPageUptodate(page);
2363 return err; 2376 return err;
2364} 2377}
2378EXPORT_SYMBOL(block_prepare_write);
2365 2379
2366int block_commit_write(struct page *page, unsigned from, unsigned to) 2380int block_commit_write(struct page *page, unsigned from, unsigned to)
2367{ 2381{
@@ -2369,6 +2383,7 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2369 __block_commit_write(inode,page,from,to); 2383 __block_commit_write(inode,page,from,to);
2370 return 0; 2384 return 0;
2371} 2385}
2386EXPORT_SYMBOL(block_commit_write);
2372 2387
2373/* 2388/*
2374 * block_page_mkwrite() is not allowed to change the file size as it gets 2389 * block_page_mkwrite() is not allowed to change the file size as it gets
@@ -2426,6 +2441,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2426out: 2441out:
2427 return ret; 2442 return ret;
2428} 2443}
2444EXPORT_SYMBOL(block_page_mkwrite);
2429 2445
2430/* 2446/*
2431 * nobh_write_begin()'s prereads are special: the buffer_heads are freed 2447 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
@@ -2849,6 +2865,7 @@ unlock:
2849out: 2865out:
2850 return err; 2866 return err;
2851} 2867}
2868EXPORT_SYMBOL(block_truncate_page);
2852 2869
2853/* 2870/*
2854 * The generic ->writepage function for buffer-backed address_spaces 2871 * The generic ->writepage function for buffer-backed address_spaces
@@ -2890,6 +2907,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2890 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 2907 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2891 return __block_write_full_page(inode, page, get_block, wbc, handler); 2908 return __block_write_full_page(inode, page, get_block, wbc, handler);
2892} 2909}
2910EXPORT_SYMBOL(block_write_full_page_endio);
2893 2911
2894/* 2912/*
2895 * The generic ->writepage function for buffer-backed address_spaces 2913 * The generic ->writepage function for buffer-backed address_spaces
@@ -2900,7 +2918,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
2900 return block_write_full_page_endio(page, get_block, wbc, 2918 return block_write_full_page_endio(page, get_block, wbc,
2901 end_buffer_async_write); 2919 end_buffer_async_write);
2902} 2920}
2903 2921EXPORT_SYMBOL(block_write_full_page);
2904 2922
2905sector_t generic_block_bmap(struct address_space *mapping, sector_t block, 2923sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2906 get_block_t *get_block) 2924 get_block_t *get_block)
@@ -2913,6 +2931,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2913 get_block(inode, block, &tmp, 0); 2931 get_block(inode, block, &tmp, 0);
2914 return tmp.b_blocknr; 2932 return tmp.b_blocknr;
2915} 2933}
2934EXPORT_SYMBOL(generic_block_bmap);
2916 2935
2917static void end_bio_bh_io_sync(struct bio *bio, int err) 2936static void end_bio_bh_io_sync(struct bio *bio, int err)
2918{ 2937{
@@ -2982,6 +3001,7 @@ int submit_bh(int rw, struct buffer_head * bh)
2982 bio_put(bio); 3001 bio_put(bio);
2983 return ret; 3002 return ret;
2984} 3003}
3004EXPORT_SYMBOL(submit_bh);
2985 3005
2986/** 3006/**
2987 * ll_rw_block: low-level access to block devices (DEPRECATED) 3007 * ll_rw_block: low-level access to block devices (DEPRECATED)
@@ -3043,6 +3063,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3043 unlock_buffer(bh); 3063 unlock_buffer(bh);
3044 } 3064 }
3045} 3065}
3066EXPORT_SYMBOL(ll_rw_block);
3046 3067
3047/* 3068/*
3048 * For a data-integrity writeout, we need to wait upon any in-progress I/O 3069 * For a data-integrity writeout, we need to wait upon any in-progress I/O
@@ -3071,6 +3092,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
3071 } 3092 }
3072 return ret; 3093 return ret;
3073} 3094}
3095EXPORT_SYMBOL(sync_dirty_buffer);
3074 3096
3075/* 3097/*
3076 * try_to_free_buffers() checks if all the buffers on this particular page 3098 * try_to_free_buffers() checks if all the buffers on this particular page
@@ -3185,6 +3207,7 @@ void block_sync_page(struct page *page)
3185 if (mapping) 3207 if (mapping)
3186 blk_run_backing_dev(mapping->backing_dev_info, page); 3208 blk_run_backing_dev(mapping->backing_dev_info, page);
3187} 3209}
3210EXPORT_SYMBOL(block_sync_page);
3188 3211
3189/* 3212/*
3190 * There are no bdflush tunables left. But distributions are 3213 * There are no bdflush tunables left. But distributions are
@@ -3361,29 +3384,3 @@ void __init buffer_init(void)
3361 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); 3384 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3362 hotcpu_notifier(buffer_cpu_notify, 0); 3385 hotcpu_notifier(buffer_cpu_notify, 0);
3363} 3386}
3364
3365EXPORT_SYMBOL(__bforget);
3366EXPORT_SYMBOL(__brelse);
3367EXPORT_SYMBOL(__wait_on_buffer);
3368EXPORT_SYMBOL(block_commit_write);
3369EXPORT_SYMBOL(block_prepare_write);
3370EXPORT_SYMBOL(block_page_mkwrite);
3371EXPORT_SYMBOL(block_read_full_page);
3372EXPORT_SYMBOL(block_sync_page);
3373EXPORT_SYMBOL(block_truncate_page);
3374EXPORT_SYMBOL(block_write_full_page);
3375EXPORT_SYMBOL(block_write_full_page_endio);
3376EXPORT_SYMBOL(cont_write_begin);
3377EXPORT_SYMBOL(end_buffer_read_sync);
3378EXPORT_SYMBOL(end_buffer_write_sync);
3379EXPORT_SYMBOL(end_buffer_async_write);
3380EXPORT_SYMBOL(file_fsync);
3381EXPORT_SYMBOL(generic_block_bmap);
3382EXPORT_SYMBOL(generic_cont_expand_simple);
3383EXPORT_SYMBOL(init_buffer);
3384EXPORT_SYMBOL(invalidate_bdev);
3385EXPORT_SYMBOL(ll_rw_block);
3386EXPORT_SYMBOL(mark_buffer_dirty);
3387EXPORT_SYMBOL(submit_bh);
3388EXPORT_SYMBOL(sync_dirty_buffer);
3389EXPORT_SYMBOL(unlock_buffer);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 606912d8f2a8..fea9e898c4ba 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -142,7 +142,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
142 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 142 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
143 if (rc != 0) { 143 if (rc != 0) {
144 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", 144 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d",
145 __func__, *devname, rc));; 145 __func__, *devname, rc));
146 goto compose_mount_options_err; 146 goto compose_mount_options_err;
147 } 147 }
148 /* md_len = strlen(...) + 12 for 'sep+prefixpath=' 148 /* md_len = strlen(...) + 12 for 'sep+prefixpath='
@@ -385,7 +385,7 @@ out_err:
385 goto out; 385 goto out;
386} 386}
387 387
388struct inode_operations cifs_dfs_referral_inode_operations = { 388const struct inode_operations cifs_dfs_referral_inode_operations = {
389 .follow_link = cifs_dfs_follow_mountpoint, 389 .follow_link = cifs_dfs_follow_mountpoint,
390}; 390};
391 391
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3610e9958b4c..d79ce2e95c23 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -50,7 +50,7 @@
50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ 50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
51 51
52#ifdef CONFIG_CIFS_QUOTA 52#ifdef CONFIG_CIFS_QUOTA
53static struct quotactl_ops cifs_quotactl_ops; 53static const struct quotactl_ops cifs_quotactl_ops;
54#endif /* QUOTA */ 54#endif /* QUOTA */
55 55
56int cifsFYI = 0; 56int cifsFYI = 0;
@@ -517,7 +517,7 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
517 return rc; 517 return rc;
518} 518}
519 519
520static struct quotactl_ops cifs_quotactl_ops = { 520static const struct quotactl_ops cifs_quotactl_ops = {
521 .set_xquota = cifs_xquota_set, 521 .set_xquota = cifs_xquota_set,
522 .get_xquota = cifs_xquota_get, 522 .get_xquota = cifs_xquota_get,
523 .set_xstate = cifs_xstate_set, 523 .set_xstate = cifs_xstate_set,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 094325e3f714..ac2b24c192f8 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -67,7 +67,7 @@ extern int cifs_setattr(struct dentry *, struct iattr *);
67 67
68extern const struct inode_operations cifs_file_inode_ops; 68extern const struct inode_operations cifs_file_inode_ops;
69extern const struct inode_operations cifs_symlink_inode_ops; 69extern const struct inode_operations cifs_symlink_inode_ops;
70extern struct inode_operations cifs_dfs_referral_inode_operations; 70extern const struct inode_operations cifs_dfs_referral_inode_operations;
71 71
72 72
73/* Functions related to files and directories */ 73/* Functions related to files and directories */
diff --git a/fs/compat.c b/fs/compat.c
index 6d6f98fe64a0..3aa48834a222 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -100,13 +100,6 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, st
100 get_compat_timespec(&tv[1], &t[1])) 100 get_compat_timespec(&tv[1], &t[1]))
101 return -EFAULT; 101 return -EFAULT;
102 102
103 if ((tv[0].tv_nsec == UTIME_OMIT || tv[0].tv_nsec == UTIME_NOW)
104 && tv[0].tv_sec != 0)
105 return -EINVAL;
106 if ((tv[1].tv_nsec == UTIME_OMIT || tv[1].tv_nsec == UTIME_NOW)
107 && tv[1].tv_sec != 0)
108 return -EINVAL;
109
110 if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) 103 if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
111 return 0; 104 return 0;
112 } 105 }
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 75efb028974b..d5f8c96964be 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -18,14 +18,13 @@
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/mutex.h> 20#include <linux/mutex.h>
21#include <linux/magic.h>
21#include <linux/idr.h> 22#include <linux/idr.h>
22#include <linux/devpts_fs.h> 23#include <linux/devpts_fs.h>
23#include <linux/parser.h> 24#include <linux/parser.h>
24#include <linux/fsnotify.h> 25#include <linux/fsnotify.h>
25#include <linux/seq_file.h> 26#include <linux/seq_file.h>
26 27
27#define DEVPTS_SUPER_MAGIC 0x1cd1
28
29#define DEVPTS_DEFAULT_MODE 0600 28#define DEVPTS_DEFAULT_MODE 0600
30/* 29/*
31 * ptmx is a new node in /dev/pts and will be unused in legacy (single- 30 * ptmx is a new node in /dev/pts and will be unused in legacy (single-
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 1d1d27442235..1c8bb8c3a82e 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -386,9 +386,9 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr)
386 return rv; 386 return rv;
387} 387}
388 388
389static struct seq_operations format1_seq_ops; 389static const struct seq_operations format1_seq_ops;
390static struct seq_operations format2_seq_ops; 390static const struct seq_operations format2_seq_ops;
391static struct seq_operations format3_seq_ops; 391static const struct seq_operations format3_seq_ops;
392 392
393static void *table_seq_start(struct seq_file *seq, loff_t *pos) 393static void *table_seq_start(struct seq_file *seq, loff_t *pos)
394{ 394{
@@ -534,21 +534,21 @@ static void table_seq_stop(struct seq_file *seq, void *iter_ptr)
534 } 534 }
535} 535}
536 536
537static struct seq_operations format1_seq_ops = { 537static const struct seq_operations format1_seq_ops = {
538 .start = table_seq_start, 538 .start = table_seq_start,
539 .next = table_seq_next, 539 .next = table_seq_next,
540 .stop = table_seq_stop, 540 .stop = table_seq_stop,
541 .show = table_seq_show, 541 .show = table_seq_show,
542}; 542};
543 543
544static struct seq_operations format2_seq_ops = { 544static const struct seq_operations format2_seq_ops = {
545 .start = table_seq_start, 545 .start = table_seq_start,
546 .next = table_seq_next, 546 .next = table_seq_next,
547 .stop = table_seq_stop, 547 .stop = table_seq_stop,
548 .show = table_seq_show, 548 .show = table_seq_show,
549}; 549};
550 550
551static struct seq_operations format3_seq_ops = { 551static const struct seq_operations format3_seq_ops = {
552 .start = table_seq_start, 552 .start = table_seq_start,
553 .next = table_seq_next, 553 .next = table_seq_next,
554 .stop = table_seq_stop, 554 .stop = table_seq_stop,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 00b30a2d5466..542f625312f3 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -582,7 +582,7 @@ extern const struct inode_operations ecryptfs_dir_iops;
582extern const struct inode_operations ecryptfs_symlink_iops; 582extern const struct inode_operations ecryptfs_symlink_iops;
583extern const struct super_operations ecryptfs_sops; 583extern const struct super_operations ecryptfs_sops;
584extern const struct dentry_operations ecryptfs_dops; 584extern const struct dentry_operations ecryptfs_dops;
585extern struct address_space_operations ecryptfs_aops; 585extern const struct address_space_operations ecryptfs_aops;
586extern int ecryptfs_verbosity; 586extern int ecryptfs_verbosity;
587extern unsigned int ecryptfs_message_buf_len; 587extern unsigned int ecryptfs_message_buf_len;
588extern signed long ecryptfs_message_wait_timeout; 588extern signed long ecryptfs_message_wait_timeout;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 5c6bab9786e3..05772aeaa8f4 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -545,7 +545,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
545 return rc; 545 return rc;
546} 546}
547 547
548struct address_space_operations ecryptfs_aops = { 548const struct address_space_operations ecryptfs_aops = {
549 .writepage = ecryptfs_writepage, 549 .writepage = ecryptfs_writepage,
550 .readpage = ecryptfs_readpage, 550 .readpage = ecryptfs_readpage,
551 .write_begin = ecryptfs_write_begin, 551 .write_begin = ecryptfs_write_begin,
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 31d12de83a2a..8b47e4200e65 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -68,11 +68,16 @@ int eventfd_signal(struct eventfd_ctx *ctx, int n)
68} 68}
69EXPORT_SYMBOL_GPL(eventfd_signal); 69EXPORT_SYMBOL_GPL(eventfd_signal);
70 70
71static void eventfd_free_ctx(struct eventfd_ctx *ctx)
72{
73 kfree(ctx);
74}
75
71static void eventfd_free(struct kref *kref) 76static void eventfd_free(struct kref *kref)
72{ 77{
73 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); 78 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
74 79
75 kfree(ctx); 80 eventfd_free_ctx(ctx);
76} 81}
77 82
78/** 83/**
@@ -298,9 +303,23 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
298} 303}
299EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); 304EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
300 305
301SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) 306/**
307 * eventfd_file_create - Creates an eventfd file pointer.
308 * @count: Initial eventfd counter value.
309 * @flags: Flags for the eventfd file.
310 *
311 * This function creates an eventfd file pointer, w/out installing it into
312 * the fd table. This is useful when the eventfd file is used during the
313 * initialization of data structures that require extra setup after the eventfd
314 * creation. So the eventfd creation is split into the file pointer creation
315 * phase, and the file descriptor installation phase.
316 * In this way races with userspace closing the newly installed file descriptor
317 * can be avoided.
318 * Returns an eventfd file pointer, or a proper error pointer.
319 */
320struct file *eventfd_file_create(unsigned int count, int flags)
302{ 321{
303 int fd; 322 struct file *file;
304 struct eventfd_ctx *ctx; 323 struct eventfd_ctx *ctx;
305 324
306 /* Check the EFD_* constants for consistency. */ 325 /* Check the EFD_* constants for consistency. */
@@ -308,26 +327,48 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
308 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); 327 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
309 328
310 if (flags & ~EFD_FLAGS_SET) 329 if (flags & ~EFD_FLAGS_SET)
311 return -EINVAL; 330 return ERR_PTR(-EINVAL);
312 331
313 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 332 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
314 if (!ctx) 333 if (!ctx)
315 return -ENOMEM; 334 return ERR_PTR(-ENOMEM);
316 335
317 kref_init(&ctx->kref); 336 kref_init(&ctx->kref);
318 init_waitqueue_head(&ctx->wqh); 337 init_waitqueue_head(&ctx->wqh);
319 ctx->count = count; 338 ctx->count = count;
320 ctx->flags = flags; 339 ctx->flags = flags;
321 340
322 /* 341 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
323 * When we call this, the initialization must be complete, since 342 flags & EFD_SHARED_FCNTL_FLAGS);
324 * anon_inode_getfd() will install the fd. 343 if (IS_ERR(file))
325 */ 344 eventfd_free_ctx(ctx);
326 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 345
327 flags & EFD_SHARED_FCNTL_FLAGS); 346 return file;
328 if (fd < 0) 347}
329 kfree(ctx); 348
349SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
350{
351 int fd, error;
352 struct file *file;
353
354 error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
355 if (error < 0)
356 return error;
357 fd = error;
358
359 file = eventfd_file_create(count, flags);
360 if (IS_ERR(file)) {
361 error = PTR_ERR(file);
362 goto err_put_unused_fd;
363 }
364 fd_install(fd, file);
365
330 return fd; 366 return fd;
367
368err_put_unused_fd:
369 put_unused_fd(fd);
370
371 return error;
331} 372}
332 373
333SYSCALL_DEFINE1(eventfd, unsigned int, count) 374SYSCALL_DEFINE1(eventfd, unsigned int, count)
diff --git a/fs/exec.c b/fs/exec.c
index 172ceb6edde4..5c833c18d0d4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -33,7 +33,7 @@
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/perf_counter.h> 36#include <linux/perf_event.h>
37#include <linux/highmem.h> 37#include <linux/highmem.h>
38#include <linux/spinlock.h> 38#include <linux/spinlock.h>
39#include <linux/key.h> 39#include <linux/key.h>
@@ -845,6 +845,9 @@ static int de_thread(struct task_struct *tsk)
845 sig->notify_count = 0; 845 sig->notify_count = 0;
846 846
847no_thread_group: 847no_thread_group:
848 if (current->mm)
849 setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
850
848 exit_itimers(sig); 851 exit_itimers(sig);
849 flush_itimer_signals(); 852 flush_itimer_signals();
850 853
@@ -923,7 +926,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
923 task_lock(tsk); 926 task_lock(tsk);
924 strlcpy(tsk->comm, buf, sizeof(tsk->comm)); 927 strlcpy(tsk->comm, buf, sizeof(tsk->comm));
925 task_unlock(tsk); 928 task_unlock(tsk);
926 perf_counter_comm(tsk); 929 perf_event_comm(tsk);
927} 930}
928 931
929int flush_old_exec(struct linux_binprm * bprm) 932int flush_old_exec(struct linux_binprm * bprm)
@@ -997,7 +1000,7 @@ int flush_old_exec(struct linux_binprm * bprm)
997 * security domain: 1000 * security domain:
998 */ 1001 */
999 if (!get_dumpable(current->mm)) 1002 if (!get_dumpable(current->mm))
1000 perf_counter_exit_task(current); 1003 perf_event_exit_task(current);
1001 1004
1002 /* An exec changes our domain. We are no longer part of the thread 1005 /* An exec changes our domain. We are no longer part of the thread
1003 group */ 1006 group */
@@ -1354,6 +1357,8 @@ int do_execve(char * filename,
1354 if (retval < 0) 1357 if (retval < 0)
1355 goto out; 1358 goto out;
1356 1359
1360 current->stack_start = current->mm->start_stack;
1361
1357 /* execve succeeded */ 1362 /* execve succeeded */
1358 current->fs->in_exec = 0; 1363 current->fs->in_exec = 0;
1359 current->in_execve = 0; 1364 current->in_execve = 0;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 23701f289e98..dd7175ce5606 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -70,7 +70,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
70 if (PTR_ERR(inode) == -ESTALE) { 70 if (PTR_ERR(inode) == -ESTALE) {
71 ext2_error(dir->i_sb, __func__, 71 ext2_error(dir->i_sb, __func__,
72 "deleted inode referenced: %lu", 72 "deleted inode referenced: %lu",
73 ino); 73 (unsigned long) ino);
74 return ERR_PTR(-EIO); 74 return ERR_PTR(-EIO);
75 } else { 75 } else {
76 return ERR_CAST(inode); 76 return ERR_CAST(inode);
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index b72b85884223..c18fbf3e4068 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -20,7 +20,7 @@ __inode_direct_access(struct inode *inode, sector_t block,
20 void **kaddr, unsigned long *pfn) 20 void **kaddr, unsigned long *pfn)
21{ 21{
22 struct block_device *bdev = inode->i_sb->s_bdev; 22 struct block_device *bdev = inode->i_sb->s_bdev;
23 struct block_device_operations *ops = bdev->bd_disk->fops; 23 const struct block_device_operations *ops = bdev->bd_disk->fops;
24 sector_t sector; 24 sector_t sector;
25 25
26 sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ 26 sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index a8d80a7f1105..72743d360509 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -720,7 +720,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
720static ssize_t ext3_quota_write(struct super_block *sb, int type, 720static ssize_t ext3_quota_write(struct super_block *sb, int type,
721 const char *data, size_t len, loff_t off); 721 const char *data, size_t len, loff_t off);
722 722
723static struct dquot_operations ext3_quota_operations = { 723static const struct dquot_operations ext3_quota_operations = {
724 .initialize = dquot_initialize, 724 .initialize = dquot_initialize,
725 .drop = dquot_drop, 725 .drop = dquot_drop,
726 .alloc_space = dquot_alloc_space, 726 .alloc_space = dquot_alloc_space,
@@ -737,7 +737,7 @@ static struct dquot_operations ext3_quota_operations = {
737 .destroy_dquot = dquot_destroy, 737 .destroy_dquot = dquot_destroy,
738}; 738};
739 739
740static struct quotactl_ops ext3_qctl_operations = { 740static const struct quotactl_ops ext3_qctl_operations = {
741 .quota_on = ext3_quota_on, 741 .quota_on = ext3_quota_on,
742 .quota_off = vfs_quota_off, 742 .quota_off = vfs_quota_off,
743 .quota_sync = vfs_quota_sync, 743 .quota_sync = vfs_quota_sync,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4abd683b963d..3a798737e305 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2337,7 +2337,7 @@ static int __mpage_da_writepage(struct page *page,
2337 /* 2337 /*
2338 * Rest of the page in the page_vec 2338 * Rest of the page in the page_vec
2339 * redirty then and skip then. We will 2339 * redirty then and skip then. We will
2340 * try to to write them again after 2340 * try to write them again after
2341 * starting a new transaction 2341 * starting a new transaction
2342 */ 2342 */
2343 redirty_page_for_writepage(wbc, page); 2343 redirty_page_for_writepage(wbc, page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a6b1ab734728..df539ba27779 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -964,7 +964,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
964static ssize_t ext4_quota_write(struct super_block *sb, int type, 964static ssize_t ext4_quota_write(struct super_block *sb, int type,
965 const char *data, size_t len, loff_t off); 965 const char *data, size_t len, loff_t off);
966 966
967static struct dquot_operations ext4_quota_operations = { 967static const struct dquot_operations ext4_quota_operations = {
968 .initialize = dquot_initialize, 968 .initialize = dquot_initialize,
969 .drop = dquot_drop, 969 .drop = dquot_drop,
970 .alloc_space = dquot_alloc_space, 970 .alloc_space = dquot_alloc_space,
@@ -985,7 +985,7 @@ static struct dquot_operations ext4_quota_operations = {
985 .destroy_dquot = dquot_destroy, 985 .destroy_dquot = dquot_destroy,
986}; 986};
987 987
988static struct quotactl_ops ext4_qctl_operations = { 988static const struct quotactl_ops ext4_qctl_operations = {
989 .quota_on = ext4_quota_on, 989 .quota_on = ext4_quota_on,
990 .quota_off = vfs_quota_off, 990 .quota_off = vfs_quota_off,
991 .quota_sync = vfs_quota_sync, 991 .quota_sync = vfs_quota_sync,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 28c590b7c9da..8f1cfb02a6cb 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -179,7 +179,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
179 * always aligned to a 64 bit boundary. 179 * always aligned to a 64 bit boundary.
180 * 180 *
181 * The size of the buffer is in bytes, but is it assumed that it is 181 * The size of the buffer is in bytes, but is it assumed that it is
182 * always ok to to read a complete multiple of 64 bits at the end 182 * always ok to read a complete multiple of 64 bits at the end
183 * of the block in case the end is no aligned to a natural boundary. 183 * of the block in case the end is no aligned to a natural boundary.
184 * 184 *
185 * Return: the block number (bitmap buffer scope) that was found 185 * Return: the block number (bitmap buffer scope) that was found
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a93b885311d8..eba6d552d9c9 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -31,12 +31,10 @@
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/ima.h> 33#include <linux/ima.h>
34#include <linux/magic.h>
34 35
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36 37
37/* some random number */
38#define HUGETLBFS_MAGIC 0x958458f6
39
40static const struct super_operations hugetlbfs_ops; 38static const struct super_operations hugetlbfs_ops;
41static const struct address_space_operations hugetlbfs_aops; 39static const struct address_space_operations hugetlbfs_aops;
42const struct file_operations hugetlbfs_file_operations; 40const struct file_operations hugetlbfs_file_operations;
@@ -507,6 +505,13 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
507 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 505 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
508 INIT_LIST_HEAD(&inode->i_mapping->private_list); 506 INIT_LIST_HEAD(&inode->i_mapping->private_list);
509 info = HUGETLBFS_I(inode); 507 info = HUGETLBFS_I(inode);
508 /*
509 * The policy is initialized here even if we are creating a
510 * private inode because initialization simply creates an
511 * an empty rb tree and calls spin_lock_init(), later when we
512 * call mpol_free_shared_policy() it will just return because
513 * the rb tree will still be empty.
514 */
510 mpol_shared_policy_init(&info->policy, NULL); 515 mpol_shared_policy_init(&info->policy, NULL);
511 switch (mode & S_IFMT) { 516 switch (mode & S_IFMT) {
512 default: 517 default:
@@ -931,13 +936,19 @@ static struct file_system_type hugetlbfs_fs_type = {
931 936
932static struct vfsmount *hugetlbfs_vfsmount; 937static struct vfsmount *hugetlbfs_vfsmount;
933 938
934static int can_do_hugetlb_shm(void) 939static int can_do_hugetlb_shm(int creat_flags)
935{ 940{
936 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 941 if (creat_flags != HUGETLB_SHMFS_INODE)
942 return 0;
943 if (capable(CAP_IPC_LOCK))
944 return 1;
945 if (in_group_p(sysctl_hugetlb_shm_group))
946 return 1;
947 return 0;
937} 948}
938 949
939struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, 950struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
940 struct user_struct **user) 951 struct user_struct **user, int creat_flags)
941{ 952{
942 int error = -ENOMEM; 953 int error = -ENOMEM;
943 struct file *file; 954 struct file *file;
@@ -949,7 +960,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
949 if (!hugetlbfs_vfsmount) 960 if (!hugetlbfs_vfsmount)
950 return ERR_PTR(-ENOENT); 961 return ERR_PTR(-ENOENT);
951 962
952 if (!can_do_hugetlb_shm()) { 963 if (!can_do_hugetlb_shm(creat_flags)) {
953 *user = current_user(); 964 *user = current_user();
954 if (user_shm_lock(size, *user)) { 965 if (user_shm_lock(size, *user)) {
955 WARN_ONCE(1, 966 WARN_ONCE(1,
diff --git a/fs/inode.c b/fs/inode.c
index b2ba83d2c4e1..76582b06ab97 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/rwsem.h>
17#include <linux/hash.h> 18#include <linux/hash.h>
18#include <linux/swap.h> 19#include <linux/swap.h>
19#include <linux/security.h> 20#include <linux/security.h>
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly;
87DEFINE_SPINLOCK(inode_lock); 88DEFINE_SPINLOCK(inode_lock);
88 89
89/* 90/*
90 * iprune_mutex provides exclusion between the kswapd or try_to_free_pages 91 * iprune_sem provides exclusion between the kswapd or try_to_free_pages
91 * icache shrinking path, and the umount path. Without this exclusion, 92 * icache shrinking path, and the umount path. Without this exclusion,
92 * by the time prune_icache calls iput for the inode whose pages it has 93 * by the time prune_icache calls iput for the inode whose pages it has
93 * been invalidating, or by the time it calls clear_inode & destroy_inode 94 * been invalidating, or by the time it calls clear_inode & destroy_inode
94 * from its final dispose_list, the struct super_block they refer to 95 * from its final dispose_list, the struct super_block they refer to
95 * (for inode->i_sb->s_op) may already have been freed and reused. 96 * (for inode->i_sb->s_op) may already have been freed and reused.
97 *
98 * We make this an rwsem because the fastpath is icache shrinking. In
99 * some cases a filesystem may be doing a significant amount of work in
100 * its inode reclaim code, so this should improve parallelism.
96 */ 101 */
97static DEFINE_MUTEX(iprune_mutex); 102static DECLARE_RWSEM(iprune_sem);
98 103
99/* 104/*
100 * Statistics gathering.. 105 * Statistics gathering..
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode)
123int inode_init_always(struct super_block *sb, struct inode *inode) 128int inode_init_always(struct super_block *sb, struct inode *inode)
124{ 129{
125 static const struct address_space_operations empty_aops; 130 static const struct address_space_operations empty_aops;
126 static struct inode_operations empty_iops; 131 static const struct inode_operations empty_iops;
127 static const struct file_operations empty_fops; 132 static const struct file_operations empty_fops;
128 struct address_space *const mapping = &inode->i_data; 133 struct address_space *const mapping = &inode->i_data;
129 134
@@ -381,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
381 /* 386 /*
382 * We can reschedule here without worrying about the list's 387 * We can reschedule here without worrying about the list's
383 * consistency because the per-sb list of inodes must not 388 * consistency because the per-sb list of inodes must not
384 * change during umount anymore, and because iprune_mutex keeps 389 * change during umount anymore, and because iprune_sem keeps
385 * shrink_icache_memory() away. 390 * shrink_icache_memory() away.
386 */ 391 */
387 cond_resched_lock(&inode_lock); 392 cond_resched_lock(&inode_lock);
@@ -420,7 +425,7 @@ int invalidate_inodes(struct super_block *sb)
420 int busy; 425 int busy;
421 LIST_HEAD(throw_away); 426 LIST_HEAD(throw_away);
422 427
423 mutex_lock(&iprune_mutex); 428 down_write(&iprune_sem);
424 spin_lock(&inode_lock); 429 spin_lock(&inode_lock);
425 inotify_unmount_inodes(&sb->s_inodes); 430 inotify_unmount_inodes(&sb->s_inodes);
426 fsnotify_unmount_inodes(&sb->s_inodes); 431 fsnotify_unmount_inodes(&sb->s_inodes);
@@ -428,7 +433,7 @@ int invalidate_inodes(struct super_block *sb)
428 spin_unlock(&inode_lock); 433 spin_unlock(&inode_lock);
429 434
430 dispose_list(&throw_away); 435 dispose_list(&throw_away);
431 mutex_unlock(&iprune_mutex); 436 up_write(&iprune_sem);
432 437
433 return busy; 438 return busy;
434} 439}
@@ -467,7 +472,7 @@ static void prune_icache(int nr_to_scan)
467 int nr_scanned; 472 int nr_scanned;
468 unsigned long reap = 0; 473 unsigned long reap = 0;
469 474
470 mutex_lock(&iprune_mutex); 475 down_read(&iprune_sem);
471 spin_lock(&inode_lock); 476 spin_lock(&inode_lock);
472 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 477 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
473 struct inode *inode; 478 struct inode *inode;
@@ -509,7 +514,7 @@ static void prune_icache(int nr_to_scan)
509 spin_unlock(&inode_lock); 514 spin_unlock(&inode_lock);
510 515
511 dispose_list(&freeable); 516 dispose_list(&freeable);
512 mutex_unlock(&iprune_mutex); 517 up_read(&iprune_sem);
513} 518}
514 519
515/* 520/*
@@ -695,13 +700,15 @@ void unlock_new_inode(struct inode *inode)
695 } 700 }
696#endif 701#endif
697 /* 702 /*
698 * This is special! We do not need the spinlock 703 * This is special! We do not need the spinlock when clearing I_LOCK,
699 * when clearing I_LOCK, because we're guaranteed 704 * because we're guaranteed that nobody else tries to do anything about
700 * that nobody else tries to do anything about the 705 * the state of the inode when it is locked, as we just created it (so
701 * state of the inode when it is locked, as we 706 * there can be no old holders that haven't tested I_LOCK).
702 * just created it (so there can be no old holders 707 * However we must emit the memory barrier so that other CPUs reliably
703 * that haven't tested I_LOCK). 708 * see the clearing of I_LOCK after the other inode initialisation has
709 * completed.
704 */ 710 */
711 smp_mb();
705 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 712 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
706 inode->i_state &= ~(I_LOCK|I_NEW); 713 inode->i_state &= ~(I_LOCK|I_NEW);
707 wake_up_inode(inode); 714 wake_up_inode(inode);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a8a358bc0f21..53b86e16e5fe 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -768,7 +768,7 @@ static void jbd2_seq_history_stop(struct seq_file *seq, void *v)
768{ 768{
769} 769}
770 770
771static struct seq_operations jbd2_seq_history_ops = { 771static const struct seq_operations jbd2_seq_history_ops = {
772 .start = jbd2_seq_history_start, 772 .start = jbd2_seq_history_start,
773 .next = jbd2_seq_history_next, 773 .next = jbd2_seq_history_next,
774 .stop = jbd2_seq_history_stop, 774 .stop = jbd2_seq_history_stop,
@@ -872,7 +872,7 @@ static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
872{ 872{
873} 873}
874 874
875static struct seq_operations jbd2_seq_info_ops = { 875static const struct seq_operations jbd2_seq_info_ops = {
876 .start = jbd2_seq_info_start, 876 .start = jbd2_seq_info_start,
877 .next = jbd2_seq_info_next, 877 .next = jbd2_seq_info_next,
878 .stop = jbd2_seq_info_stop, 878 .stop = jbd2_seq_info_stop,
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 0035c021395a..9a80e8e595d0 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -123,7 +123,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child)
123 return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino)); 123 return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino));
124} 124}
125 125
126static struct export_operations jffs2_export_ops = { 126static const struct export_operations jffs2_export_ops = {
127 .get_parent = jffs2_get_parent, 127 .get_parent = jffs2_get_parent,
128 .fh_to_dentry = jffs2_fh_to_dentry, 128 .fh_to_dentry = jffs2_fh_to_dentry,
129 .fh_to_parent = jffs2_fh_to_parent, 129 .fh_to_parent = jffs2_fh_to_parent,
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 1f3b0fc0d351..fc9032dc8862 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
166 */ 166 */
167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
168 continue; 168 continue;
169 if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) 169 if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
170 continue; 170 continue;
171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
172 continue; 172 continue;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 4336adba952a..c81249fef11f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -458,7 +458,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
458 nlm_put_lockowner(fl->fl_u.nfs_fl.owner); 458 nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
459} 459}
460 460
461static struct file_lock_operations nlmclnt_lock_ops = { 461static const struct file_lock_operations nlmclnt_lock_ops = {
462 .fl_copy_lock = nlmclnt_locks_copy_lock, 462 .fl_copy_lock = nlmclnt_locks_copy_lock,
463 .fl_release_private = nlmclnt_locks_release_private, 463 .fl_release_private = nlmclnt_locks_release_private,
464}; 464};
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 7cb076ac6b45..4600c2037b8b 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
111 */ 111 */
112 chain = &nlm_hosts[nlm_hash_address(ni->sap)]; 112 chain = &nlm_hosts[nlm_hash_address(ni->sap)];
113 hlist_for_each_entry(host, pos, chain, h_hash) { 113 hlist_for_each_entry(host, pos, chain, h_hash) {
114 if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) 114 if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
115 continue; 115 continue;
116 116
117 /* See if we have an NSM handle for this client */ 117 /* See if we have an NSM handle for this client */
@@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
125 if (host->h_server != ni->server) 125 if (host->h_server != ni->server)
126 continue; 126 continue;
127 if (ni->server && 127 if (ni->server &&
128 !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) 128 !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
129 continue; 129 continue;
130 130
131 /* Move to head of hash chain. */ 131 /* Move to head of hash chain. */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 30c933188dd7..f956651d0f65 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
209 struct nsm_handle *nsm; 209 struct nsm_handle *nsm;
210 210
211 list_for_each_entry(nsm, &nsm_handles, sm_link) 211 list_for_each_entry(nsm, &nsm_handles, sm_link)
212 if (nlm_cmp_addr(nsm_addr(nsm), sap)) 212 if (rpc_cmp_addr(nsm_addr(nsm), sap))
213 return nsm; 213 return nsm;
214 return NULL; 214 return NULL;
215} 215}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e577a78d7bac..d1001790fa9a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -705,7 +705,7 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
705 return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; 705 return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
706} 706}
707 707
708struct lock_manager_operations nlmsvc_lock_operations = { 708const struct lock_manager_operations nlmsvc_lock_operations = {
709 .fl_compare_owner = nlmsvc_same_owner, 709 .fl_compare_owner = nlmsvc_same_owner,
710 .fl_notify = nlmsvc_notify_blocked, 710 .fl_notify = nlmsvc_notify_blocked,
711 .fl_grant = nlmsvc_grant_deferred, 711 .fl_grant = nlmsvc_grant_deferred,
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 9e4d6aab611b..ad478da7ca63 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
417static int 417static int
418nlmsvc_match_ip(void *datap, struct nlm_host *host) 418nlmsvc_match_ip(void *datap, struct nlm_host *host)
419{ 419{
420 return nlm_cmp_addr(nlm_srcaddr(host), datap); 420 return rpc_cmp_addr(nlm_srcaddr(host), datap);
421} 421}
422 422
423/** 423/**
diff --git a/fs/locks.c b/fs/locks.c
index 19ee18a6829b..a8794f233bc9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -434,7 +434,7 @@ static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
434 return fl->fl_file == try->fl_file; 434 return fl->fl_file == try->fl_file;
435} 435}
436 436
437static struct lock_manager_operations lease_manager_ops = { 437static const struct lock_manager_operations lease_manager_ops = {
438 .fl_break = lease_break_callback, 438 .fl_break = lease_break_callback,
439 .fl_release_private = lease_release_private_callback, 439 .fl_release_private = lease_release_private_callback,
440 .fl_mylease = lease_mylease_callback, 440 .fl_mylease = lease_mylease_callback,
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index d407e7a0b6fe..6198731d7fcd 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -308,14 +308,18 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
308 struct inode *inode = (struct inode*)mapping->host; 308 struct inode *inode = (struct inode*)mapping->host;
309 char *kaddr = page_address(page); 309 char *kaddr = page_address(page);
310 loff_t pos = page_offset(page) + (char*)de - kaddr; 310 loff_t pos = page_offset(page) + (char*)de - kaddr;
311 unsigned len = minix_sb(inode->i_sb)->s_dirsize; 311 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
312 unsigned len = sbi->s_dirsize;
312 int err; 313 int err;
313 314
314 lock_page(page); 315 lock_page(page);
315 err = __minix_write_begin(NULL, mapping, pos, len, 316 err = __minix_write_begin(NULL, mapping, pos, len,
316 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 317 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
317 if (err == 0) { 318 if (err == 0) {
318 de->inode = 0; 319 if (sbi->s_version == MINIX_V3)
320 ((minix3_dirent *) de)->inode = 0;
321 else
322 de->inode = 0;
319 err = dir_commit_chunk(page, pos, len); 323 err = dir_commit_chunk(page, pos, len);
320 } else { 324 } else {
321 unlock_page(page); 325 unlock_page(page);
@@ -440,7 +444,10 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page,
440 err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, 444 err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize,
441 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 445 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
442 if (err == 0) { 446 if (err == 0) {
443 de->inode = inode->i_ino; 447 if (sbi->s_version == MINIX_V3)
448 ((minix3_dirent *) de)->inode = inode->i_ino;
449 else
450 de->inode = inode->i_ino;
444 err = dir_commit_chunk(page, pos, sbi->s_dirsize); 451 err = dir_commit_chunk(page, pos, sbi->s_dirsize);
445 } else { 452 } else {
446 unlock_page(page); 453 unlock_page(page);
@@ -470,7 +477,14 @@ ino_t minix_inode_by_name(struct dentry *dentry)
470 ino_t res = 0; 477 ino_t res = 0;
471 478
472 if (de) { 479 if (de) {
473 res = de->inode; 480 struct address_space *mapping = page->mapping;
481 struct inode *inode = mapping->host;
482 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
483
484 if (sbi->s_version == MINIX_V3)
485 res = ((minix3_dirent *) de)->inode;
486 else
487 res = de->inode;
474 dir_put_page(page); 488 dir_put_page(page);
475 } 489 }
476 return res; 490 return res;
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 9c590722d87e..b8b5b30d53f0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -1241,7 +1241,7 @@ ncp_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
1241 month = 2; 1241 month = 2;
1242 } else { 1242 } else {
1243 nl_day = (year & 3) || day <= 59 ? day : day - 1; 1243 nl_day = (year & 3) || day <= 59 ? day : day - 1;
1244 for (month = 0; month < 12; month++) 1244 for (month = 1; month < 12; month++)
1245 if (day_n[month] > nl_day) 1245 if (day_n[month] > nl_day)
1246 break; 1246 break;
1247 } 1247 }
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index fa038df63ac8..53a7ed7eb9c6 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -442,7 +442,7 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
442 if (dentry) { 442 if (dentry) {
443 struct inode* s_inode = dentry->d_inode; 443 struct inode* s_inode = dentry->d_inode;
444 444
445 if (inode) { 445 if (s_inode) {
446 NCP_FINFO(s_inode)->volNumber = vnum; 446 NCP_FINFO(s_inode)->volNumber = vnum;
447 NCP_FINFO(s_inode)->dirEntNum = de; 447 NCP_FINFO(s_inode)->dirEntNum = de;
448 NCP_FINFO(s_inode)->DosDirNum = dosde; 448 NCP_FINFO(s_inode)->DosDirNum = dosde;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index e5a2dac5f715..76b0aa0f73bf 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -222,7 +222,7 @@ static unsigned decode_sessionid(struct xdr_stream *xdr,
222 222
223 p = read_buf(xdr, len); 223 p = read_buf(xdr, len);
224 if (unlikely(p == NULL)) 224 if (unlikely(p == NULL))
225 return htonl(NFS4ERR_RESOURCE);; 225 return htonl(NFS4ERR_RESOURCE);
226 226
227 memcpy(sid->data, p, len); 227 memcpy(sid->data, p, len);
228 return 0; 228 return 0;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e350bd6a2334..152025358dad 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -933,10 +933,6 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
933 goto out_error; 933 goto out_error;
934 934
935 nfs_server_set_fsinfo(server, &fsinfo); 935 nfs_server_set_fsinfo(server, &fsinfo);
936 error = bdi_init(&server->backing_dev_info);
937 if (error)
938 goto out_error;
939
940 936
941 /* Get some general file system info */ 937 /* Get some general file system info */
942 if (server->namelen == 0) { 938 if (server->namelen == 0) {
@@ -995,6 +991,12 @@ static struct nfs_server *nfs_alloc_server(void)
995 return NULL; 991 return NULL;
996 } 992 }
997 993
994 if (bdi_init(&server->backing_dev_info)) {
995 nfs_free_iostats(server->io_stats);
996 kfree(server);
997 return NULL;
998 }
999
998 return server; 1000 return server;
999} 1001}
1000 1002
@@ -1529,7 +1531,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
1529static void nfs_server_list_stop(struct seq_file *p, void *v); 1531static void nfs_server_list_stop(struct seq_file *p, void *v);
1530static int nfs_server_list_show(struct seq_file *m, void *v); 1532static int nfs_server_list_show(struct seq_file *m, void *v);
1531 1533
1532static struct seq_operations nfs_server_list_ops = { 1534static const struct seq_operations nfs_server_list_ops = {
1533 .start = nfs_server_list_start, 1535 .start = nfs_server_list_start,
1534 .next = nfs_server_list_next, 1536 .next = nfs_server_list_next,
1535 .stop = nfs_server_list_stop, 1537 .stop = nfs_server_list_stop,
@@ -1550,7 +1552,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
1550static void nfs_volume_list_stop(struct seq_file *p, void *v); 1552static void nfs_volume_list_stop(struct seq_file *p, void *v);
1551static int nfs_volume_list_show(struct seq_file *m, void *v); 1553static int nfs_volume_list_show(struct seq_file *m, void *v);
1552 1554
1553static struct seq_operations nfs_volume_list_ops = { 1555static const struct seq_operations nfs_volume_list_ops = {
1554 .start = nfs_volume_list_start, 1556 .start = nfs_volume_list_start,
1555 .next = nfs_volume_list_next, 1557 .next = nfs_volume_list_next,
1556 .stop = nfs_volume_list_stop, 1558 .stop = nfs_volume_list_stop,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1434080aefeb..2ef4fecf3984 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -638,7 +638,7 @@ static void nfs4_fl_release_lock(struct file_lock *fl)
638 nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); 638 nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
639} 639}
640 640
641static struct file_lock_operations nfs4_fl_lock_ops = { 641static const struct file_lock_operations nfs4_fl_lock_ops = {
642 .fl_copy_lock = nfs4_fl_copy_lock, 642 .fl_copy_lock = nfs4_fl_copy_lock,
643 .fl_release_private = nfs4_fl_release_lock, 643 .fl_release_private = nfs4_fl_release_lock,
644}; 644};
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index de935692d40d..f1cc0587cfef 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2190,8 +2190,8 @@ static void nfs_kill_super(struct super_block *s)
2190{ 2190{
2191 struct nfs_server *server = NFS_SB(s); 2191 struct nfs_server *server = NFS_SB(s);
2192 2192
2193 bdi_unregister(&server->backing_dev_info);
2194 kill_anon_super(s); 2193 kill_anon_super(s);
2194 bdi_unregister(&server->backing_dev_info);
2195 nfs_fscache_release_super_cookie(s); 2195 nfs_fscache_release_super_cookie(s);
2196 nfs_free_server(server); 2196 nfs_free_server(server);
2197} 2197}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index d9462643155c..c1c9e035d4a4 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1341,6 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1341 if (rv) 1341 if (rv)
1342 goto out; 1342 goto out;
1343 rv = check_nfsd_access(exp, rqstp); 1343 rv = check_nfsd_access(exp, rqstp);
1344 if (rv)
1345 fh_put(fhp);
1344out: 1346out:
1345 exp_put(exp); 1347 exp_put(exp);
1346 return rv; 1348 return rv;
@@ -1515,7 +1517,7 @@ static int e_show(struct seq_file *m, void *p)
1515 return svc_export_show(m, &svc_export_cache, cp); 1517 return svc_export_show(m, &svc_export_cache, cp);
1516} 1518}
1517 1519
1518struct seq_operations nfs_exports_op = { 1520const struct seq_operations nfs_exports_op = {
1519 .start = e_start, 1521 .start = e_start,
1520 .next = e_next, 1522 .next = e_next,
1521 .stop = e_stop, 1523 .stop = e_stop,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 01d4ec1c88e0..edf926e1062f 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -814,17 +814,6 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
814 return p; 814 return p;
815} 815}
816 816
817static __be32 *
818encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
819 struct svc_fh *fhp)
820{
821 p = encode_post_op_attr(cd->rqstp, p, fhp);
822 *p++ = xdr_one; /* yes, a file handle follows */
823 p = encode_fh(p, fhp);
824 fh_put(fhp);
825 return p;
826}
827
828static int 817static int
829compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, 818compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
830 const char *name, int namlen) 819 const char *name, int namlen)
@@ -836,29 +825,54 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
836 dparent = cd->fh.fh_dentry; 825 dparent = cd->fh.fh_dentry;
837 exp = cd->fh.fh_export; 826 exp = cd->fh.fh_export;
838 827
839 fh_init(fhp, NFS3_FHSIZE);
840 if (isdotent(name, namlen)) { 828 if (isdotent(name, namlen)) {
841 if (namlen == 2) { 829 if (namlen == 2) {
842 dchild = dget_parent(dparent); 830 dchild = dget_parent(dparent);
843 if (dchild == dparent) { 831 if (dchild == dparent) {
844 /* filesystem root - cannot return filehandle for ".." */ 832 /* filesystem root - cannot return filehandle for ".." */
845 dput(dchild); 833 dput(dchild);
846 return 1; 834 return -ENOENT;
847 } 835 }
848 } else 836 } else
849 dchild = dget(dparent); 837 dchild = dget(dparent);
850 } else 838 } else
851 dchild = lookup_one_len(name, dparent, namlen); 839 dchild = lookup_one_len(name, dparent, namlen);
852 if (IS_ERR(dchild)) 840 if (IS_ERR(dchild))
853 return 1; 841 return -ENOENT;
854 if (d_mountpoint(dchild) || 842 rv = -ENOENT;
855 fh_compose(fhp, exp, dchild, &cd->fh) != 0 || 843 if (d_mountpoint(dchild))
856 !dchild->d_inode) 844 goto out;
857 rv = 1; 845 rv = fh_compose(fhp, exp, dchild, &cd->fh);
846 if (rv)
847 goto out;
848 if (!dchild->d_inode)
849 goto out;
850 rv = 0;
851out:
858 dput(dchild); 852 dput(dchild);
859 return rv; 853 return rv;
860} 854}
861 855
856__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
857{
858 struct svc_fh fh;
859 int err;
860
861 fh_init(&fh, NFS3_FHSIZE);
862 err = compose_entry_fh(cd, &fh, name, namlen);
863 if (err) {
864 *p++ = 0;
865 *p++ = 0;
866 goto out;
867 }
868 p = encode_post_op_attr(cd->rqstp, p, &fh);
869 *p++ = xdr_one; /* yes, a file handle follows */
870 p = encode_fh(p, &fh);
871out:
872 fh_put(&fh);
873 return p;
874}
875
862/* 876/*
863 * Encode a directory entry. This one works for both normal readdir 877 * Encode a directory entry. This one works for both normal readdir
864 * and readdirplus. 878 * and readdirplus.
@@ -929,16 +943,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
929 943
930 p = encode_entry_baggage(cd, p, name, namlen, ino); 944 p = encode_entry_baggage(cd, p, name, namlen, ino);
931 945
932 /* throw in readdirplus baggage */ 946 if (plus)
933 if (plus) { 947 p = encode_entryplus_baggage(cd, p, name, namlen);
934 struct svc_fh fh;
935
936 if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
937 *p++ = 0;
938 *p++ = 0;
939 } else
940 p = encode_entryplus_baggage(cd, p, &fh);
941 }
942 num_entry_words = p - cd->buffer; 948 num_entry_words = p - cd->buffer;
943 } else if (cd->rqstp->rq_respages[pn+1] != NULL) { 949 } else if (cd->rqstp->rq_respages[pn+1] != NULL) {
944 /* temporarily encode entry into next page, then move back to 950 /* temporarily encode entry into next page, then move back to
@@ -951,17 +957,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
951 957
952 p1 = encode_entry_baggage(cd, p1, name, namlen, ino); 958 p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
953 959
954 /* throw in readdirplus baggage */ 960 if (plus)
955 if (plus) { 961 p = encode_entryplus_baggage(cd, p1, name, namlen);
956 struct svc_fh fh;
957
958 if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
959 /* zero out the filehandle */
960 *p1++ = 0;
961 *p1++ = 0;
962 } else
963 p1 = encode_entryplus_baggage(cd, p1, &fh);
964 }
965 962
966 /* determine entry word length and lengths to go in pages */ 963 /* determine entry word length and lengths to go in pages */
967 num_entry_words = p1 - tmp; 964 num_entry_words = p1 - tmp;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 54b8b4140c8f..725d02f210e2 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -321,7 +321,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
321 deny = ~pas.group & pas.other; 321 deny = ~pas.group & pas.other;
322 if (deny) { 322 if (deny) {
323 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 323 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
324 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 324 ace->flag = eflag;
325 ace->access_mask = deny_mask_from_posix(deny, flags); 325 ace->access_mask = deny_mask_from_posix(deny, flags);
326 ace->whotype = NFS4_ACL_WHO_GROUP; 326 ace->whotype = NFS4_ACL_WHO_GROUP;
327 ace++; 327 ace++;
@@ -335,7 +335,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
335 if (deny) { 335 if (deny) {
336 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 336 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
337 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 337 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
338 ace->access_mask = mask_from_posix(deny, flags); 338 ace->access_mask = deny_mask_from_posix(deny, flags);
339 ace->whotype = NFS4_ACL_WHO_NAMED; 339 ace->whotype = NFS4_ACL_WHO_NAMED;
340 ace->who = pa->e_id; 340 ace->who = pa->e_id;
341 ace++; 341 ace++;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3fd23f7aceca..24e8d78f8dde 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -43,25 +43,30 @@
43#include <linux/sunrpc/xdr.h> 43#include <linux/sunrpc/xdr.h>
44#include <linux/sunrpc/svc.h> 44#include <linux/sunrpc/svc.h>
45#include <linux/sunrpc/clnt.h> 45#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/svcsock.h>
46#include <linux/nfsd/nfsd.h> 47#include <linux/nfsd/nfsd.h>
47#include <linux/nfsd/state.h> 48#include <linux/nfsd/state.h>
48#include <linux/sunrpc/sched.h> 49#include <linux/sunrpc/sched.h>
49#include <linux/nfs4.h> 50#include <linux/nfs4.h>
51#include <linux/sunrpc/xprtsock.h>
50 52
51#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
52 54
53#define NFSPROC4_CB_NULL 0 55#define NFSPROC4_CB_NULL 0
54#define NFSPROC4_CB_COMPOUND 1 56#define NFSPROC4_CB_COMPOUND 1
57#define NFS4_STATEID_SIZE 16
55 58
56/* Index of predefined Linux callback client operations */ 59/* Index of predefined Linux callback client operations */
57 60
58enum { 61enum {
59 NFSPROC4_CLNT_CB_NULL = 0, 62 NFSPROC4_CLNT_CB_NULL = 0,
60 NFSPROC4_CLNT_CB_RECALL, 63 NFSPROC4_CLNT_CB_RECALL,
64 NFSPROC4_CLNT_CB_SEQUENCE,
61}; 65};
62 66
63enum nfs_cb_opnum4 { 67enum nfs_cb_opnum4 {
64 OP_CB_RECALL = 4, 68 OP_CB_RECALL = 4,
69 OP_CB_SEQUENCE = 11,
65}; 70};
66 71
67#define NFS4_MAXTAGLEN 20 72#define NFS4_MAXTAGLEN 20
@@ -70,17 +75,29 @@ enum nfs_cb_opnum4 {
70#define NFS4_dec_cb_null_sz 0 75#define NFS4_dec_cb_null_sz 0
71#define cb_compound_enc_hdr_sz 4 76#define cb_compound_enc_hdr_sz 4
72#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) 77#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
78#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
79#define cb_sequence_enc_sz (sessionid_sz + 4 + \
80 1 /* no referring calls list yet */)
81#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
82
73#define op_enc_sz 1 83#define op_enc_sz 1
74#define op_dec_sz 2 84#define op_dec_sz 2
75#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) 85#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
76#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) 86#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2)
77#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ 87#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
88 cb_sequence_enc_sz + \
78 1 + enc_stateid_sz + \ 89 1 + enc_stateid_sz + \
79 enc_nfs4_fh_sz) 90 enc_nfs4_fh_sz)
80 91
81#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ 92#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
93 cb_sequence_dec_sz + \
82 op_dec_sz) 94 op_dec_sz)
83 95
96struct nfs4_rpc_args {
97 void *args_op;
98 struct nfsd4_cb_sequence args_seq;
99};
100
84/* 101/*
85* Generic encode routines from fs/nfs/nfs4xdr.c 102* Generic encode routines from fs/nfs/nfs4xdr.c
86*/ 103*/
@@ -137,11 +154,13 @@ xdr_error: \
137} while (0) 154} while (0)
138 155
139struct nfs4_cb_compound_hdr { 156struct nfs4_cb_compound_hdr {
140 int status; 157 /* args */
141 u32 ident; 158 u32 ident; /* minorversion 0 only */
142 u32 nops; 159 u32 nops;
143 __be32 *nops_p; 160 __be32 *nops_p;
144 u32 minorversion; 161 u32 minorversion;
162 /* res */
163 int status;
145 u32 taglen; 164 u32 taglen;
146 char *tag; 165 char *tag;
147}; 166};
@@ -238,6 +257,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
238 hdr->nops++; 257 hdr->nops++;
239} 258}
240 259
260static void
261encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
262 struct nfs4_cb_compound_hdr *hdr)
263{
264 __be32 *p;
265
266 if (hdr->minorversion == 0)
267 return;
268
269 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
270
271 WRITE32(OP_CB_SEQUENCE);
272 WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN);
273 WRITE32(args->cbs_clp->cl_cb_seq_nr);
274 WRITE32(0); /* slotid, always 0 */
275 WRITE32(0); /* highest slotid always 0 */
276 WRITE32(0); /* cachethis always 0 */
277 WRITE32(0); /* FIXME: support referring_call_lists */
278 hdr->nops++;
279}
280
241static int 281static int
242nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) 282nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
243{ 283{
@@ -249,15 +289,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
249} 289}
250 290
251static int 291static int
252nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) 292nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
293 struct nfs4_rpc_args *rpc_args)
253{ 294{
254 struct xdr_stream xdr; 295 struct xdr_stream xdr;
296 struct nfs4_delegation *args = rpc_args->args_op;
255 struct nfs4_cb_compound_hdr hdr = { 297 struct nfs4_cb_compound_hdr hdr = {
256 .ident = args->dl_ident, 298 .ident = args->dl_ident,
299 .minorversion = rpc_args->args_seq.cbs_minorversion,
257 }; 300 };
258 301
259 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 302 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
260 encode_cb_compound_hdr(&xdr, &hdr); 303 encode_cb_compound_hdr(&xdr, &hdr);
304 encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
261 encode_cb_recall(&xdr, args, &hdr); 305 encode_cb_recall(&xdr, args, &hdr);
262 encode_cb_nops(&hdr); 306 encode_cb_nops(&hdr);
263 return 0; 307 return 0;
@@ -299,6 +343,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
299 return 0; 343 return 0;
300} 344}
301 345
346/*
347 * Our current back channel implmentation supports a single backchannel
348 * with a single slot.
349 */
350static int
351decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
352 struct rpc_rqst *rqstp)
353{
354 struct nfs4_sessionid id;
355 int status;
356 u32 dummy;
357 __be32 *p;
358
359 if (res->cbs_minorversion == 0)
360 return 0;
361
362 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
363 if (status)
364 return status;
365
366 /*
367 * If the server returns different values for sessionID, slotID or
368 * sequence number, the server is looney tunes.
369 */
370 status = -ESERVERFAULT;
371
372 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
373 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
374 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
375 if (memcmp(id.data, res->cbs_clp->cl_sessionid.data,
376 NFS4_MAX_SESSIONID_LEN)) {
377 dprintk("%s Invalid session id\n", __func__);
378 goto out;
379 }
380 READ32(dummy);
381 if (dummy != res->cbs_clp->cl_cb_seq_nr) {
382 dprintk("%s Invalid sequence number\n", __func__);
383 goto out;
384 }
385 READ32(dummy); /* slotid must be 0 */
386 if (dummy != 0) {
387 dprintk("%s Invalid slotid\n", __func__);
388 goto out;
389 }
390 /* FIXME: process highest slotid and target highest slotid */
391 status = 0;
392out:
393 return status;
394}
395
396
302static int 397static int
303nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) 398nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
304{ 399{
@@ -306,7 +401,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
306} 401}
307 402
308static int 403static int
309nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) 404nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
405 struct nfsd4_cb_sequence *seq)
310{ 406{
311 struct xdr_stream xdr; 407 struct xdr_stream xdr;
312 struct nfs4_cb_compound_hdr hdr; 408 struct nfs4_cb_compound_hdr hdr;
@@ -316,6 +412,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
316 status = decode_cb_compound_hdr(&xdr, &hdr); 412 status = decode_cb_compound_hdr(&xdr, &hdr);
317 if (status) 413 if (status)
318 goto out; 414 goto out;
415 if (seq) {
416 status = decode_cb_sequence(&xdr, seq, rqstp);
417 if (status)
418 goto out;
419 }
319 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); 420 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
320out: 421out:
321 return status; 422 return status;
@@ -377,16 +478,15 @@ static int max_cb_time(void)
377 478
378int setup_callback_client(struct nfs4_client *clp) 479int setup_callback_client(struct nfs4_client *clp)
379{ 480{
380 struct sockaddr_in addr;
381 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 481 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
382 struct rpc_timeout timeparms = { 482 struct rpc_timeout timeparms = {
383 .to_initval = max_cb_time(), 483 .to_initval = max_cb_time(),
384 .to_retries = 0, 484 .to_retries = 0,
385 }; 485 };
386 struct rpc_create_args args = { 486 struct rpc_create_args args = {
387 .protocol = IPPROTO_TCP, 487 .protocol = XPRT_TRANSPORT_TCP,
388 .address = (struct sockaddr *)&addr, 488 .address = (struct sockaddr *) &cb->cb_addr,
389 .addrsize = sizeof(addr), 489 .addrsize = cb->cb_addrlen,
390 .timeout = &timeparms, 490 .timeout = &timeparms,
391 .program = &cb_program, 491 .program = &cb_program,
392 .prognumber = cb->cb_prog, 492 .prognumber = cb->cb_prog,
@@ -399,13 +499,10 @@ int setup_callback_client(struct nfs4_client *clp)
399 499
400 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 500 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
401 return -EINVAL; 501 return -EINVAL;
402 502 if (cb->cb_minorversion) {
403 /* Initialize address */ 503 args.bc_xprt = clp->cl_cb_xprt;
404 memset(&addr, 0, sizeof(addr)); 504 args.protocol = XPRT_TRANSPORT_BC_TCP;
405 addr.sin_family = AF_INET; 505 }
406 addr.sin_port = htons(cb->cb_port);
407 addr.sin_addr.s_addr = htonl(cb->cb_addr);
408
409 /* Create RPC client */ 506 /* Create RPC client */
410 client = rpc_create(&args); 507 client = rpc_create(&args);
411 if (IS_ERR(client)) { 508 if (IS_ERR(client)) {
@@ -439,42 +536,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
439 .rpc_call_done = nfsd4_cb_probe_done, 536 .rpc_call_done = nfsd4_cb_probe_done,
440}; 537};
441 538
442static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) 539static struct rpc_cred *callback_cred;
443{
444 struct auth_cred acred = {
445 .machine_cred = 1
446 };
447 540
448 /* 541int set_callback_cred(void)
449 * Note in the gss case this doesn't actually have to wait for a 542{
450 * gss upcall (or any calls to the client); this just creates a 543 callback_cred = rpc_lookup_machine_cred();
451 * non-uptodate cred which the rpc state machine will fill in with 544 if (!callback_cred)
452 * a refresh_upcall later. 545 return -ENOMEM;
453 */ 546 return 0;
454 return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
455 RPCAUTH_LOOKUP_NEW);
456} 547}
457 548
549
458void do_probe_callback(struct nfs4_client *clp) 550void do_probe_callback(struct nfs4_client *clp)
459{ 551{
460 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 552 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
461 struct rpc_message msg = { 553 struct rpc_message msg = {
462 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 554 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
463 .rpc_argp = clp, 555 .rpc_argp = clp,
556 .rpc_cred = callback_cred
464 }; 557 };
465 struct rpc_cred *cred;
466 int status; 558 int status;
467 559
468 cred = lookup_cb_cred(cb);
469 if (IS_ERR(cred)) {
470 status = PTR_ERR(cred);
471 goto out;
472 }
473 cb->cb_cred = cred;
474 msg.rpc_cred = cb->cb_cred;
475 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, 560 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
476 &nfsd4_cb_probe_ops, (void *)clp); 561 &nfsd4_cb_probe_ops, (void *)clp);
477out:
478 if (status) { 562 if (status) {
479 warn_no_callback_path(clp, status); 563 warn_no_callback_path(clp, status);
480 put_nfs4_client(clp); 564 put_nfs4_client(clp);
@@ -503,11 +587,95 @@ nfsd4_probe_callback(struct nfs4_client *clp)
503 do_probe_callback(clp); 587 do_probe_callback(clp);
504} 588}
505 589
590/*
591 * There's currently a single callback channel slot.
592 * If the slot is available, then mark it busy. Otherwise, set the
593 * thread for sleeping on the callback RPC wait queue.
594 */
595static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
596 struct rpc_task *task)
597{
598 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
599 u32 *ptr = (u32 *)clp->cl_sessionid.data;
600 int status = 0;
601
602 dprintk("%s: %u:%u:%u:%u\n", __func__,
603 ptr[0], ptr[1], ptr[2], ptr[3]);
604
605 if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
606 rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
607 dprintk("%s slot is busy\n", __func__);
608 status = -EAGAIN;
609 goto out;
610 }
611
612 /*
613 * We'll need the clp during XDR encoding and decoding,
614 * and the sequence during decoding to verify the reply
615 */
616 args->args_seq.cbs_clp = clp;
617 task->tk_msg.rpc_resp = &args->args_seq;
618
619out:
620 dprintk("%s status=%d\n", __func__, status);
621 return status;
622}
623
624/*
625 * TODO: cb_sequence should support referring call lists, cachethis, multiple
626 * slots, and mark callback channel down on communication errors.
627 */
628static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
629{
630 struct nfs4_delegation *dp = calldata;
631 struct nfs4_client *clp = dp->dl_client;
632 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
633 u32 minorversion = clp->cl_cb_conn.cb_minorversion;
634 int status = 0;
635
636 args->args_seq.cbs_minorversion = minorversion;
637 if (minorversion) {
638 status = nfsd41_cb_setup_sequence(clp, task);
639 if (status) {
640 if (status != -EAGAIN) {
641 /* terminate rpc task */
642 task->tk_status = status;
643 task->tk_action = NULL;
644 }
645 return;
646 }
647 }
648 rpc_call_start(task);
649}
650
651static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
652{
653 struct nfs4_delegation *dp = calldata;
654 struct nfs4_client *clp = dp->dl_client;
655
656 dprintk("%s: minorversion=%d\n", __func__,
657 clp->cl_cb_conn.cb_minorversion);
658
659 if (clp->cl_cb_conn.cb_minorversion) {
660 /* No need for lock, access serialized in nfsd4_cb_prepare */
661 ++clp->cl_cb_seq_nr;
662 clear_bit(0, &clp->cl_cb_slot_busy);
663 rpc_wake_up_next(&clp->cl_cb_waitq);
664 dprintk("%s: freed slot, new seqid=%d\n", __func__,
665 clp->cl_cb_seq_nr);
666
667 /* We're done looking into the sequence information */
668 task->tk_msg.rpc_resp = NULL;
669 }
670}
671
506static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 672static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
507{ 673{
508 struct nfs4_delegation *dp = calldata; 674 struct nfs4_delegation *dp = calldata;
509 struct nfs4_client *clp = dp->dl_client; 675 struct nfs4_client *clp = dp->dl_client;
510 676
677 nfsd4_cb_done(task, calldata);
678
511 switch (task->tk_status) { 679 switch (task->tk_status) {
512 case -EIO: 680 case -EIO:
513 /* Network partition? */ 681 /* Network partition? */
@@ -520,16 +688,19 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
520 break; 688 break;
521 default: 689 default:
522 /* success, or error we can't handle */ 690 /* success, or error we can't handle */
523 return; 691 goto done;
524 } 692 }
525 if (dp->dl_retries--) { 693 if (dp->dl_retries--) {
526 rpc_delay(task, 2*HZ); 694 rpc_delay(task, 2*HZ);
527 task->tk_status = 0; 695 task->tk_status = 0;
528 rpc_restart_call(task); 696 rpc_restart_call(task);
697 return;
529 } else { 698 } else {
530 atomic_set(&clp->cl_cb_conn.cb_set, 0); 699 atomic_set(&clp->cl_cb_conn.cb_set, 0);
531 warn_no_callback_path(clp, task->tk_status); 700 warn_no_callback_path(clp, task->tk_status);
532 } 701 }
702done:
703 kfree(task->tk_msg.rpc_argp);
533} 704}
534 705
535static void nfsd4_cb_recall_release(void *calldata) 706static void nfsd4_cb_recall_release(void *calldata)
@@ -542,6 +713,7 @@ static void nfsd4_cb_recall_release(void *calldata)
542} 713}
543 714
544static const struct rpc_call_ops nfsd4_cb_recall_ops = { 715static const struct rpc_call_ops nfsd4_cb_recall_ops = {
716 .rpc_call_prepare = nfsd4_cb_prepare,
545 .rpc_call_done = nfsd4_cb_recall_done, 717 .rpc_call_done = nfsd4_cb_recall_done,
546 .rpc_release = nfsd4_cb_recall_release, 718 .rpc_release = nfsd4_cb_recall_release,
547}; 719};
@@ -554,17 +726,24 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
554{ 726{
555 struct nfs4_client *clp = dp->dl_client; 727 struct nfs4_client *clp = dp->dl_client;
556 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; 728 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
729 struct nfs4_rpc_args *args;
557 struct rpc_message msg = { 730 struct rpc_message msg = {
558 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 731 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
559 .rpc_argp = dp, 732 .rpc_cred = callback_cred
560 .rpc_cred = clp->cl_cb_conn.cb_cred
561 }; 733 };
562 int status; 734 int status = -ENOMEM;
563 735
736 args = kzalloc(sizeof(*args), GFP_KERNEL);
737 if (!args)
738 goto out;
739 args->args_op = dp;
740 msg.rpc_argp = args;
564 dp->dl_retries = 1; 741 dp->dl_retries = 1;
565 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, 742 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
566 &nfsd4_cb_recall_ops, dp); 743 &nfsd4_cb_recall_ops, dp);
744out:
567 if (status) { 745 if (status) {
746 kfree(args);
568 put_nfs4_client(clp); 747 put_nfs4_client(clp);
569 nfs4_put_delegation(dp); 748 nfs4_put_delegation(dp);
570 } 749 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7c8801769a3c..bebc0c2e1b0a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -68,7 +68,6 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
68 u32 *bmval, u32 *writable) 68 u32 *bmval, u32 *writable)
69{ 69{
70 struct dentry *dentry = cstate->current_fh.fh_dentry; 70 struct dentry *dentry = cstate->current_fh.fh_dentry;
71 struct svc_export *exp = cstate->current_fh.fh_export;
72 71
73 /* 72 /*
74 * Check about attributes are supported by the NFSv4 server or not. 73 * Check about attributes are supported by the NFSv4 server or not.
@@ -80,17 +79,13 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
80 return nfserr_attrnotsupp; 79 return nfserr_attrnotsupp;
81 80
82 /* 81 /*
83 * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported 82 * Check FATTR4_WORD0_ACL can be supported
84 * in current environment or not. 83 * in current environment or not.
85 */ 84 */
86 if (bmval[0] & FATTR4_WORD0_ACL) { 85 if (bmval[0] & FATTR4_WORD0_ACL) {
87 if (!IS_POSIXACL(dentry->d_inode)) 86 if (!IS_POSIXACL(dentry->d_inode))
88 return nfserr_attrnotsupp; 87 return nfserr_attrnotsupp;
89 } 88 }
90 if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) {
91 if (exp->ex_fslocs.locations == NULL)
92 return nfserr_attrnotsupp;
93 }
94 89
95 /* 90 /*
96 * According to spec, read-only attributes return ERR_INVAL. 91 * According to spec, read-only attributes return ERR_INVAL.
@@ -123,6 +118,35 @@ nfsd4_check_open_attributes(struct svc_rqst *rqstp,
123 return status; 118 return status;
124} 119}
125 120
121static int
122is_create_with_attrs(struct nfsd4_open *open)
123{
124 return open->op_create == NFS4_OPEN_CREATE
125 && (open->op_createmode == NFS4_CREATE_UNCHECKED
126 || open->op_createmode == NFS4_CREATE_GUARDED
127 || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
128}
129
130/*
131 * if error occurs when setting the acl, just clear the acl bit
132 * in the returned attr bitmap.
133 */
134static void
135do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
136 struct nfs4_acl *acl, u32 *bmval)
137{
138 __be32 status;
139
140 status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
141 if (status)
142 /*
143 * We should probably fail the whole open at this point,
144 * but we've already created the file, so it's too late;
145 * So this seems the least of evils:
146 */
147 bmval[0] &= ~FATTR4_WORD0_ACL;
148}
149
126static inline void 150static inline void
127fh_dup2(struct svc_fh *dst, struct svc_fh *src) 151fh_dup2(struct svc_fh *dst, struct svc_fh *src)
128{ 152{
@@ -206,6 +230,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
206 if (status) 230 if (status)
207 goto out; 231 goto out;
208 232
233 if (is_create_with_attrs(open) && open->op_acl != NULL)
234 do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval);
235
209 set_change_info(&open->op_cinfo, current_fh); 236 set_change_info(&open->op_cinfo, current_fh);
210 fh_dup2(current_fh, &resfh); 237 fh_dup2(current_fh, &resfh);
211 238
@@ -536,12 +563,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
536 status = nfserr_badtype; 563 status = nfserr_badtype;
537 } 564 }
538 565
539 if (!status) { 566 if (status)
540 fh_unlock(&cstate->current_fh); 567 goto out;
541 set_change_info(&create->cr_cinfo, &cstate->current_fh); 568
542 fh_dup2(&cstate->current_fh, &resfh); 569 if (create->cr_acl != NULL)
543 } 570 do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
571 create->cr_bmval);
544 572
573 fh_unlock(&cstate->current_fh);
574 set_change_info(&create->cr_cinfo, &cstate->current_fh);
575 fh_dup2(&cstate->current_fh, &resfh);
576out:
545 fh_put(&resfh); 577 fh_put(&resfh);
546 return status; 578 return status;
547} 579}
@@ -947,34 +979,6 @@ static struct nfsd4_operation nfsd4_ops[];
947static const char *nfsd4_op_name(unsigned opnum); 979static const char *nfsd4_op_name(unsigned opnum);
948 980
949/* 981/*
950 * This is a replay of a compound for which no cache entry pages
951 * were used. Encode the sequence operation, and if cachethis is FALSE
952 * encode the uncache rep error on the next operation.
953 */
954static __be32
955nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args,
956 struct nfsd4_compoundres *resp)
957{
958 struct nfsd4_op *op;
959
960 dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__,
961 resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
962
963 /* Encode the replayed sequence operation */
964 BUG_ON(resp->opcnt != 1);
965 op = &args->ops[resp->opcnt - 1];
966 nfsd4_encode_operation(resp, op);
967
968 /*return nfserr_retry_uncached_rep in next operation. */
969 if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) {
970 op = &args->ops[resp->opcnt++];
971 op->status = nfserr_retry_uncached_rep;
972 nfsd4_encode_operation(resp, op);
973 }
974 return op->status;
975}
976
977/*
978 * Enforce NFSv4.1 COMPOUND ordering rules. 982 * Enforce NFSv4.1 COMPOUND ordering rules.
979 * 983 *
980 * TODO: 984 * TODO:
@@ -1083,13 +1087,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1083 BUG_ON(op->status == nfs_ok); 1087 BUG_ON(op->status == nfs_ok);
1084 1088
1085encode_op: 1089encode_op:
1086 /* Only from SEQUENCE or CREATE_SESSION */ 1090 /* Only from SEQUENCE */
1087 if (resp->cstate.status == nfserr_replay_cache) { 1091 if (resp->cstate.status == nfserr_replay_cache) {
1088 dprintk("%s NFS4.1 replay from cache\n", __func__); 1092 dprintk("%s NFS4.1 replay from cache\n", __func__);
1089 if (nfsd4_not_cached(resp)) 1093 status = op->status;
1090 status = nfsd4_enc_uncached_replay(args, resp);
1091 else
1092 status = op->status;
1093 goto out; 1094 goto out;
1094 } 1095 }
1095 if (op->status == nfserr_replay_me) { 1096 if (op->status == nfserr_replay_me) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 980a216a48c8..2153f9bdbebd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -55,6 +55,7 @@
55#include <linux/lockd/bind.h> 55#include <linux/lockd/bind.h>
56#include <linux/module.h> 56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h> 57#include <linux/sunrpc/svcauth_gss.h>
58#include <linux/sunrpc/clnt.h>
58 59
59#define NFSDDBG_FACILITY NFSDDBG_PROC 60#define NFSDDBG_FACILITY NFSDDBG_PROC
60 61
@@ -413,36 +414,65 @@ gen_sessionid(struct nfsd4_session *ses)
413} 414}
414 415
415/* 416/*
416 * Give the client the number of slots it requests bound by 417 * The protocol defines ca_maxresponssize_cached to include the size of
417 * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. 418 * the rpc header, but all we need to cache is the data starting after
419 * the end of the initial SEQUENCE operation--the rest we regenerate
420 * each time. Therefore we can advertise a ca_maxresponssize_cached
421 * value that is the number of bytes in our cache plus a few additional
422 * bytes. In order to stay on the safe side, and not promise more than
423 * we can cache, those additional bytes must be the minimum possible: 24
424 * bytes of rpc header (xid through accept state, with AUTH_NULL
425 * verifier), 12 for the compound header (with zero-length tag), and 44
426 * for the SEQUENCE op response:
427 */
428#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
429
430/*
431 * Give the client the number of ca_maxresponsesize_cached slots it
432 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE,
433 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more
434 * than NFSD_MAX_SLOTS_PER_SESSION.
418 * 435 *
419 * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we 436 * If we run out of reserved DRC memory we should (up to a point)
420 * should (up to a point) re-negotiate active sessions and reduce their 437 * re-negotiate active sessions and reduce their slot usage to make
421 * slot usage to make rooom for new connections. For now we just fail the 438 * rooom for new connections. For now we just fail the create session.
422 * create session.
423 */ 439 */
424static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) 440static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
425{ 441{
426 int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; 442 int mem, size = fchan->maxresp_cached;
427 443
428 if (fchan->maxreqs < 1) 444 if (fchan->maxreqs < 1)
429 return nfserr_inval; 445 return nfserr_inval;
430 else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
431 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
432 446
433 spin_lock(&nfsd_serv->sv_lock); 447 if (size < NFSD_MIN_HDR_SEQ_SZ)
434 if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) 448 size = NFSD_MIN_HDR_SEQ_SZ;
435 np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; 449 size -= NFSD_MIN_HDR_SEQ_SZ;
436 nfsd_serv->sv_drc_pages_used += np; 450 if (size > NFSD_SLOT_CACHE_SIZE)
437 spin_unlock(&nfsd_serv->sv_lock); 451 size = NFSD_SLOT_CACHE_SIZE;
452
453 /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
454 mem = fchan->maxreqs * size;
455 if (mem > NFSD_MAX_MEM_PER_SESSION) {
456 fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
457 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
458 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
459 mem = fchan->maxreqs * size;
460 }
438 461
439 if (np <= 0) { 462 spin_lock(&nfsd_drc_lock);
440 status = nfserr_resource; 463 /* bound the total session drc memory ussage */
441 fchan->maxreqs = 0; 464 if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
442 } else 465 fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
443 fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; 466 mem = fchan->maxreqs * size;
467 }
468 nfsd_drc_mem_used += mem;
469 spin_unlock(&nfsd_drc_lock);
444 470
445 return status; 471 if (fchan->maxreqs == 0)
472 return nfserr_serverfault;
473
474 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
475 return 0;
446} 476}
447 477
448/* 478/*
@@ -466,36 +496,41 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
466 fchan->maxresp_sz = maxcount; 496 fchan->maxresp_sz = maxcount;
467 session_fchan->maxresp_sz = fchan->maxresp_sz; 497 session_fchan->maxresp_sz = fchan->maxresp_sz;
468 498
469 /* Set the max response cached size our default which is
470 * a multiple of PAGE_SIZE and small */
471 session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
472 fchan->maxresp_cached = session_fchan->maxresp_cached;
473
474 /* Use the client's maxops if possible */ 499 /* Use the client's maxops if possible */
475 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 500 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
476 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 501 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
477 session_fchan->maxops = fchan->maxops; 502 session_fchan->maxops = fchan->maxops;
478 503
479 /* try to use the client requested number of slots */
480 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
481 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
482
483 /* FIXME: Error means no more DRC pages so the server should 504 /* FIXME: Error means no more DRC pages so the server should
484 * recover pages from existing sessions. For now fail session 505 * recover pages from existing sessions. For now fail session
485 * creation. 506 * creation.
486 */ 507 */
487 status = set_forechannel_maxreqs(fchan); 508 status = set_forechannel_drc_size(fchan);
488 509
510 session_fchan->maxresp_cached = fchan->maxresp_cached;
489 session_fchan->maxreqs = fchan->maxreqs; 511 session_fchan->maxreqs = fchan->maxreqs;
512
513 dprintk("%s status %d\n", __func__, status);
490 return status; 514 return status;
491} 515}
492 516
517static void
518free_session_slots(struct nfsd4_session *ses)
519{
520 int i;
521
522 for (i = 0; i < ses->se_fchannel.maxreqs; i++)
523 kfree(ses->se_slots[i]);
524}
525
493static int 526static int
494alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, 527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
495 struct nfsd4_create_session *cses) 528 struct nfsd4_create_session *cses)
496{ 529{
497 struct nfsd4_session *new, tmp; 530 struct nfsd4_session *new, tmp;
498 int idx, status = nfserr_resource, slotsize; 531 struct nfsd4_slot *sp;
532 int idx, slotsize, cachesize, i;
533 int status;
499 534
500 memset(&tmp, 0, sizeof(tmp)); 535 memset(&tmp, 0, sizeof(tmp));
501 536
@@ -506,14 +541,27 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
506 if (status) 541 if (status)
507 goto out; 542 goto out;
508 543
509 /* allocate struct nfsd4_session and slot table in one piece */ 544 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot)
510 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); 545 + sizeof(struct nfsd4_session) > PAGE_SIZE);
546
547 status = nfserr_serverfault;
548 /* allocate struct nfsd4_session and slot table pointers in one piece */
549 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
511 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); 550 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
512 if (!new) 551 if (!new)
513 goto out; 552 goto out;
514 553
515 memcpy(new, &tmp, sizeof(*new)); 554 memcpy(new, &tmp, sizeof(*new));
516 555
556 /* allocate each struct nfsd4_slot and data cache in one piece */
557 cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
558 for (i = 0; i < new->se_fchannel.maxreqs; i++) {
559 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
560 if (!sp)
561 goto out_free;
562 new->se_slots[i] = sp;
563 }
564
517 new->se_client = clp; 565 new->se_client = clp;
518 gen_sessionid(new); 566 gen_sessionid(new);
519 idx = hash_sessionid(&new->se_sessionid); 567 idx = hash_sessionid(&new->se_sessionid);
@@ -530,6 +578,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
530 status = nfs_ok; 578 status = nfs_ok;
531out: 579out:
532 return status; 580 return status;
581out_free:
582 free_session_slots(new);
583 kfree(new);
584 goto out;
533} 585}
534 586
535/* caller must hold sessionid_lock */ 587/* caller must hold sessionid_lock */
@@ -572,19 +624,16 @@ release_session(struct nfsd4_session *ses)
572 nfsd4_put_session(ses); 624 nfsd4_put_session(ses);
573} 625}
574 626
575static void nfsd4_release_respages(struct page **respages, short resused);
576
577void 627void
578free_session(struct kref *kref) 628free_session(struct kref *kref)
579{ 629{
580 struct nfsd4_session *ses; 630 struct nfsd4_session *ses;
581 int i;
582 631
583 ses = container_of(kref, struct nfsd4_session, se_ref); 632 ses = container_of(kref, struct nfsd4_session, se_ref);
584 for (i = 0; i < ses->se_fchannel.maxreqs; i++) { 633 spin_lock(&nfsd_drc_lock);
585 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; 634 nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE;
586 nfsd4_release_respages(e->ce_respages, e->ce_resused); 635 spin_unlock(&nfsd_drc_lock);
587 } 636 free_session_slots(ses);
588 kfree(ses); 637 kfree(ses);
589} 638}
590 639
@@ -647,18 +696,14 @@ shutdown_callback_client(struct nfs4_client *clp)
647 clp->cl_cb_conn.cb_client = NULL; 696 clp->cl_cb_conn.cb_client = NULL;
648 rpc_shutdown_client(clnt); 697 rpc_shutdown_client(clnt);
649 } 698 }
650 if (clp->cl_cb_conn.cb_cred) {
651 put_rpccred(clp->cl_cb_conn.cb_cred);
652 clp->cl_cb_conn.cb_cred = NULL;
653 }
654} 699}
655 700
656static inline void 701static inline void
657free_client(struct nfs4_client *clp) 702free_client(struct nfs4_client *clp)
658{ 703{
659 shutdown_callback_client(clp); 704 shutdown_callback_client(clp);
660 nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, 705 if (clp->cl_cb_xprt)
661 clp->cl_slot.sl_cache_entry.ce_resused); 706 svc_xprt_put(clp->cl_cb_xprt);
662 if (clp->cl_cred.cr_group_info) 707 if (clp->cl_cred.cr_group_info)
663 put_group_info(clp->cl_cred.cr_group_info); 708 put_group_info(clp->cl_cred.cr_group_info);
664 kfree(clp->cl_principal); 709 kfree(clp->cl_principal);
@@ -714,25 +759,6 @@ expire_client(struct nfs4_client *clp)
714 put_nfs4_client(clp); 759 put_nfs4_client(clp);
715} 760}
716 761
717static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
718{
719 struct nfs4_client *clp;
720
721 clp = alloc_client(name);
722 if (clp == NULL)
723 return NULL;
724 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
725 atomic_set(&clp->cl_count, 1);
726 atomic_set(&clp->cl_cb_conn.cb_set, 0);
727 INIT_LIST_HEAD(&clp->cl_idhash);
728 INIT_LIST_HEAD(&clp->cl_strhash);
729 INIT_LIST_HEAD(&clp->cl_openowners);
730 INIT_LIST_HEAD(&clp->cl_delegations);
731 INIT_LIST_HEAD(&clp->cl_sessions);
732 INIT_LIST_HEAD(&clp->cl_lru);
733 return clp;
734}
735
736static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) 762static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
737{ 763{
738 memcpy(target->cl_verifier.data, source->data, 764 memcpy(target->cl_verifier.data, source->data,
@@ -795,6 +821,46 @@ static void gen_confirm(struct nfs4_client *clp)
795 *p++ = i++; 821 *p++ = i++;
796} 822}
797 823
824static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
825 struct svc_rqst *rqstp, nfs4_verifier *verf)
826{
827 struct nfs4_client *clp;
828 struct sockaddr *sa = svc_addr(rqstp);
829 char *princ;
830
831 clp = alloc_client(name);
832 if (clp == NULL)
833 return NULL;
834
835 princ = svc_gss_principal(rqstp);
836 if (princ) {
837 clp->cl_principal = kstrdup(princ, GFP_KERNEL);
838 if (clp->cl_principal == NULL) {
839 free_client(clp);
840 return NULL;
841 }
842 }
843
844 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
845 atomic_set(&clp->cl_count, 1);
846 atomic_set(&clp->cl_cb_conn.cb_set, 0);
847 INIT_LIST_HEAD(&clp->cl_idhash);
848 INIT_LIST_HEAD(&clp->cl_strhash);
849 INIT_LIST_HEAD(&clp->cl_openowners);
850 INIT_LIST_HEAD(&clp->cl_delegations);
851 INIT_LIST_HEAD(&clp->cl_sessions);
852 INIT_LIST_HEAD(&clp->cl_lru);
853 clear_bit(0, &clp->cl_cb_slot_busy);
854 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
855 copy_verf(clp, verf);
856 rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
857 clp->cl_flavor = rqstp->rq_flavor;
858 copy_cred(&clp->cl_cred, &rqstp->rq_cred);
859 gen_confirm(clp);
860
861 return clp;
862}
863
798static int check_name(struct xdr_netobj name) 864static int check_name(struct xdr_netobj name)
799{ 865{
800 if (name.len == 0) 866 if (name.len == 0)
@@ -902,93 +968,40 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
902 return NULL; 968 return NULL;
903} 969}
904 970
905/* a helper function for parse_callback */
906static int
907parse_octet(unsigned int *lenp, char **addrp)
908{
909 unsigned int len = *lenp;
910 char *p = *addrp;
911 int n = -1;
912 char c;
913
914 for (;;) {
915 if (!len)
916 break;
917 len--;
918 c = *p++;
919 if (c == '.')
920 break;
921 if ((c < '0') || (c > '9')) {
922 n = -1;
923 break;
924 }
925 if (n < 0)
926 n = 0;
927 n = (n * 10) + (c - '0');
928 if (n > 255) {
929 n = -1;
930 break;
931 }
932 }
933 *lenp = len;
934 *addrp = p;
935 return n;
936}
937
938/* parse and set the setclientid ipv4 callback address */
939static int
940parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
941{
942 int temp = 0;
943 u32 cbaddr = 0;
944 u16 cbport = 0;
945 u32 addrlen = addr_len;
946 char *addr = addr_val;
947 int i, shift;
948
949 /* ipaddress */
950 shift = 24;
951 for(i = 4; i > 0 ; i--) {
952 if ((temp = parse_octet(&addrlen, &addr)) < 0) {
953 return 0;
954 }
955 cbaddr |= (temp << shift);
956 if (shift > 0)
957 shift -= 8;
958 }
959 *cbaddrp = cbaddr;
960
961 /* port */
962 shift = 8;
963 for(i = 2; i > 0 ; i--) {
964 if ((temp = parse_octet(&addrlen, &addr)) < 0) {
965 return 0;
966 }
967 cbport |= (temp << shift);
968 if (shift > 0)
969 shift -= 8;
970 }
971 *cbportp = cbport;
972 return 1;
973}
974
975static void 971static void
976gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) 972gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
977{ 973{
978 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 974 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
979 975 unsigned short expected_family;
980 /* Currently, we only support tcp for the callback channel */ 976
981 if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) 977 /* Currently, we only support tcp and tcp6 for the callback channel */
978 if (se->se_callback_netid_len == 3 &&
979 !memcmp(se->se_callback_netid_val, "tcp", 3))
980 expected_family = AF_INET;
981 else if (se->se_callback_netid_len == 4 &&
982 !memcmp(se->se_callback_netid_val, "tcp6", 4))
983 expected_family = AF_INET6;
984 else
982 goto out_err; 985 goto out_err;
983 986
984 if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, 987 cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
985 &cb->cb_addr, &cb->cb_port))) 988 se->se_callback_addr_len,
989 (struct sockaddr *) &cb->cb_addr,
990 sizeof(cb->cb_addr));
991
992 if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family)
986 goto out_err; 993 goto out_err;
994
995 if (cb->cb_addr.ss_family == AF_INET6)
996 ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid;
997
987 cb->cb_minorversion = 0; 998 cb->cb_minorversion = 0;
988 cb->cb_prog = se->se_callback_prog; 999 cb->cb_prog = se->se_callback_prog;
989 cb->cb_ident = se->se_callback_ident; 1000 cb->cb_ident = se->se_callback_ident;
990 return; 1001 return;
991out_err: 1002out_err:
1003 cb->cb_addr.ss_family = AF_UNSPEC;
1004 cb->cb_addrlen = 0;
992 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1005 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
993 "will not receive delegations\n", 1006 "will not receive delegations\n",
994 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1007 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -996,175 +1009,87 @@ out_err:
996 return; 1009 return;
997} 1010}
998 1011
999void
1000nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
1001{
1002 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1003
1004 resp->cstate.statp = statp;
1005}
1006
1007/* 1012/*
1008 * Dereference the result pages. 1013 * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
1009 */ 1014 */
1010static void 1015void
1011nfsd4_release_respages(struct page **respages, short resused) 1016nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
1012{ 1017{
1013 int i; 1018 struct nfsd4_slot *slot = resp->cstate.slot;
1019 unsigned int base;
1014 1020
1015 dprintk("--> %s\n", __func__); 1021 dprintk("--> %s slot %p\n", __func__, slot);
1016 for (i = 0; i < resused; i++) {
1017 if (!respages[i])
1018 continue;
1019 put_page(respages[i]);
1020 respages[i] = NULL;
1021 }
1022}
1023 1022
1024static void 1023 slot->sl_opcnt = resp->opcnt;
1025nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) 1024 slot->sl_status = resp->cstate.status;
1026{
1027 int i;
1028 1025
1029 for (i = 0; i < count; i++) { 1026 if (nfsd4_not_cached(resp)) {
1030 topages[i] = frompages[i]; 1027 slot->sl_datalen = 0;
1031 if (!topages[i]) 1028 return;
1032 continue;
1033 get_page(topages[i]);
1034 } 1029 }
1030 slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
1031 base = (char *)resp->cstate.datap -
1032 (char *)resp->xbuf->head[0].iov_base;
1033 if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
1034 slot->sl_datalen))
1035 WARN("%s: sessions DRC could not cache compound\n", __func__);
1036 return;
1035} 1037}
1036 1038
1037/* 1039/*
1038 * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous 1040 * Encode the replay sequence operation from the slot values.
1039 * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total 1041 * If cachethis is FALSE encode the uncached rep error on the next
1040 * length of the XDR response is less than se_fmaxresp_cached 1042 * operation which sets resp->p and increments resp->opcnt for
1041 * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a 1043 * nfs4svc_encode_compoundres.
1042 * of the reply (e.g. readdir).
1043 * 1044 *
1044 * Store the base and length of the rq_req.head[0] page
1045 * of the NFSv4.1 data, just past the rpc header.
1046 */ 1045 */
1047void 1046static __be32
1048nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 1047nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
1048 struct nfsd4_compoundres *resp)
1049{ 1049{
1050 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1050 struct nfsd4_op *op;
1051 struct svc_rqst *rqstp = resp->rqstp; 1051 struct nfsd4_slot *slot = resp->cstate.slot;
1052 struct nfsd4_compoundargs *args = rqstp->rq_argp;
1053 struct nfsd4_op *op = &args->ops[resp->opcnt];
1054 struct kvec *resv = &rqstp->rq_res.head[0];
1055
1056 dprintk("--> %s entry %p\n", __func__, entry);
1057
1058 /* Don't cache a failed OP_SEQUENCE. */
1059 if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status)
1060 return;
1061 1052
1062 nfsd4_release_respages(entry->ce_respages, entry->ce_resused); 1053 dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
1063 entry->ce_opcnt = resp->opcnt; 1054 resp->opcnt, resp->cstate.slot->sl_cachethis);
1064 entry->ce_status = resp->cstate.status;
1065 1055
1066 /* 1056 /* Encode the replayed sequence operation */
1067 * Don't need a page to cache just the sequence operation - the slot 1057 op = &args->ops[resp->opcnt - 1];
1068 * does this for us! 1058 nfsd4_encode_operation(resp, op);
1069 */
1070 1059
1071 if (nfsd4_not_cached(resp)) { 1060 /* Return nfserr_retry_uncached_rep in next operation. */
1072 entry->ce_resused = 0; 1061 if (args->opcnt > 1 && slot->sl_cachethis == 0) {
1073 entry->ce_rpchdrlen = 0; 1062 op = &args->ops[resp->opcnt++];
1074 dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, 1063 op->status = nfserr_retry_uncached_rep;
1075 resp->cstate.slot->sl_cache_entry.ce_cachethis); 1064 nfsd4_encode_operation(resp, op);
1076 return;
1077 }
1078 entry->ce_resused = rqstp->rq_resused;
1079 if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1)
1080 entry->ce_resused = NFSD_PAGES_PER_SLOT + 1;
1081 nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages,
1082 entry->ce_resused);
1083 entry->ce_datav.iov_base = resp->cstate.statp;
1084 entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp -
1085 (char *)page_address(rqstp->rq_respages[0]));
1086 /* Current request rpc header length*/
1087 entry->ce_rpchdrlen = (char *)resp->cstate.statp -
1088 (char *)page_address(rqstp->rq_respages[0]);
1089}
1090
1091/*
1092 * We keep the rpc header, but take the nfs reply from the replycache.
1093 */
1094static int
1095nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
1096 struct nfsd4_cache_entry *entry)
1097{
1098 struct svc_rqst *rqstp = resp->rqstp;
1099 struct kvec *resv = &resp->rqstp->rq_res.head[0];
1100 int len;
1101
1102 /* Current request rpc header length*/
1103 len = (char *)resp->cstate.statp -
1104 (char *)page_address(rqstp->rq_respages[0]);
1105 if (entry->ce_datav.iov_len + len > PAGE_SIZE) {
1106 dprintk("%s v41 cached reply too large (%Zd).\n", __func__,
1107 entry->ce_datav.iov_len);
1108 return 0;
1109 } 1065 }
1110 /* copy the cached reply nfsd data past the current rpc header */ 1066 return op->status;
1111 memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base,
1112 entry->ce_datav.iov_len);
1113 resv->iov_len = len + entry->ce_datav.iov_len;
1114 return 1;
1115} 1067}
1116 1068
1117/* 1069/*
1118 * Keep the first page of the replay. Copy the NFSv4.1 data from the first 1070 * The sequence operation is not cached because we can use the slot and
1119 * cached page. Replace any futher replay pages from the cache. 1071 * session values.
1120 */ 1072 */
1121__be32 1073__be32
1122nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 1074nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1123 struct nfsd4_sequence *seq) 1075 struct nfsd4_sequence *seq)
1124{ 1076{
1125 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1077 struct nfsd4_slot *slot = resp->cstate.slot;
1126 __be32 status; 1078 __be32 status;
1127 1079
1128 dprintk("--> %s entry %p\n", __func__, entry); 1080 dprintk("--> %s slot %p\n", __func__, slot);
1129
1130 /*
1131 * If this is just the sequence operation, we did not keep
1132 * a page in the cache entry because we can just use the
1133 * slot info stored in struct nfsd4_sequence that was checked
1134 * against the slot in nfsd4_sequence().
1135 *
1136 * This occurs when seq->cachethis is FALSE, or when the client
1137 * session inactivity timer fires and a solo sequence operation
1138 * is sent (lease renewal).
1139 */
1140 if (seq && nfsd4_not_cached(resp)) {
1141 seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
1142 return nfs_ok;
1143 }
1144
1145 if (!nfsd41_copy_replay_data(resp, entry)) {
1146 /*
1147 * Not enough room to use the replay rpc header, send the
1148 * cached header. Release all the allocated result pages.
1149 */
1150 svc_free_res_pages(resp->rqstp);
1151 nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages,
1152 entry->ce_resused);
1153 } else {
1154 /* Release all but the first allocated result page */
1155 1081
1156 resp->rqstp->rq_resused--; 1082 /* Either returns 0 or nfserr_retry_uncached */
1157 svc_free_res_pages(resp->rqstp); 1083 status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
1084 if (status == nfserr_retry_uncached_rep)
1085 return status;
1158 1086
1159 nfsd4_copy_pages(&resp->rqstp->rq_respages[1], 1087 /* The sequence operation has been encoded, cstate->datap set. */
1160 &entry->ce_respages[1], 1088 memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
1161 entry->ce_resused - 1);
1162 }
1163 1089
1164 resp->rqstp->rq_resused = entry->ce_resused; 1090 resp->opcnt = slot->sl_opcnt;
1165 resp->opcnt = entry->ce_opcnt; 1091 resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
1166 resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; 1092 status = slot->sl_status;
1167 status = entry->ce_status;
1168 1093
1169 return status; 1094 return status;
1170} 1095}
@@ -1194,13 +1119,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1194 int status; 1119 int status;
1195 unsigned int strhashval; 1120 unsigned int strhashval;
1196 char dname[HEXDIR_LEN]; 1121 char dname[HEXDIR_LEN];
1122 char addr_str[INET6_ADDRSTRLEN];
1197 nfs4_verifier verf = exid->verifier; 1123 nfs4_verifier verf = exid->verifier;
1198 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1124 struct sockaddr *sa = svc_addr(rqstp);
1199 1125
1126 rpc_ntop(sa, addr_str, sizeof(addr_str));
1200 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " 1127 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
1201 " ip_addr=%u flags %x, spa_how %d\n", 1128 "ip_addr=%s flags %x, spa_how %d\n",
1202 __func__, rqstp, exid, exid->clname.len, exid->clname.data, 1129 __func__, rqstp, exid, exid->clname.len, exid->clname.data,
1203 ip_addr, exid->flags, exid->spa_how); 1130 addr_str, exid->flags, exid->spa_how);
1204 1131
1205 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) 1132 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
1206 return nfserr_inval; 1133 return nfserr_inval;
@@ -1281,28 +1208,23 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1281 1208
1282out_new: 1209out_new:
1283 /* Normal case */ 1210 /* Normal case */
1284 new = create_client(exid->clname, dname); 1211 new = create_client(exid->clname, dname, rqstp, &verf);
1285 if (new == NULL) { 1212 if (new == NULL) {
1286 status = nfserr_resource; 1213 status = nfserr_serverfault;
1287 goto out; 1214 goto out;
1288 } 1215 }
1289 1216
1290 copy_verf(new, &verf);
1291 copy_cred(&new->cl_cred, &rqstp->rq_cred);
1292 new->cl_addr = ip_addr;
1293 gen_clid(new); 1217 gen_clid(new);
1294 gen_confirm(new);
1295 add_to_unconfirmed(new, strhashval); 1218 add_to_unconfirmed(new, strhashval);
1296out_copy: 1219out_copy:
1297 exid->clientid.cl_boot = new->cl_clientid.cl_boot; 1220 exid->clientid.cl_boot = new->cl_clientid.cl_boot;
1298 exid->clientid.cl_id = new->cl_clientid.cl_id; 1221 exid->clientid.cl_id = new->cl_clientid.cl_id;
1299 1222
1300 new->cl_slot.sl_seqid = 0;
1301 exid->seqid = 1; 1223 exid->seqid = 1;
1302 nfsd4_set_ex_flags(new, exid); 1224 nfsd4_set_ex_flags(new, exid);
1303 1225
1304 dprintk("nfsd4_exchange_id seqid %d flags %x\n", 1226 dprintk("nfsd4_exchange_id seqid %d flags %x\n",
1305 new->cl_slot.sl_seqid, new->cl_exchange_flags); 1227 new->cl_cs_slot.sl_seqid, new->cl_exchange_flags);
1306 status = nfs_ok; 1228 status = nfs_ok;
1307 1229
1308out: 1230out:
@@ -1313,40 +1235,60 @@ error:
1313} 1235}
1314 1236
1315static int 1237static int
1316check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) 1238check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
1317{ 1239{
1318 dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, 1240 dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
1319 slot->sl_seqid); 1241 slot_seqid);
1320 1242
1321 /* The slot is in use, and no response has been sent. */ 1243 /* The slot is in use, and no response has been sent. */
1322 if (slot->sl_inuse) { 1244 if (slot_inuse) {
1323 if (seqid == slot->sl_seqid) 1245 if (seqid == slot_seqid)
1324 return nfserr_jukebox; 1246 return nfserr_jukebox;
1325 else 1247 else
1326 return nfserr_seq_misordered; 1248 return nfserr_seq_misordered;
1327 } 1249 }
1328 /* Normal */ 1250 /* Normal */
1329 if (likely(seqid == slot->sl_seqid + 1)) 1251 if (likely(seqid == slot_seqid + 1))
1330 return nfs_ok; 1252 return nfs_ok;
1331 /* Replay */ 1253 /* Replay */
1332 if (seqid == slot->sl_seqid) 1254 if (seqid == slot_seqid)
1333 return nfserr_replay_cache; 1255 return nfserr_replay_cache;
1334 /* Wraparound */ 1256 /* Wraparound */
1335 if (seqid == 1 && (slot->sl_seqid + 1) == 0) 1257 if (seqid == 1 && (slot_seqid + 1) == 0)
1336 return nfs_ok; 1258 return nfs_ok;
1337 /* Misordered replay or misordered new request */ 1259 /* Misordered replay or misordered new request */
1338 return nfserr_seq_misordered; 1260 return nfserr_seq_misordered;
1339} 1261}
1340 1262
1263/*
1264 * Cache the create session result into the create session single DRC
1265 * slot cache by saving the xdr structure. sl_seqid has been set.
1266 * Do this for solo or embedded create session operations.
1267 */
1268static void
1269nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
1270 struct nfsd4_clid_slot *slot, int nfserr)
1271{
1272 slot->sl_status = nfserr;
1273 memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
1274}
1275
1276static __be32
1277nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
1278 struct nfsd4_clid_slot *slot)
1279{
1280 memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
1281 return slot->sl_status;
1282}
1283
1341__be32 1284__be32
1342nfsd4_create_session(struct svc_rqst *rqstp, 1285nfsd4_create_session(struct svc_rqst *rqstp,
1343 struct nfsd4_compound_state *cstate, 1286 struct nfsd4_compound_state *cstate,
1344 struct nfsd4_create_session *cr_ses) 1287 struct nfsd4_create_session *cr_ses)
1345{ 1288{
1346 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1289 struct sockaddr *sa = svc_addr(rqstp);
1347 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1348 struct nfs4_client *conf, *unconf; 1290 struct nfs4_client *conf, *unconf;
1349 struct nfsd4_slot *slot = NULL; 1291 struct nfsd4_clid_slot *cs_slot = NULL;
1350 int status = 0; 1292 int status = 0;
1351 1293
1352 nfs4_lock_state(); 1294 nfs4_lock_state();
@@ -1354,40 +1296,38 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1354 conf = find_confirmed_client(&cr_ses->clientid); 1296 conf = find_confirmed_client(&cr_ses->clientid);
1355 1297
1356 if (conf) { 1298 if (conf) {
1357 slot = &conf->cl_slot; 1299 cs_slot = &conf->cl_cs_slot;
1358 status = check_slot_seqid(cr_ses->seqid, slot); 1300 status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1359 if (status == nfserr_replay_cache) { 1301 if (status == nfserr_replay_cache) {
1360 dprintk("Got a create_session replay! seqid= %d\n", 1302 dprintk("Got a create_session replay! seqid= %d\n",
1361 slot->sl_seqid); 1303 cs_slot->sl_seqid);
1362 cstate->slot = slot;
1363 cstate->status = status;
1364 /* Return the cached reply status */ 1304 /* Return the cached reply status */
1365 status = nfsd4_replay_cache_entry(resp, NULL); 1305 status = nfsd4_replay_create_session(cr_ses, cs_slot);
1366 goto out; 1306 goto out;
1367 } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { 1307 } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
1368 status = nfserr_seq_misordered; 1308 status = nfserr_seq_misordered;
1369 dprintk("Sequence misordered!\n"); 1309 dprintk("Sequence misordered!\n");
1370 dprintk("Expected seqid= %d but got seqid= %d\n", 1310 dprintk("Expected seqid= %d but got seqid= %d\n",
1371 slot->sl_seqid, cr_ses->seqid); 1311 cs_slot->sl_seqid, cr_ses->seqid);
1372 goto out; 1312 goto out;
1373 } 1313 }
1374 conf->cl_slot.sl_seqid++; 1314 cs_slot->sl_seqid++;
1375 } else if (unconf) { 1315 } else if (unconf) {
1376 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1316 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1377 (ip_addr != unconf->cl_addr)) { 1317 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
1378 status = nfserr_clid_inuse; 1318 status = nfserr_clid_inuse;
1379 goto out; 1319 goto out;
1380 } 1320 }
1381 1321
1382 slot = &unconf->cl_slot; 1322 cs_slot = &unconf->cl_cs_slot;
1383 status = check_slot_seqid(cr_ses->seqid, slot); 1323 status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1384 if (status) { 1324 if (status) {
1385 /* an unconfirmed replay returns misordered */ 1325 /* an unconfirmed replay returns misordered */
1386 status = nfserr_seq_misordered; 1326 status = nfserr_seq_misordered;
1387 goto out; 1327 goto out_cache;
1388 } 1328 }
1389 1329
1390 slot->sl_seqid++; /* from 0 to 1 */ 1330 cs_slot->sl_seqid++; /* from 0 to 1 */
1391 move_to_confirmed(unconf); 1331 move_to_confirmed(unconf);
1392 1332
1393 /* 1333 /*
@@ -1396,6 +1336,19 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1396 cr_ses->flags &= ~SESSION4_PERSIST; 1336 cr_ses->flags &= ~SESSION4_PERSIST;
1397 cr_ses->flags &= ~SESSION4_RDMA; 1337 cr_ses->flags &= ~SESSION4_RDMA;
1398 1338
1339 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1340 unconf->cl_cb_xprt = rqstp->rq_xprt;
1341 svc_xprt_get(unconf->cl_cb_xprt);
1342 rpc_copy_addr(
1343 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1344 sa);
1345 unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1346 unconf->cl_cb_conn.cb_minorversion =
1347 cstate->minorversion;
1348 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1349 unconf->cl_cb_seq_nr = 1;
1350 nfsd4_probe_callback(unconf);
1351 }
1399 conf = unconf; 1352 conf = unconf;
1400 } else { 1353 } else {
1401 status = nfserr_stale_clientid; 1354 status = nfserr_stale_clientid;
@@ -1408,12 +1361,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1408 1361
1409 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1362 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
1410 NFS4_MAX_SESSIONID_LEN); 1363 NFS4_MAX_SESSIONID_LEN);
1411 cr_ses->seqid = slot->sl_seqid; 1364 cr_ses->seqid = cs_slot->sl_seqid;
1412 1365
1413 slot->sl_inuse = true; 1366out_cache:
1414 cstate->slot = slot; 1367 /* cache solo and embedded create sessions under the state lock */
1415 /* Ensure a page is used for the cache */ 1368 nfsd4_cache_create_session(cr_ses, cs_slot, status);
1416 slot->sl_cache_entry.ce_cachethis = 1;
1417out: 1369out:
1418 nfs4_unlock_state(); 1370 nfs4_unlock_state();
1419 dprintk("%s returns %d\n", __func__, ntohl(status)); 1371 dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1478,18 +1430,23 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1478 if (seq->slotid >= session->se_fchannel.maxreqs) 1430 if (seq->slotid >= session->se_fchannel.maxreqs)
1479 goto out; 1431 goto out;
1480 1432
1481 slot = &session->se_slots[seq->slotid]; 1433 slot = session->se_slots[seq->slotid];
1482 dprintk("%s: slotid %d\n", __func__, seq->slotid); 1434 dprintk("%s: slotid %d\n", __func__, seq->slotid);
1483 1435
1484 status = check_slot_seqid(seq->seqid, slot); 1436 /* We do not negotiate the number of slots yet, so set the
1437 * maxslots to the session maxreqs which is used to encode
1438 * sr_highest_slotid and the sr_target_slot id to maxslots */
1439 seq->maxslots = session->se_fchannel.maxreqs;
1440
1441 status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
1485 if (status == nfserr_replay_cache) { 1442 if (status == nfserr_replay_cache) {
1486 cstate->slot = slot; 1443 cstate->slot = slot;
1487 cstate->session = session; 1444 cstate->session = session;
1488 /* Return the cached reply status and set cstate->status 1445 /* Return the cached reply status and set cstate->status
1489 * for nfsd4_svc_encode_compoundres processing */ 1446 * for nfsd4_proc_compound processing */
1490 status = nfsd4_replay_cache_entry(resp, seq); 1447 status = nfsd4_replay_cache_entry(resp, seq);
1491 cstate->status = nfserr_replay_cache; 1448 cstate->status = nfserr_replay_cache;
1492 goto replay_cache; 1449 goto out;
1493 } 1450 }
1494 if (status) 1451 if (status)
1495 goto out; 1452 goto out;
@@ -1497,23 +1454,23 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1497 /* Success! bump slot seqid */ 1454 /* Success! bump slot seqid */
1498 slot->sl_inuse = true; 1455 slot->sl_inuse = true;
1499 slot->sl_seqid = seq->seqid; 1456 slot->sl_seqid = seq->seqid;
1500 slot->sl_cache_entry.ce_cachethis = seq->cachethis; 1457 slot->sl_cachethis = seq->cachethis;
1501 /* Always set the cache entry cachethis for solo sequence */
1502 if (nfsd4_is_solo_sequence(resp))
1503 slot->sl_cache_entry.ce_cachethis = 1;
1504 1458
1505 cstate->slot = slot; 1459 cstate->slot = slot;
1506 cstate->session = session; 1460 cstate->session = session;
1507 1461
1508replay_cache: 1462 /* Hold a session reference until done processing the compound:
1509 /* Renew the clientid on success and on replay.
1510 * Hold a session reference until done processing the compound:
1511 * nfsd4_put_session called only if the cstate slot is set. 1463 * nfsd4_put_session called only if the cstate slot is set.
1512 */ 1464 */
1513 renew_client(session->se_client);
1514 nfsd4_get_session(session); 1465 nfsd4_get_session(session);
1515out: 1466out:
1516 spin_unlock(&sessionid_lock); 1467 spin_unlock(&sessionid_lock);
1468 /* Renew the clientid on success and on replay */
1469 if (cstate->session) {
1470 nfs4_lock_state();
1471 renew_client(session->se_client);
1472 nfs4_unlock_state();
1473 }
1517 dprintk("%s: return %d\n", __func__, ntohl(status)); 1474 dprintk("%s: return %d\n", __func__, ntohl(status));
1518 return status; 1475 return status;
1519} 1476}
@@ -1522,7 +1479,7 @@ __be32
1522nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1479nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1523 struct nfsd4_setclientid *setclid) 1480 struct nfsd4_setclientid *setclid)
1524{ 1481{
1525 struct sockaddr_in *sin = svc_addr_in(rqstp); 1482 struct sockaddr *sa = svc_addr(rqstp);
1526 struct xdr_netobj clname = { 1483 struct xdr_netobj clname = {
1527 .len = setclid->se_namelen, 1484 .len = setclid->se_namelen,
1528 .data = setclid->se_name, 1485 .data = setclid->se_name,
@@ -1531,7 +1488,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1531 unsigned int strhashval; 1488 unsigned int strhashval;
1532 struct nfs4_client *conf, *unconf, *new; 1489 struct nfs4_client *conf, *unconf, *new;
1533 __be32 status; 1490 __be32 status;
1534 char *princ;
1535 char dname[HEXDIR_LEN]; 1491 char dname[HEXDIR_LEN];
1536 1492
1537 if (!check_name(clname)) 1493 if (!check_name(clname))
@@ -1554,8 +1510,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1554 /* RFC 3530 14.2.33 CASE 0: */ 1510 /* RFC 3530 14.2.33 CASE 0: */
1555 status = nfserr_clid_inuse; 1511 status = nfserr_clid_inuse;
1556 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { 1512 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
1557 dprintk("NFSD: setclientid: string in use by client" 1513 char addr_str[INET6_ADDRSTRLEN];
1558 " at %pI4\n", &conf->cl_addr); 1514 rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
1515 sizeof(addr_str));
1516 dprintk("NFSD: setclientid: string in use by client "
1517 "at %s\n", addr_str);
1559 goto out; 1518 goto out;
1560 } 1519 }
1561 } 1520 }
@@ -1573,7 +1532,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1573 */ 1532 */
1574 if (unconf) 1533 if (unconf)
1575 expire_client(unconf); 1534 expire_client(unconf);
1576 new = create_client(clname, dname); 1535 new = create_client(clname, dname, rqstp, &clverifier);
1577 if (new == NULL) 1536 if (new == NULL)
1578 goto out; 1537 goto out;
1579 gen_clid(new); 1538 gen_clid(new);
@@ -1590,7 +1549,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1590 */ 1549 */
1591 expire_client(unconf); 1550 expire_client(unconf);
1592 } 1551 }
1593 new = create_client(clname, dname); 1552 new = create_client(clname, dname, rqstp, &clverifier);
1594 if (new == NULL) 1553 if (new == NULL)
1595 goto out; 1554 goto out;
1596 copy_clid(new, conf); 1555 copy_clid(new, conf);
@@ -1600,7 +1559,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1600 * probable client reboot; state will be removed if 1559 * probable client reboot; state will be removed if
1601 * confirmed. 1560 * confirmed.
1602 */ 1561 */
1603 new = create_client(clname, dname); 1562 new = create_client(clname, dname, rqstp, &clverifier);
1604 if (new == NULL) 1563 if (new == NULL)
1605 goto out; 1564 goto out;
1606 gen_clid(new); 1565 gen_clid(new);
@@ -1611,25 +1570,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1611 * confirmed. 1570 * confirmed.
1612 */ 1571 */
1613 expire_client(unconf); 1572 expire_client(unconf);
1614 new = create_client(clname, dname); 1573 new = create_client(clname, dname, rqstp, &clverifier);
1615 if (new == NULL) 1574 if (new == NULL)
1616 goto out; 1575 goto out;
1617 gen_clid(new); 1576 gen_clid(new);
1618 } 1577 }
1619 copy_verf(new, &clverifier); 1578 gen_callback(new, setclid, rpc_get_scope_id(sa));
1620 new->cl_addr = sin->sin_addr.s_addr;
1621 new->cl_flavor = rqstp->rq_flavor;
1622 princ = svc_gss_principal(rqstp);
1623 if (princ) {
1624 new->cl_principal = kstrdup(princ, GFP_KERNEL);
1625 if (new->cl_principal == NULL) {
1626 free_client(new);
1627 goto out;
1628 }
1629 }
1630 copy_cred(&new->cl_cred, &rqstp->rq_cred);
1631 gen_confirm(new);
1632 gen_callback(new, setclid);
1633 add_to_unconfirmed(new, strhashval); 1579 add_to_unconfirmed(new, strhashval);
1634 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1580 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
1635 setclid->se_clientid.cl_id = new->cl_clientid.cl_id; 1581 setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
@@ -1651,7 +1597,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1651 struct nfsd4_compound_state *cstate, 1597 struct nfsd4_compound_state *cstate,
1652 struct nfsd4_setclientid_confirm *setclientid_confirm) 1598 struct nfsd4_setclientid_confirm *setclientid_confirm)
1653{ 1599{
1654 struct sockaddr_in *sin = svc_addr_in(rqstp); 1600 struct sockaddr *sa = svc_addr(rqstp);
1655 struct nfs4_client *conf, *unconf; 1601 struct nfs4_client *conf, *unconf;
1656 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 1602 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
1657 clientid_t * clid = &setclientid_confirm->sc_clientid; 1603 clientid_t * clid = &setclientid_confirm->sc_clientid;
@@ -1670,9 +1616,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1670 unconf = find_unconfirmed_client(clid); 1616 unconf = find_unconfirmed_client(clid);
1671 1617
1672 status = nfserr_clid_inuse; 1618 status = nfserr_clid_inuse;
1673 if (conf && conf->cl_addr != sin->sin_addr.s_addr) 1619 if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa))
1674 goto out; 1620 goto out;
1675 if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) 1621 if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa))
1676 goto out; 1622 goto out;
1677 1623
1678 /* 1624 /*
@@ -2163,7 +2109,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
2163 return -EAGAIN; 2109 return -EAGAIN;
2164} 2110}
2165 2111
2166static struct lock_manager_operations nfsd_lease_mng_ops = { 2112static const struct lock_manager_operations nfsd_lease_mng_ops = {
2167 .fl_break = nfsd_break_deleg_cb, 2113 .fl_break = nfsd_break_deleg_cb,
2168 .fl_release_private = nfsd_release_deleg_cb, 2114 .fl_release_private = nfsd_release_deleg_cb,
2169 .fl_copy_lock = nfsd_copy_lock_deleg_cb, 2115 .fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -3368,7 +3314,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
3368 3314
3369/* Hack!: For now, we're defining this just so we can use a pointer to it 3315/* Hack!: For now, we're defining this just so we can use a pointer to it
3370 * as a unique cookie to identify our (NFSv4's) posix locks. */ 3316 * as a unique cookie to identify our (NFSv4's) posix locks. */
3371static struct lock_manager_operations nfsd_posix_mng_ops = { 3317static const struct lock_manager_operations nfsd_posix_mng_ops = {
3372}; 3318};
3373 3319
3374static inline void 3320static inline void
@@ -4072,7 +4018,7 @@ set_max_delegations(void)
4072 4018
4073/* initialization to perform when the nfsd service is started: */ 4019/* initialization to perform when the nfsd service is started: */
4074 4020
4075static void 4021static int
4076__nfs4_state_start(void) 4022__nfs4_state_start(void)
4077{ 4023{
4078 unsigned long grace_time; 4024 unsigned long grace_time;
@@ -4084,19 +4030,26 @@ __nfs4_state_start(void)
4084 printk(KERN_INFO "NFSD: starting %ld-second grace period\n", 4030 printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
4085 grace_time/HZ); 4031 grace_time/HZ);
4086 laundry_wq = create_singlethread_workqueue("nfsd4"); 4032 laundry_wq = create_singlethread_workqueue("nfsd4");
4033 if (laundry_wq == NULL)
4034 return -ENOMEM;
4087 queue_delayed_work(laundry_wq, &laundromat_work, grace_time); 4035 queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
4088 set_max_delegations(); 4036 set_max_delegations();
4037 return set_callback_cred();
4089} 4038}
4090 4039
4091void 4040int
4092nfs4_state_start(void) 4041nfs4_state_start(void)
4093{ 4042{
4043 int ret;
4044
4094 if (nfs4_init) 4045 if (nfs4_init)
4095 return; 4046 return 0;
4096 nfsd4_load_reboot_recovery_data(); 4047 nfsd4_load_reboot_recovery_data();
4097 __nfs4_state_start(); 4048 ret = __nfs4_state_start();
4049 if (ret)
4050 return ret;
4098 nfs4_init = 1; 4051 nfs4_init = 1;
4099 return; 4052 return 0;
4100} 4053}
4101 4054
4102time_t 4055time_t
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2dcc7feaa6ff..0fbd50cee1f6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1599,7 +1599,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
1599static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) 1599static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat)
1600{ 1600{
1601 struct svc_fh tmp_fh; 1601 struct svc_fh tmp_fh;
1602 char *path, *rootpath; 1602 char *path = NULL, *rootpath;
1603 size_t rootlen;
1603 1604
1604 fh_init(&tmp_fh, NFS4_FHSIZE); 1605 fh_init(&tmp_fh, NFS4_FHSIZE);
1605 *stat = exp_pseudoroot(rqstp, &tmp_fh); 1606 *stat = exp_pseudoroot(rqstp, &tmp_fh);
@@ -1609,14 +1610,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
1609 1610
1610 path = exp->ex_pathname; 1611 path = exp->ex_pathname;
1611 1612
1612 if (strncmp(path, rootpath, strlen(rootpath))) { 1613 rootlen = strlen(rootpath);
1614 if (strncmp(path, rootpath, rootlen)) {
1613 dprintk("nfsd: fs_locations failed;" 1615 dprintk("nfsd: fs_locations failed;"
1614 "%s is not contained in %s\n", path, rootpath); 1616 "%s is not contained in %s\n", path, rootpath);
1615 *stat = nfserr_notsupp; 1617 *stat = nfserr_notsupp;
1616 return NULL; 1618 path = NULL;
1619 goto out;
1617 } 1620 }
1618 1621 path += rootlen;
1619 return path + strlen(rootpath); 1622out:
1623 fh_put(&tmp_fh);
1624 return path;
1620} 1625}
1621 1626
1622/* 1627/*
@@ -1793,11 +1798,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1793 goto out_nfserr; 1798 goto out_nfserr;
1794 } 1799 }
1795 } 1800 }
1796 if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
1797 if (exp->ex_fslocs.locations == NULL) {
1798 bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1799 }
1800 }
1801 if ((buflen -= 16) < 0) 1801 if ((buflen -= 16) < 0)
1802 goto out_resource; 1802 goto out_resource;
1803 1803
@@ -1825,8 +1825,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1825 goto out_resource; 1825 goto out_resource;
1826 if (!aclsupport) 1826 if (!aclsupport)
1827 word0 &= ~FATTR4_WORD0_ACL; 1827 word0 &= ~FATTR4_WORD0_ACL;
1828 if (!exp->ex_fslocs.locations)
1829 word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
1830 if (!word2) { 1828 if (!word2) {
1831 WRITE32(2); 1829 WRITE32(2);
1832 WRITE32(word0); 1830 WRITE32(word0);
@@ -3064,6 +3062,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
3064 WRITE32(0); 3062 WRITE32(0);
3065 3063
3066 ADJUST_ARGS(); 3064 ADJUST_ARGS();
3065 resp->cstate.datap = p; /* DRC cache data pointer */
3067 return 0; 3066 return 0;
3068} 3067}
3069 3068
@@ -3166,7 +3165,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
3166 return status; 3165 return status;
3167 3166
3168 session = resp->cstate.session; 3167 session = resp->cstate.session;
3169 if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) 3168 if (session == NULL || slot->sl_cachethis == 0)
3170 return status; 3169 return status;
3171 3170
3172 if (resp->opcnt >= args->opcnt) 3171 if (resp->opcnt >= args->opcnt)
@@ -3291,6 +3290,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
3291 /* 3290 /*
3292 * All that remains is to write the tag and operation count... 3291 * All that remains is to write the tag and operation count...
3293 */ 3292 */
3293 struct nfsd4_compound_state *cs = &resp->cstate;
3294 struct kvec *iov; 3294 struct kvec *iov;
3295 p = resp->tagp; 3295 p = resp->tagp;
3296 *p++ = htonl(resp->taglen); 3296 *p++ = htonl(resp->taglen);
@@ -3304,17 +3304,11 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
3304 iov = &rqstp->rq_res.head[0]; 3304 iov = &rqstp->rq_res.head[0];
3305 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; 3305 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
3306 BUG_ON(iov->iov_len > PAGE_SIZE); 3306 BUG_ON(iov->iov_len > PAGE_SIZE);
3307 if (nfsd4_has_session(&resp->cstate)) { 3307 if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
3308 if (resp->cstate.status == nfserr_replay_cache && 3308 nfsd4_store_cache_entry(resp);
3309 !nfsd4_not_cached(resp)) { 3309 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
3310 iov->iov_len = resp->cstate.iovlen; 3310 resp->cstate.slot->sl_inuse = false;
3311 } else { 3311 nfsd4_put_session(resp->cstate.session);
3312 nfsd4_store_cache_entry(resp);
3313 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
3314 resp->cstate.slot->sl_inuse = 0;
3315 }
3316 if (resp->cstate.session)
3317 nfsd4_put_session(resp->cstate.session);
3318 } 3312 }
3319 return 1; 3313 return 1;
3320} 3314}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7e906c5b7671..00388d2a3c99 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -174,12 +174,13 @@ static const struct file_operations exports_operations = {
174}; 174};
175 175
176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
177extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
177 178
178static struct file_operations pool_stats_operations = { 179static struct file_operations pool_stats_operations = {
179 .open = nfsd_pool_stats_open, 180 .open = nfsd_pool_stats_open,
180 .read = seq_read, 181 .read = seq_read,
181 .llseek = seq_lseek, 182 .llseek = seq_lseek,
182 .release = seq_release, 183 .release = nfsd_pool_stats_release,
183 .owner = THIS_MODULE, 184 .owner = THIS_MODULE,
184}; 185};
185 186
@@ -776,10 +777,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
776 size -= len; 777 size -= len;
777 mesg += len; 778 mesg += len;
778 } 779 }
779 780 rv = mesg - buf;
780 mutex_unlock(&nfsd_mutex);
781 return (mesg-buf);
782
783out_free: 781out_free:
784 kfree(nthreads); 782 kfree(nthreads);
785 mutex_unlock(&nfsd_mutex); 783 mutex_unlock(&nfsd_mutex);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8847f3fbfc1e..01965b2f3a76 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -397,44 +397,51 @@ static inline void _fh_update_old(struct dentry *dentry,
397 fh->ofh_dirino = 0; 397 fh->ofh_dirino = 0;
398} 398}
399 399
400__be32 400static bool is_root_export(struct svc_export *exp)
401fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
402 struct svc_fh *ref_fh)
403{ 401{
404 /* ref_fh is a reference file handle. 402 return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
405 * if it is non-null and for the same filesystem, then we should compose 403}
406 * a filehandle which is of the same version, where possible.
407 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
408 * Then create a 32byte filehandle using nfs_fhbase_old
409 *
410 */
411 404
412 u8 version; 405static struct super_block *exp_sb(struct svc_export *exp)
413 u8 fsid_type = 0; 406{
414 struct inode * inode = dentry->d_inode; 407 return exp->ex_path.dentry->d_inode->i_sb;
415 struct dentry *parent = dentry->d_parent; 408}
416 __u32 *datap;
417 dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev;
418 int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root);
419 409
420 dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", 410static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
421 MAJOR(ex_dev), MINOR(ex_dev), 411{
422 (long) exp->ex_path.dentry->d_inode->i_ino, 412 switch (fsid_type) {
423 parent->d_name.name, dentry->d_name.name, 413 case FSID_DEV:
424 (inode ? inode->i_ino : 0)); 414 if (!old_valid_dev(exp_sb(exp)->s_dev))
415 return 0;
416 /* FALL THROUGH */
417 case FSID_MAJOR_MINOR:
418 case FSID_ENCODE_DEV:
419 return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
420 case FSID_NUM:
421 return exp->ex_flags & NFSEXP_FSID;
422 case FSID_UUID8:
423 case FSID_UUID16:
424 if (!is_root_export(exp))
425 return 0;
426 /* fall through */
427 case FSID_UUID4_INUM:
428 case FSID_UUID16_INUM:
429 return exp->ex_uuid != NULL;
430 }
431 return 1;
432}
425 433
426 /* Choose filehandle version and fsid type based on 434
427 * the reference filehandle (if it is in the same export) 435static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
428 * or the export options. 436{
429 */ 437 u8 version;
430 retry: 438 u8 fsid_type;
439retry:
431 version = 1; 440 version = 1;
432 if (ref_fh && ref_fh->fh_export == exp) { 441 if (ref_fh && ref_fh->fh_export == exp) {
433 version = ref_fh->fh_handle.fh_version; 442 version = ref_fh->fh_handle.fh_version;
434 fsid_type = ref_fh->fh_handle.fh_fsid_type; 443 fsid_type = ref_fh->fh_handle.fh_fsid_type;
435 444
436 if (ref_fh == fhp)
437 fh_put(ref_fh);
438 ref_fh = NULL; 445 ref_fh = NULL;
439 446
440 switch (version) { 447 switch (version) {
@@ -447,58 +454,66 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
447 goto retry; 454 goto retry;
448 } 455 }
449 456
450 /* Need to check that this type works for this 457 /*
451 * export point. As the fsid -> filesystem mapping 458 * As the fsid -> filesystem mapping was guided by
452 * was guided by user-space, there is no guarantee 459 * user-space, there is no guarantee that the filesystem
453 * that the filesystem actually supports that fsid 460 * actually supports that fsid type. If it doesn't we
454 * type. If it doesn't we loop around again without 461 * loop around again without ref_fh set.
455 * ref_fh set.
456 */ 462 */
457 switch(fsid_type) { 463 if (!fsid_type_ok_for_exp(fsid_type, exp))
458 case FSID_DEV: 464 goto retry;
459 if (!old_valid_dev(ex_dev))
460 goto retry;
461 /* FALL THROUGH */
462 case FSID_MAJOR_MINOR:
463 case FSID_ENCODE_DEV:
464 if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
465 & FS_REQUIRES_DEV))
466 goto retry;
467 break;
468 case FSID_NUM:
469 if (! (exp->ex_flags & NFSEXP_FSID))
470 goto retry;
471 break;
472 case FSID_UUID8:
473 case FSID_UUID16:
474 if (!root_export)
475 goto retry;
476 /* fall through */
477 case FSID_UUID4_INUM:
478 case FSID_UUID16_INUM:
479 if (exp->ex_uuid == NULL)
480 goto retry;
481 break;
482 }
483 } else if (exp->ex_flags & NFSEXP_FSID) { 465 } else if (exp->ex_flags & NFSEXP_FSID) {
484 fsid_type = FSID_NUM; 466 fsid_type = FSID_NUM;
485 } else if (exp->ex_uuid) { 467 } else if (exp->ex_uuid) {
486 if (fhp->fh_maxsize >= 64) { 468 if (fhp->fh_maxsize >= 64) {
487 if (root_export) 469 if (is_root_export(exp))
488 fsid_type = FSID_UUID16; 470 fsid_type = FSID_UUID16;
489 else 471 else
490 fsid_type = FSID_UUID16_INUM; 472 fsid_type = FSID_UUID16_INUM;
491 } else { 473 } else {
492 if (root_export) 474 if (is_root_export(exp))
493 fsid_type = FSID_UUID8; 475 fsid_type = FSID_UUID8;
494 else 476 else
495 fsid_type = FSID_UUID4_INUM; 477 fsid_type = FSID_UUID4_INUM;
496 } 478 }
497 } else if (!old_valid_dev(ex_dev)) 479 } else if (!old_valid_dev(exp_sb(exp)->s_dev))
498 /* for newer device numbers, we must use a newer fsid format */ 480 /* for newer device numbers, we must use a newer fsid format */
499 fsid_type = FSID_ENCODE_DEV; 481 fsid_type = FSID_ENCODE_DEV;
500 else 482 else
501 fsid_type = FSID_DEV; 483 fsid_type = FSID_DEV;
484 fhp->fh_handle.fh_version = version;
485 if (version)
486 fhp->fh_handle.fh_fsid_type = fsid_type;
487}
488
489__be32
490fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
491 struct svc_fh *ref_fh)
492{
493 /* ref_fh is a reference file handle.
494 * if it is non-null and for the same filesystem, then we should compose
495 * a filehandle which is of the same version, where possible.
496 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
497 * Then create a 32byte filehandle using nfs_fhbase_old
498 *
499 */
500
501 struct inode * inode = dentry->d_inode;
502 struct dentry *parent = dentry->d_parent;
503 __u32 *datap;
504 dev_t ex_dev = exp_sb(exp)->s_dev;
505
506 dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
507 MAJOR(ex_dev), MINOR(ex_dev),
508 (long) exp->ex_path.dentry->d_inode->i_ino,
509 parent->d_name.name, dentry->d_name.name,
510 (inode ? inode->i_ino : 0));
511
512 /* Choose filehandle version and fsid type based on
513 * the reference filehandle (if it is in the same export)
514 * or the export options.
515 */
516 set_version_and_fsid_type(fhp, exp, ref_fh);
502 517
503 if (ref_fh == fhp) 518 if (ref_fh == fhp)
504 fh_put(ref_fh); 519 fh_put(ref_fh);
@@ -516,7 +531,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
516 fhp->fh_export = exp; 531 fhp->fh_export = exp;
517 cache_get(&exp->h); 532 cache_get(&exp->h);
518 533
519 if (version == 0xca) { 534 if (fhp->fh_handle.fh_version == 0xca) {
520 /* old style filehandle please */ 535 /* old style filehandle please */
521 memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); 536 memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
522 fhp->fh_handle.fh_size = NFS_FHSIZE; 537 fhp->fh_handle.fh_size = NFS_FHSIZE;
@@ -530,22 +545,22 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
530 _fh_update_old(dentry, exp, &fhp->fh_handle); 545 _fh_update_old(dentry, exp, &fhp->fh_handle);
531 } else { 546 } else {
532 int len; 547 int len;
533 fhp->fh_handle.fh_version = 1;
534 fhp->fh_handle.fh_auth_type = 0; 548 fhp->fh_handle.fh_auth_type = 0;
535 datap = fhp->fh_handle.fh_auth+0; 549 datap = fhp->fh_handle.fh_auth+0;
536 fhp->fh_handle.fh_fsid_type = fsid_type; 550 mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
537 mk_fsid(fsid_type, datap, ex_dev,
538 exp->ex_path.dentry->d_inode->i_ino, 551 exp->ex_path.dentry->d_inode->i_ino,
539 exp->ex_fsid, exp->ex_uuid); 552 exp->ex_fsid, exp->ex_uuid);
540 553
541 len = key_len(fsid_type); 554 len = key_len(fhp->fh_handle.fh_fsid_type);
542 datap += len/4; 555 datap += len/4;
543 fhp->fh_handle.fh_size = 4 + len; 556 fhp->fh_handle.fh_size = 4 + len;
544 557
545 if (inode) 558 if (inode)
546 _fh_update(fhp, exp, dentry); 559 _fh_update(fhp, exp, dentry);
547 if (fhp->fh_handle.fh_fileid_type == 255) 560 if (fhp->fh_handle.fh_fileid_type == 255) {
561 fh_put(fhp);
548 return nfserr_opnotsupp; 562 return nfserr_opnotsupp;
563 }
549 } 564 }
550 565
551 return 0; 566 return 0;
@@ -639,8 +654,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
639 case FSID_DEV: 654 case FSID_DEV:
640 case FSID_ENCODE_DEV: 655 case FSID_ENCODE_DEV:
641 case FSID_MAJOR_MINOR: 656 case FSID_MAJOR_MINOR:
642 if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags 657 if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
643 & FS_REQUIRES_DEV)
644 return FSIDSOURCE_DEV; 658 return FSIDSOURCE_DEV;
645 break; 659 break;
646 case FSID_NUM: 660 case FSID_NUM:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 24d58adfe5fd..67ea83eedd43 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -34,6 +34,7 @@
34#include <linux/nfsd/syscall.h> 34#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h> 35#include <linux/lockd/bind.h>
36#include <linux/nfsacl.h> 36#include <linux/nfsacl.h>
37#include <linux/seq_file.h>
37 38
38#define NFSDDBG_FACILITY NFSDDBG_SVC 39#define NFSDDBG_FACILITY NFSDDBG_SVC
39 40
@@ -66,6 +67,16 @@ struct timeval nfssvc_boot;
66DEFINE_MUTEX(nfsd_mutex); 67DEFINE_MUTEX(nfsd_mutex);
67struct svc_serv *nfsd_serv; 68struct svc_serv *nfsd_serv;
68 69
70/*
71 * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
72 * nfsd_drc_max_pages limits the total amount of memory available for
73 * version 4.1 DRC caches.
74 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
75 */
76spinlock_t nfsd_drc_lock;
77unsigned int nfsd_drc_max_mem;
78unsigned int nfsd_drc_mem_used;
79
69#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 80#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
70static struct svc_stat nfsd_acl_svcstats; 81static struct svc_stat nfsd_acl_svcstats;
71static struct svc_version * nfsd_acl_version[] = { 82static struct svc_version * nfsd_acl_version[] = {
@@ -235,13 +246,12 @@ void nfsd_reset_versions(void)
235 */ 246 */
236static void set_max_drc(void) 247static void set_max_drc(void)
237{ 248{
238 /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ 249 #define NFSD_DRC_SIZE_SHIFT 10
239 #define NFSD_DRC_SIZE_SHIFT 7 250 nfsd_drc_max_mem = (nr_free_buffer_pages()
240 nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() 251 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
241 >> NFSD_DRC_SIZE_SHIFT; 252 nfsd_drc_mem_used = 0;
242 nfsd_serv->sv_drc_pages_used = 0; 253 spin_lock_init(&nfsd_drc_lock);
243 dprintk("%s svc_drc_max_pages %u\n", __func__, 254 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
244 nfsd_serv->sv_drc_max_pages);
245} 255}
246 256
247int nfsd_create_serv(void) 257int nfsd_create_serv(void)
@@ -401,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs)
401 error = nfsd_racache_init(2*nrservs); 411 error = nfsd_racache_init(2*nrservs);
402 if (error<0) 412 if (error<0)
403 goto out; 413 goto out;
404 nfs4_state_start(); 414 error = nfs4_state_start();
415 if (error)
416 goto out;
405 417
406 nfsd_reset_versions(); 418 nfsd_reset_versions();
407 419
@@ -569,10 +581,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
569 + rqstp->rq_res.head[0].iov_len; 581 + rqstp->rq_res.head[0].iov_len;
570 rqstp->rq_res.head[0].iov_len += sizeof(__be32); 582 rqstp->rq_res.head[0].iov_len += sizeof(__be32);
571 583
572 /* NFSv4.1 DRC requires statp */
573 if (rqstp->rq_vers == 4)
574 nfsd4_set_statp(rqstp, statp);
575
576 /* Now call the procedure handler, and encode NFS status. */ 584 /* Now call the procedure handler, and encode NFS status. */
577 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 585 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
578 nfserr = map_new_errors(rqstp->rq_vers, nfserr); 586 nfserr = map_new_errors(rqstp->rq_vers, nfserr);
@@ -607,7 +615,25 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
607 615
608int nfsd_pool_stats_open(struct inode *inode, struct file *file) 616int nfsd_pool_stats_open(struct inode *inode, struct file *file)
609{ 617{
610 if (nfsd_serv == NULL) 618 int ret;
619 mutex_lock(&nfsd_mutex);
620 if (nfsd_serv == NULL) {
621 mutex_unlock(&nfsd_mutex);
611 return -ENODEV; 622 return -ENODEV;
612 return svc_pool_stats_open(nfsd_serv, file); 623 }
624 /* bump up the psudo refcount while traversing */
625 svc_get(nfsd_serv);
626 ret = svc_pool_stats_open(nfsd_serv, file);
627 mutex_unlock(&nfsd_mutex);
628 return ret;
629}
630
631int nfsd_pool_stats_release(struct inode *inode, struct file *file)
632{
633 int ret = seq_release(inode, file);
634 mutex_lock(&nfsd_mutex);
635 /* this function really, really should have been called svc_put() */
636 svc_destroy(nfsd_serv);
637 mutex_unlock(&nfsd_mutex);
638 return ret;
613} 639}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8fa09bfbcba7..a293f0273263 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -89,6 +89,12 @@ struct raparm_hbucket {
89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
91 91
92static inline int
93nfsd_v4client(struct svc_rqst *rq)
94{
95 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
96}
97
92/* 98/*
93 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
94 * a mount point. 100 * a mount point.
@@ -115,7 +121,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
115 path_put(&path); 121 path_put(&path);
116 goto out; 122 goto out;
117 } 123 }
118 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 124 if (nfsd_v4client(rqstp) ||
125 (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
119 /* successfully crossed mount point */ 126 /* successfully crossed mount point */
120 /* 127 /*
121 * This is subtle: path.dentry is *not* on path.mnt 128 * This is subtle: path.dentry is *not* on path.mnt
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index c668bca579c1..6a2711f4c321 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -46,7 +46,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc)
46 INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); 46 INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
47} 47}
48 48
49static struct address_space_operations def_btnode_aops = { 49static const struct address_space_operations def_btnode_aops = {
50 .sync_page = block_sync_page, 50 .sync_page = block_sync_page,
51}; 51};
52 52
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 6bd84a0d8238..fc8278c77cdd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -151,7 +151,7 @@ struct file_operations nilfs_file_operations = {
151 .splice_read = generic_file_splice_read, 151 .splice_read = generic_file_splice_read,
152}; 152};
153 153
154struct inode_operations nilfs_file_inode_operations = { 154const struct inode_operations nilfs_file_inode_operations = {
155 .truncate = nilfs_truncate, 155 .truncate = nilfs_truncate,
156 .setattr = nilfs_setattr, 156 .setattr = nilfs_setattr,
157 .permission = nilfs_permission, 157 .permission = nilfs_permission,
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1b3c2bb20da9..e6de0a27ab5d 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -52,7 +52,7 @@
52#include "dat.h" 52#include "dat.h"
53#include "ifile.h" 53#include "ifile.h"
54 54
55static struct address_space_operations def_gcinode_aops = { 55static const struct address_space_operations def_gcinode_aops = {
56 .sync_page = block_sync_page, 56 .sync_page = block_sync_page,
57}; 57};
58 58
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 807e584b163d..2d2c501deb54 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -238,7 +238,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
238 return size; 238 return size;
239} 239}
240 240
241struct address_space_operations nilfs_aops = { 241const struct address_space_operations nilfs_aops = {
242 .writepage = nilfs_writepage, 242 .writepage = nilfs_writepage,
243 .readpage = nilfs_readpage, 243 .readpage = nilfs_readpage,
244 .sync_page = block_sync_page, 244 .sync_page = block_sync_page,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 156bf6091a96..b18c4998f8d0 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -427,12 +427,12 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
427} 427}
428 428
429 429
430static struct address_space_operations def_mdt_aops = { 430static const struct address_space_operations def_mdt_aops = {
431 .writepage = nilfs_mdt_write_page, 431 .writepage = nilfs_mdt_write_page,
432 .sync_page = block_sync_page, 432 .sync_page = block_sync_page,
433}; 433};
434 434
435static struct inode_operations def_mdt_iops; 435static const struct inode_operations def_mdt_iops;
436static struct file_operations def_mdt_fops; 436static struct file_operations def_mdt_fops;
437 437
438/* 438/*
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index df70dadb336f..ed02e886fa79 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -448,7 +448,7 @@ out:
448 return err; 448 return err;
449} 449}
450 450
451struct inode_operations nilfs_dir_inode_operations = { 451const struct inode_operations nilfs_dir_inode_operations = {
452 .create = nilfs_create, 452 .create = nilfs_create,
453 .lookup = nilfs_lookup, 453 .lookup = nilfs_lookup,
454 .link = nilfs_link, 454 .link = nilfs_link,
@@ -462,12 +462,12 @@ struct inode_operations nilfs_dir_inode_operations = {
462 .permission = nilfs_permission, 462 .permission = nilfs_permission,
463}; 463};
464 464
465struct inode_operations nilfs_special_inode_operations = { 465const struct inode_operations nilfs_special_inode_operations = {
466 .setattr = nilfs_setattr, 466 .setattr = nilfs_setattr,
467 .permission = nilfs_permission, 467 .permission = nilfs_permission,
468}; 468};
469 469
470struct inode_operations nilfs_symlink_inode_operations = { 470const struct inode_operations nilfs_symlink_inode_operations = {
471 .readlink = generic_readlink, 471 .readlink = generic_readlink,
472 .follow_link = page_follow_link_light, 472 .follow_link = page_follow_link_light,
473 .put_link = page_put_link, 473 .put_link = page_put_link,
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 724c63766e82..bad7368782d0 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -295,12 +295,12 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *);
295 * Inodes and files operations 295 * Inodes and files operations
296 */ 296 */
297extern struct file_operations nilfs_dir_operations; 297extern struct file_operations nilfs_dir_operations;
298extern struct inode_operations nilfs_file_inode_operations; 298extern const struct inode_operations nilfs_file_inode_operations;
299extern struct file_operations nilfs_file_operations; 299extern struct file_operations nilfs_file_operations;
300extern struct address_space_operations nilfs_aops; 300extern const struct address_space_operations nilfs_aops;
301extern struct inode_operations nilfs_dir_inode_operations; 301extern const struct inode_operations nilfs_dir_inode_operations;
302extern struct inode_operations nilfs_special_inode_operations; 302extern const struct inode_operations nilfs_special_inode_operations;
303extern struct inode_operations nilfs_symlink_inode_operations; 303extern const struct inode_operations nilfs_symlink_inode_operations;
304 304
305/* 305/*
306 * filesystem type 306 * filesystem type
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 55f3d6b60732..644e66727dd0 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -504,7 +504,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
504 return 0; 504 return 0;
505} 505}
506 506
507static struct super_operations nilfs_sops = { 507static const struct super_operations nilfs_sops = {
508 .alloc_inode = nilfs_alloc_inode, 508 .alloc_inode = nilfs_alloc_inode,
509 .destroy_inode = nilfs_destroy_inode, 509 .destroy_inode = nilfs_destroy_inode,
510 .dirty_inode = nilfs_dirty_inode, 510 .dirty_inode = nilfs_dirty_inode,
@@ -560,7 +560,7 @@ nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len,
560 nilfs_nfs_get_inode); 560 nilfs_nfs_get_inode);
561} 561}
562 562
563static struct export_operations nilfs_export_ops = { 563static const struct export_operations nilfs_export_ops = {
564 .fh_to_dentry = nilfs_fh_to_dentry, 564 .fh_to_dentry = nilfs_fh_to_dentry,
565 .fh_to_parent = nilfs_fh_to_parent, 565 .fh_to_parent = nilfs_fh_to_parent,
566 .get_parent = nilfs_get_parent, 566 .get_parent = nilfs_get_parent,
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 4350d4993b18..663c0e341f8b 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2146,46 +2146,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2146} 2146}
2147 2147
2148/** 2148/**
2149 * ntfs_file_writev -
2150 *
2151 * Basically the same as generic_file_writev() except that it ends up calling
2152 * ntfs_file_aio_write_nolock() instead of __generic_file_aio_write_nolock().
2153 */
2154static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
2155 unsigned long nr_segs, loff_t *ppos)
2156{
2157 struct address_space *mapping = file->f_mapping;
2158 struct inode *inode = mapping->host;
2159 struct kiocb kiocb;
2160 ssize_t ret;
2161
2162 mutex_lock(&inode->i_mutex);
2163 init_sync_kiocb(&kiocb, file);
2164 ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
2165 if (ret == -EIOCBQUEUED)
2166 ret = wait_on_sync_kiocb(&kiocb);
2167 mutex_unlock(&inode->i_mutex);
2168 if (ret > 0) {
2169 int err = generic_write_sync(file, *ppos - ret, ret);
2170 if (err < 0)
2171 ret = err;
2172 }
2173 return ret;
2174}
2175
2176/**
2177 * ntfs_file_write - simple wrapper for ntfs_file_writev()
2178 */
2179static ssize_t ntfs_file_write(struct file *file, const char __user *buf,
2180 size_t count, loff_t *ppos)
2181{
2182 struct iovec local_iov = { .iov_base = (void __user *)buf,
2183 .iov_len = count };
2184
2185 return ntfs_file_writev(file, &local_iov, 1, ppos);
2186}
2187
2188/**
2189 * ntfs_file_fsync - sync a file to disk 2149 * ntfs_file_fsync - sync a file to disk
2190 * @filp: file to be synced 2150 * @filp: file to be synced
2191 * @dentry: dentry describing the file to sync 2151 * @dentry: dentry describing the file to sync
@@ -2247,7 +2207,7 @@ const struct file_operations ntfs_file_ops = {
2247 .read = do_sync_read, /* Read from file. */ 2207 .read = do_sync_read, /* Read from file. */
2248 .aio_read = generic_file_aio_read, /* Async read from file. */ 2208 .aio_read = generic_file_aio_read, /* Async read from file. */
2249#ifdef NTFS_RW 2209#ifdef NTFS_RW
2250 .write = ntfs_file_write, /* Write to file. */ 2210 .write = do_sync_write, /* Write to file. */
2251 .aio_write = ntfs_file_aio_write, /* Async write to file. */ 2211 .aio_write = ntfs_file_aio_write, /* Async write to file. */
2252 /*.release = ,*/ /* Last file is closed. See 2212 /*.release = ,*/ /* Last file is closed. See
2253 fs/ext2/file.c:: 2213 fs/ext2/file.c::
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 50931b1ce4b9..8b2549f672bf 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -829,7 +829,7 @@ enum {
829 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the 829 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
830 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, 830 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
831 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask 831 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
832 is used to to obtain all flags that are valid for setting. */ 832 is used to obtain all flags that are valid for setting. */
833 /* 833 /*
834 * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all 834 * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all
835 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION 835 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index cd0be3f5c3cd..a44b14cbceeb 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
47 return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); 47 return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
48 /* return (void *)__get_free_page(gfp_mask); */ 48 /* return (void *)__get_free_page(gfp_mask); */
49 } 49 }
50 if (likely(size >> PAGE_SHIFT < num_physpages)) 50 if (likely((size >> PAGE_SHIFT) < totalram_pages))
51 return __vmalloc(size, gfp_mask, PAGE_KERNEL); 51 return __vmalloc(size, gfp_mask, PAGE_KERNEL);
52 return NULL; 52 return NULL;
53} 53}
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 01596079dd63..31f25ce32c97 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -28,6 +28,7 @@ ocfs2-objs := \
28 locks.o \ 28 locks.o \
29 mmap.o \ 29 mmap.o \
30 namei.o \ 30 namei.o \
31 refcounttree.o \
31 resize.o \ 32 resize.o \
32 slot_map.o \ 33 slot_map.o \
33 suballoc.o \ 34 suballoc.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ab513ddaeff2..38a42f5d59ff 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,10 +49,21 @@
49#include "super.h" 49#include "super.h"
50#include "uptodate.h" 50#include "uptodate.h"
51#include "xattr.h" 51#include "xattr.h"
52#include "refcounttree.h"
52 53
53#include "buffer_head_io.h" 54#include "buffer_head_io.h"
54 55
56enum ocfs2_contig_type {
57 CONTIG_NONE = 0,
58 CONTIG_LEFT,
59 CONTIG_RIGHT,
60 CONTIG_LEFTRIGHT,
61};
55 62
63static enum ocfs2_contig_type
64 ocfs2_extent_rec_contig(struct super_block *sb,
65 struct ocfs2_extent_rec *ext,
66 struct ocfs2_extent_rec *insert_rec);
56/* 67/*
57 * Operations for a specific extent tree type. 68 * Operations for a specific extent tree type.
58 * 69 *
@@ -79,18 +90,30 @@ struct ocfs2_extent_tree_operations {
79 * that value. new_clusters is the delta, and must be 90 * that value. new_clusters is the delta, and must be
80 * added to the total. Required. 91 * added to the total. Required.
81 */ 92 */
82 void (*eo_update_clusters)(struct inode *inode, 93 void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
83 struct ocfs2_extent_tree *et,
84 u32 new_clusters); 94 u32 new_clusters);
85 95
86 /* 96 /*
97 * If this extent tree is supported by an extent map, insert
98 * a record into the map.
99 */
100 void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et,
101 struct ocfs2_extent_rec *rec);
102
103 /*
104 * If this extent tree is supported by an extent map, truncate the
105 * map to clusters,
106 */
107 void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et,
108 u32 clusters);
109
110 /*
87 * If ->eo_insert_check() exists, it is called before rec is 111 * If ->eo_insert_check() exists, it is called before rec is
88 * inserted into the extent tree. It is optional. 112 * inserted into the extent tree. It is optional.
89 */ 113 */
90 int (*eo_insert_check)(struct inode *inode, 114 int (*eo_insert_check)(struct ocfs2_extent_tree *et,
91 struct ocfs2_extent_tree *et,
92 struct ocfs2_extent_rec *rec); 115 struct ocfs2_extent_rec *rec);
93 int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et); 116 int (*eo_sanity_check)(struct ocfs2_extent_tree *et);
94 117
95 /* 118 /*
96 * -------------------------------------------------------------- 119 * --------------------------------------------------------------
@@ -109,8 +132,17 @@ struct ocfs2_extent_tree_operations {
109 * it exists. If it does not, et->et_max_leaf_clusters is set 132 * it exists. If it does not, et->et_max_leaf_clusters is set
110 * to 0 (unlimited). Optional. 133 * to 0 (unlimited). Optional.
111 */ 134 */
112 void (*eo_fill_max_leaf_clusters)(struct inode *inode, 135 void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
113 struct ocfs2_extent_tree *et); 136
137 /*
138 * ->eo_extent_contig test whether the 2 ocfs2_extent_rec
139 * are contiguous or not. Optional. Don't need to set it if use
140 * ocfs2_extent_rec as the tree leaf.
141 */
142 enum ocfs2_contig_type
143 (*eo_extent_contig)(struct ocfs2_extent_tree *et,
144 struct ocfs2_extent_rec *ext,
145 struct ocfs2_extent_rec *insert_rec);
114}; 146};
115 147
116 148
@@ -121,19 +153,22 @@ struct ocfs2_extent_tree_operations {
121static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et); 153static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
122static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, 154static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
123 u64 blkno); 155 u64 blkno);
124static void ocfs2_dinode_update_clusters(struct inode *inode, 156static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
125 struct ocfs2_extent_tree *et,
126 u32 clusters); 157 u32 clusters);
127static int ocfs2_dinode_insert_check(struct inode *inode, 158static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
128 struct ocfs2_extent_tree *et, 159 struct ocfs2_extent_rec *rec);
160static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
161 u32 clusters);
162static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
129 struct ocfs2_extent_rec *rec); 163 struct ocfs2_extent_rec *rec);
130static int ocfs2_dinode_sanity_check(struct inode *inode, 164static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
131 struct ocfs2_extent_tree *et);
132static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); 165static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
133static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { 166static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
134 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, 167 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
135 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, 168 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
136 .eo_update_clusters = ocfs2_dinode_update_clusters, 169 .eo_update_clusters = ocfs2_dinode_update_clusters,
170 .eo_extent_map_insert = ocfs2_dinode_extent_map_insert,
171 .eo_extent_map_truncate = ocfs2_dinode_extent_map_truncate,
137 .eo_insert_check = ocfs2_dinode_insert_check, 172 .eo_insert_check = ocfs2_dinode_insert_check,
138 .eo_sanity_check = ocfs2_dinode_sanity_check, 173 .eo_sanity_check = ocfs2_dinode_sanity_check,
139 .eo_fill_root_el = ocfs2_dinode_fill_root_el, 174 .eo_fill_root_el = ocfs2_dinode_fill_root_el,
@@ -156,40 +191,53 @@ static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
156 return le64_to_cpu(di->i_last_eb_blk); 191 return le64_to_cpu(di->i_last_eb_blk);
157} 192}
158 193
159static void ocfs2_dinode_update_clusters(struct inode *inode, 194static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
160 struct ocfs2_extent_tree *et,
161 u32 clusters) 195 u32 clusters)
162{ 196{
197 struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
163 struct ocfs2_dinode *di = et->et_object; 198 struct ocfs2_dinode *di = et->et_object;
164 199
165 le32_add_cpu(&di->i_clusters, clusters); 200 le32_add_cpu(&di->i_clusters, clusters);
166 spin_lock(&OCFS2_I(inode)->ip_lock); 201 spin_lock(&oi->ip_lock);
167 OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); 202 oi->ip_clusters = le32_to_cpu(di->i_clusters);
168 spin_unlock(&OCFS2_I(inode)->ip_lock); 203 spin_unlock(&oi->ip_lock);
169} 204}
170 205
171static int ocfs2_dinode_insert_check(struct inode *inode, 206static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
172 struct ocfs2_extent_tree *et, 207 struct ocfs2_extent_rec *rec)
208{
209 struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
210
211 ocfs2_extent_map_insert_rec(inode, rec);
212}
213
214static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
215 u32 clusters)
216{
217 struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
218
219 ocfs2_extent_map_trunc(inode, clusters);
220}
221
222static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
173 struct ocfs2_extent_rec *rec) 223 struct ocfs2_extent_rec *rec)
174{ 224{
175 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 225 struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
226 struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb);
176 227
177 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); 228 BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL);
178 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && 229 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
179 (OCFS2_I(inode)->ip_clusters != 230 (oi->ip_clusters != le32_to_cpu(rec->e_cpos)),
180 le32_to_cpu(rec->e_cpos)),
181 "Device %s, asking for sparse allocation: inode %llu, " 231 "Device %s, asking for sparse allocation: inode %llu, "
182 "cpos %u, clusters %u\n", 232 "cpos %u, clusters %u\n",
183 osb->dev_str, 233 osb->dev_str,
184 (unsigned long long)OCFS2_I(inode)->ip_blkno, 234 (unsigned long long)oi->ip_blkno,
185 rec->e_cpos, 235 rec->e_cpos, oi->ip_clusters);
186 OCFS2_I(inode)->ip_clusters);
187 236
188 return 0; 237 return 0;
189} 238}
190 239
191static int ocfs2_dinode_sanity_check(struct inode *inode, 240static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et)
192 struct ocfs2_extent_tree *et)
193{ 241{
194 struct ocfs2_dinode *di = et->et_object; 242 struct ocfs2_dinode *di = et->et_object;
195 243
@@ -229,8 +277,7 @@ static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
229 return le64_to_cpu(vb->vb_xv->xr_last_eb_blk); 277 return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
230} 278}
231 279
232static void ocfs2_xattr_value_update_clusters(struct inode *inode, 280static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
233 struct ocfs2_extent_tree *et,
234 u32 clusters) 281 u32 clusters)
235{ 282{
236 struct ocfs2_xattr_value_buf *vb = et->et_object; 283 struct ocfs2_xattr_value_buf *vb = et->et_object;
@@ -252,12 +299,11 @@ static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
252 et->et_root_el = &xb->xb_attrs.xb_root.xt_list; 299 et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
253} 300}
254 301
255static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode, 302static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et)
256 struct ocfs2_extent_tree *et)
257{ 303{
304 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
258 et->et_max_leaf_clusters = 305 et->et_max_leaf_clusters =
259 ocfs2_clusters_for_bytes(inode->i_sb, 306 ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
260 OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
261} 307}
262 308
263static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, 309static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
@@ -277,8 +323,7 @@ static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
277 return le64_to_cpu(xt->xt_last_eb_blk); 323 return le64_to_cpu(xt->xt_last_eb_blk);
278} 324}
279 325
280static void ocfs2_xattr_tree_update_clusters(struct inode *inode, 326static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
281 struct ocfs2_extent_tree *et,
282 u32 clusters) 327 u32 clusters)
283{ 328{
284 struct ocfs2_xattr_block *xb = et->et_object; 329 struct ocfs2_xattr_block *xb = et->et_object;
@@ -309,8 +354,7 @@ static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
309 return le64_to_cpu(dx_root->dr_last_eb_blk); 354 return le64_to_cpu(dx_root->dr_last_eb_blk);
310} 355}
311 356
312static void ocfs2_dx_root_update_clusters(struct inode *inode, 357static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
313 struct ocfs2_extent_tree *et,
314 u32 clusters) 358 u32 clusters)
315{ 359{
316 struct ocfs2_dx_root_block *dx_root = et->et_object; 360 struct ocfs2_dx_root_block *dx_root = et->et_object;
@@ -318,8 +362,7 @@ static void ocfs2_dx_root_update_clusters(struct inode *inode,
318 le32_add_cpu(&dx_root->dr_clusters, clusters); 362 le32_add_cpu(&dx_root->dr_clusters, clusters);
319} 363}
320 364
321static int ocfs2_dx_root_sanity_check(struct inode *inode, 365static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
322 struct ocfs2_extent_tree *et)
323{ 366{
324 struct ocfs2_dx_root_block *dx_root = et->et_object; 367 struct ocfs2_dx_root_block *dx_root = et->et_object;
325 368
@@ -343,8 +386,54 @@ static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
343 .eo_fill_root_el = ocfs2_dx_root_fill_root_el, 386 .eo_fill_root_el = ocfs2_dx_root_fill_root_el,
344}; 387};
345 388
389static void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et)
390{
391 struct ocfs2_refcount_block *rb = et->et_object;
392
393 et->et_root_el = &rb->rf_list;
394}
395
396static void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
397 u64 blkno)
398{
399 struct ocfs2_refcount_block *rb = et->et_object;
400
401 rb->rf_last_eb_blk = cpu_to_le64(blkno);
402}
403
404static u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
405{
406 struct ocfs2_refcount_block *rb = et->et_object;
407
408 return le64_to_cpu(rb->rf_last_eb_blk);
409}
410
411static void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et,
412 u32 clusters)
413{
414 struct ocfs2_refcount_block *rb = et->et_object;
415
416 le32_add_cpu(&rb->rf_clusters, clusters);
417}
418
419static enum ocfs2_contig_type
420ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
421 struct ocfs2_extent_rec *ext,
422 struct ocfs2_extent_rec *insert_rec)
423{
424 return CONTIG_NONE;
425}
426
427static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
428 .eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk,
429 .eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk,
430 .eo_update_clusters = ocfs2_refcount_tree_update_clusters,
431 .eo_fill_root_el = ocfs2_refcount_tree_fill_root_el,
432 .eo_extent_contig = ocfs2_refcount_tree_extent_contig,
433};
434
346static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, 435static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
347 struct inode *inode, 436 struct ocfs2_caching_info *ci,
348 struct buffer_head *bh, 437 struct buffer_head *bh,
349 ocfs2_journal_access_func access, 438 ocfs2_journal_access_func access,
350 void *obj, 439 void *obj,
@@ -352,6 +441,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
352{ 441{
353 et->et_ops = ops; 442 et->et_ops = ops;
354 et->et_root_bh = bh; 443 et->et_root_bh = bh;
444 et->et_ci = ci;
355 et->et_root_journal_access = access; 445 et->et_root_journal_access = access;
356 if (!obj) 446 if (!obj)
357 obj = (void *)bh->b_data; 447 obj = (void *)bh->b_data;
@@ -361,41 +451,49 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
361 if (!et->et_ops->eo_fill_max_leaf_clusters) 451 if (!et->et_ops->eo_fill_max_leaf_clusters)
362 et->et_max_leaf_clusters = 0; 452 et->et_max_leaf_clusters = 0;
363 else 453 else
364 et->et_ops->eo_fill_max_leaf_clusters(inode, et); 454 et->et_ops->eo_fill_max_leaf_clusters(et);
365} 455}
366 456
367void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, 457void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
368 struct inode *inode, 458 struct ocfs2_caching_info *ci,
369 struct buffer_head *bh) 459 struct buffer_head *bh)
370{ 460{
371 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di, 461 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di,
372 NULL, &ocfs2_dinode_et_ops); 462 NULL, &ocfs2_dinode_et_ops);
373} 463}
374 464
375void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, 465void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
376 struct inode *inode, 466 struct ocfs2_caching_info *ci,
377 struct buffer_head *bh) 467 struct buffer_head *bh)
378{ 468{
379 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb, 469 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb,
380 NULL, &ocfs2_xattr_tree_et_ops); 470 NULL, &ocfs2_xattr_tree_et_ops);
381} 471}
382 472
383void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, 473void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
384 struct inode *inode, 474 struct ocfs2_caching_info *ci,
385 struct ocfs2_xattr_value_buf *vb) 475 struct ocfs2_xattr_value_buf *vb)
386{ 476{
387 __ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb, 477 __ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb,
388 &ocfs2_xattr_value_et_ops); 478 &ocfs2_xattr_value_et_ops);
389} 479}
390 480
391void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, 481void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
392 struct inode *inode, 482 struct ocfs2_caching_info *ci,
393 struct buffer_head *bh) 483 struct buffer_head *bh)
394{ 484{
395 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr, 485 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr,
396 NULL, &ocfs2_dx_root_et_ops); 486 NULL, &ocfs2_dx_root_et_ops);
397} 487}
398 488
489void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
490 struct ocfs2_caching_info *ci,
491 struct buffer_head *bh)
492{
493 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb,
494 NULL, &ocfs2_refcount_tree_et_ops);
495}
496
399static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, 497static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
400 u64 new_last_eb_blk) 498 u64 new_last_eb_blk)
401{ 499{
@@ -407,78 +505,71 @@ static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
407 return et->et_ops->eo_get_last_eb_blk(et); 505 return et->et_ops->eo_get_last_eb_blk(et);
408} 506}
409 507
410static inline void ocfs2_et_update_clusters(struct inode *inode, 508static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
411 struct ocfs2_extent_tree *et,
412 u32 clusters) 509 u32 clusters)
413{ 510{
414 et->et_ops->eo_update_clusters(inode, et, clusters); 511 et->et_ops->eo_update_clusters(et, clusters);
512}
513
514static inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et,
515 struct ocfs2_extent_rec *rec)
516{
517 if (et->et_ops->eo_extent_map_insert)
518 et->et_ops->eo_extent_map_insert(et, rec);
519}
520
521static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
522 u32 clusters)
523{
524 if (et->et_ops->eo_extent_map_truncate)
525 et->et_ops->eo_extent_map_truncate(et, clusters);
415} 526}
416 527
417static inline int ocfs2_et_root_journal_access(handle_t *handle, 528static inline int ocfs2_et_root_journal_access(handle_t *handle,
418 struct inode *inode,
419 struct ocfs2_extent_tree *et, 529 struct ocfs2_extent_tree *et,
420 int type) 530 int type)
421{ 531{
422 return et->et_root_journal_access(handle, inode, et->et_root_bh, 532 return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh,
423 type); 533 type);
424} 534}
425 535
426static inline int ocfs2_et_insert_check(struct inode *inode, 536static inline enum ocfs2_contig_type
427 struct ocfs2_extent_tree *et, 537 ocfs2_et_extent_contig(struct ocfs2_extent_tree *et,
538 struct ocfs2_extent_rec *rec,
539 struct ocfs2_extent_rec *insert_rec)
540{
541 if (et->et_ops->eo_extent_contig)
542 return et->et_ops->eo_extent_contig(et, rec, insert_rec);
543
544 return ocfs2_extent_rec_contig(
545 ocfs2_metadata_cache_get_super(et->et_ci),
546 rec, insert_rec);
547}
548
549static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
428 struct ocfs2_extent_rec *rec) 550 struct ocfs2_extent_rec *rec)
429{ 551{
430 int ret = 0; 552 int ret = 0;
431 553
432 if (et->et_ops->eo_insert_check) 554 if (et->et_ops->eo_insert_check)
433 ret = et->et_ops->eo_insert_check(inode, et, rec); 555 ret = et->et_ops->eo_insert_check(et, rec);
434 return ret; 556 return ret;
435} 557}
436 558
437static inline int ocfs2_et_sanity_check(struct inode *inode, 559static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
438 struct ocfs2_extent_tree *et)
439{ 560{
440 int ret = 0; 561 int ret = 0;
441 562
442 if (et->et_ops->eo_sanity_check) 563 if (et->et_ops->eo_sanity_check)
443 ret = et->et_ops->eo_sanity_check(inode, et); 564 ret = et->et_ops->eo_sanity_check(et);
444 return ret; 565 return ret;
445} 566}
446 567
447static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 568static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
448static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 569static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
449 struct ocfs2_extent_block *eb); 570 struct ocfs2_extent_block *eb);
450 571static void ocfs2_adjust_rightmost_records(handle_t *handle,
451/* 572 struct ocfs2_extent_tree *et,
452 * Structures which describe a path through a btree, and functions to
453 * manipulate them.
454 *
455 * The idea here is to be as generic as possible with the tree
456 * manipulation code.
457 */
458struct ocfs2_path_item {
459 struct buffer_head *bh;
460 struct ocfs2_extent_list *el;
461};
462
463#define OCFS2_MAX_PATH_DEPTH 5
464
465struct ocfs2_path {
466 int p_tree_depth;
467 ocfs2_journal_access_func p_root_access;
468 struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
469};
470
471#define path_root_bh(_path) ((_path)->p_node[0].bh)
472#define path_root_el(_path) ((_path)->p_node[0].el)
473#define path_root_access(_path)((_path)->p_root_access)
474#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
475#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
476#define path_num_items(_path) ((_path)->p_tree_depth + 1)
477
478static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
479 u32 cpos);
480static void ocfs2_adjust_rightmost_records(struct inode *inode,
481 handle_t *handle,
482 struct ocfs2_path *path, 573 struct ocfs2_path *path,
483 struct ocfs2_extent_rec *insert_rec); 574 struct ocfs2_extent_rec *insert_rec);
484/* 575/*
@@ -486,7 +577,7 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
486 * to build another path. Generally, this involves freeing the buffer 577 * to build another path. Generally, this involves freeing the buffer
487 * heads. 578 * heads.
488 */ 579 */
489static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) 580void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
490{ 581{
491 int i, start = 0, depth = 0; 582 int i, start = 0, depth = 0;
492 struct ocfs2_path_item *node; 583 struct ocfs2_path_item *node;
@@ -515,7 +606,7 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
515 path->p_tree_depth = depth; 606 path->p_tree_depth = depth;
516} 607}
517 608
518static void ocfs2_free_path(struct ocfs2_path *path) 609void ocfs2_free_path(struct ocfs2_path *path)
519{ 610{
520 if (path) { 611 if (path) {
521 ocfs2_reinit_path(path, 0); 612 ocfs2_reinit_path(path, 0);
@@ -613,13 +704,13 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
613 return path; 704 return path;
614} 705}
615 706
616static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path) 707struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
617{ 708{
618 return ocfs2_new_path(path_root_bh(path), path_root_el(path), 709 return ocfs2_new_path(path_root_bh(path), path_root_el(path),
619 path_root_access(path)); 710 path_root_access(path));
620} 711}
621 712
622static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) 713struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
623{ 714{
624 return ocfs2_new_path(et->et_root_bh, et->et_root_el, 715 return ocfs2_new_path(et->et_root_bh, et->et_root_el,
625 et->et_root_journal_access); 716 et->et_root_journal_access);
@@ -632,10 +723,10 @@ static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
632 * I don't like the way this function's name looks next to 723 * I don't like the way this function's name looks next to
633 * ocfs2_journal_access_path(), but I don't have a better one. 724 * ocfs2_journal_access_path(), but I don't have a better one.
634 */ 725 */
635static int ocfs2_path_bh_journal_access(handle_t *handle, 726int ocfs2_path_bh_journal_access(handle_t *handle,
636 struct inode *inode, 727 struct ocfs2_caching_info *ci,
637 struct ocfs2_path *path, 728 struct ocfs2_path *path,
638 int idx) 729 int idx)
639{ 730{
640 ocfs2_journal_access_func access = path_root_access(path); 731 ocfs2_journal_access_func access = path_root_access(path);
641 732
@@ -645,15 +736,16 @@ static int ocfs2_path_bh_journal_access(handle_t *handle,
645 if (idx) 736 if (idx)
646 access = ocfs2_journal_access_eb; 737 access = ocfs2_journal_access_eb;
647 738
648 return access(handle, inode, path->p_node[idx].bh, 739 return access(handle, ci, path->p_node[idx].bh,
649 OCFS2_JOURNAL_ACCESS_WRITE); 740 OCFS2_JOURNAL_ACCESS_WRITE);
650} 741}
651 742
652/* 743/*
653 * Convenience function to journal all components in a path. 744 * Convenience function to journal all components in a path.
654 */ 745 */
655static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, 746int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
656 struct ocfs2_path *path) 747 handle_t *handle,
748 struct ocfs2_path *path)
657{ 749{
658 int i, ret = 0; 750 int i, ret = 0;
659 751
@@ -661,7 +753,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
661 goto out; 753 goto out;
662 754
663 for(i = 0; i < path_num_items(path); i++) { 755 for(i = 0; i < path_num_items(path); i++) {
664 ret = ocfs2_path_bh_journal_access(handle, inode, path, i); 756 ret = ocfs2_path_bh_journal_access(handle, ci, path, i);
665 if (ret < 0) { 757 if (ret < 0) {
666 mlog_errno(ret); 758 mlog_errno(ret);
667 goto out; 759 goto out;
@@ -702,17 +794,9 @@ int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
702 return ret; 794 return ret;
703} 795}
704 796
705enum ocfs2_contig_type {
706 CONTIG_NONE = 0,
707 CONTIG_LEFT,
708 CONTIG_RIGHT,
709 CONTIG_LEFTRIGHT,
710};
711
712
713/* 797/*
714 * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and 798 * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
715 * ocfs2_extent_contig only work properly against leaf nodes! 799 * ocfs2_extent_rec_contig only work properly against leaf nodes!
716 */ 800 */
717static int ocfs2_block_extent_contig(struct super_block *sb, 801static int ocfs2_block_extent_contig(struct super_block *sb,
718 struct ocfs2_extent_rec *ext, 802 struct ocfs2_extent_rec *ext,
@@ -738,9 +822,9 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
738} 822}
739 823
740static enum ocfs2_contig_type 824static enum ocfs2_contig_type
741 ocfs2_extent_contig(struct inode *inode, 825 ocfs2_extent_rec_contig(struct super_block *sb,
742 struct ocfs2_extent_rec *ext, 826 struct ocfs2_extent_rec *ext,
743 struct ocfs2_extent_rec *insert_rec) 827 struct ocfs2_extent_rec *insert_rec)
744{ 828{
745 u64 blkno = le64_to_cpu(insert_rec->e_blkno); 829 u64 blkno = le64_to_cpu(insert_rec->e_blkno);
746 830
@@ -753,12 +837,12 @@ static enum ocfs2_contig_type
753 return CONTIG_NONE; 837 return CONTIG_NONE;
754 838
755 if (ocfs2_extents_adjacent(ext, insert_rec) && 839 if (ocfs2_extents_adjacent(ext, insert_rec) &&
756 ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) 840 ocfs2_block_extent_contig(sb, ext, blkno))
757 return CONTIG_RIGHT; 841 return CONTIG_RIGHT;
758 842
759 blkno = le64_to_cpu(ext->e_blkno); 843 blkno = le64_to_cpu(ext->e_blkno);
760 if (ocfs2_extents_adjacent(insert_rec, ext) && 844 if (ocfs2_extents_adjacent(insert_rec, ext) &&
761 ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno)) 845 ocfs2_block_extent_contig(sb, insert_rec, blkno))
762 return CONTIG_LEFT; 846 return CONTIG_LEFT;
763 847
764 return CONTIG_NONE; 848 return CONTIG_NONE;
@@ -853,13 +937,13 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
853 return 0; 937 return 0;
854} 938}
855 939
856int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, 940int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
857 struct buffer_head **bh) 941 struct buffer_head **bh)
858{ 942{
859 int rc; 943 int rc;
860 struct buffer_head *tmp = *bh; 944 struct buffer_head *tmp = *bh;
861 945
862 rc = ocfs2_read_block(inode, eb_blkno, &tmp, 946 rc = ocfs2_read_block(ci, eb_blkno, &tmp,
863 ocfs2_validate_extent_block); 947 ocfs2_validate_extent_block);
864 948
865 /* If ocfs2_read_block() got us a new bh, pass it up. */ 949 /* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -874,7 +958,6 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
874 * How many free extents have we got before we need more meta data? 958 * How many free extents have we got before we need more meta data?
875 */ 959 */
876int ocfs2_num_free_extents(struct ocfs2_super *osb, 960int ocfs2_num_free_extents(struct ocfs2_super *osb,
877 struct inode *inode,
878 struct ocfs2_extent_tree *et) 961 struct ocfs2_extent_tree *et)
879{ 962{
880 int retval; 963 int retval;
@@ -889,7 +972,8 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
889 last_eb_blk = ocfs2_et_get_last_eb_blk(et); 972 last_eb_blk = ocfs2_et_get_last_eb_blk(et);
890 973
891 if (last_eb_blk) { 974 if (last_eb_blk) {
892 retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); 975 retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk,
976 &eb_bh);
893 if (retval < 0) { 977 if (retval < 0) {
894 mlog_errno(retval); 978 mlog_errno(retval);
895 goto bail; 979 goto bail;
@@ -913,9 +997,8 @@ bail:
913 * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and 997 * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
914 * l_count for you 998 * l_count for you
915 */ 999 */
916static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 1000static int ocfs2_create_new_meta_bhs(handle_t *handle,
917 handle_t *handle, 1001 struct ocfs2_extent_tree *et,
918 struct inode *inode,
919 int wanted, 1002 int wanted,
920 struct ocfs2_alloc_context *meta_ac, 1003 struct ocfs2_alloc_context *meta_ac,
921 struct buffer_head *bhs[]) 1004 struct buffer_head *bhs[])
@@ -924,6 +1007,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
924 u16 suballoc_bit_start; 1007 u16 suballoc_bit_start;
925 u32 num_got; 1008 u32 num_got;
926 u64 first_blkno; 1009 u64 first_blkno;
1010 struct ocfs2_super *osb =
1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
927 struct ocfs2_extent_block *eb; 1012 struct ocfs2_extent_block *eb;
928 1013
929 mlog_entry_void(); 1014 mlog_entry_void();
@@ -949,9 +1034,10 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
949 mlog_errno(status); 1034 mlog_errno(status);
950 goto bail; 1035 goto bail;
951 } 1036 }
952 ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 1037 ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]);
953 1038
954 status = ocfs2_journal_access_eb(handle, inode, bhs[i], 1039 status = ocfs2_journal_access_eb(handle, et->et_ci,
1040 bhs[i],
955 OCFS2_JOURNAL_ACCESS_CREATE); 1041 OCFS2_JOURNAL_ACCESS_CREATE);
956 if (status < 0) { 1042 if (status < 0) {
957 mlog_errno(status); 1043 mlog_errno(status);
@@ -1023,7 +1109,6 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
1023 * extent block's rightmost record. 1109 * extent block's rightmost record.
1024 */ 1110 */
1025static int ocfs2_adjust_rightmost_branch(handle_t *handle, 1111static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1026 struct inode *inode,
1027 struct ocfs2_extent_tree *et) 1112 struct ocfs2_extent_tree *et)
1028{ 1113{
1029 int status; 1114 int status;
@@ -1037,7 +1122,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1037 return status; 1122 return status;
1038 } 1123 }
1039 1124
1040 status = ocfs2_find_path(inode, path, UINT_MAX); 1125 status = ocfs2_find_path(et->et_ci, path, UINT_MAX);
1041 if (status < 0) { 1126 if (status < 0) {
1042 mlog_errno(status); 1127 mlog_errno(status);
1043 goto out; 1128 goto out;
@@ -1050,7 +1135,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1050 goto out; 1135 goto out;
1051 } 1136 }
1052 1137
1053 status = ocfs2_journal_access_path(inode, handle, path); 1138 status = ocfs2_journal_access_path(et->et_ci, handle, path);
1054 if (status < 0) { 1139 if (status < 0) {
1055 mlog_errno(status); 1140 mlog_errno(status);
1056 goto out; 1141 goto out;
@@ -1059,7 +1144,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1059 el = path_leaf_el(path); 1144 el = path_leaf_el(path);
1060 rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; 1145 rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
1061 1146
1062 ocfs2_adjust_rightmost_records(inode, handle, path, rec); 1147 ocfs2_adjust_rightmost_records(handle, et, path, rec);
1063 1148
1064out: 1149out:
1065 ocfs2_free_path(path); 1150 ocfs2_free_path(path);
@@ -1068,7 +1153,7 @@ out:
1068 1153
1069/* 1154/*
1070 * Add an entire tree branch to our inode. eb_bh is the extent block 1155 * Add an entire tree branch to our inode. eb_bh is the extent block
1071 * to start at, if we don't want to start the branch at the dinode 1156 * to start at, if we don't want to start the branch at the root
1072 * structure. 1157 * structure.
1073 * 1158 *
1074 * last_eb_bh is required as we have to update it's next_leaf pointer 1159 * last_eb_bh is required as we have to update it's next_leaf pointer
@@ -1077,9 +1162,7 @@ out:
1077 * the new branch will be 'empty' in the sense that every block will 1162 * the new branch will be 'empty' in the sense that every block will
1078 * contain a single record with cluster count == 0. 1163 * contain a single record with cluster count == 0.
1079 */ 1164 */
1080static int ocfs2_add_branch(struct ocfs2_super *osb, 1165static int ocfs2_add_branch(handle_t *handle,
1081 handle_t *handle,
1082 struct inode *inode,
1083 struct ocfs2_extent_tree *et, 1166 struct ocfs2_extent_tree *et,
1084 struct buffer_head *eb_bh, 1167 struct buffer_head *eb_bh,
1085 struct buffer_head **last_eb_bh, 1168 struct buffer_head **last_eb_bh,
@@ -1123,7 +1206,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1123 if (root_end > new_cpos) { 1206 if (root_end > new_cpos) {
1124 mlog(0, "adjust the cluster end from %u to %u\n", 1207 mlog(0, "adjust the cluster end from %u to %u\n",
1125 root_end, new_cpos); 1208 root_end, new_cpos);
1126 status = ocfs2_adjust_rightmost_branch(handle, inode, et); 1209 status = ocfs2_adjust_rightmost_branch(handle, et);
1127 if (status) { 1210 if (status) {
1128 mlog_errno(status); 1211 mlog_errno(status);
1129 goto bail; 1212 goto bail;
@@ -1139,7 +1222,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1139 goto bail; 1222 goto bail;
1140 } 1223 }
1141 1224
1142 status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, 1225 status = ocfs2_create_new_meta_bhs(handle, et, new_blocks,
1143 meta_ac, new_eb_bhs); 1226 meta_ac, new_eb_bhs);
1144 if (status < 0) { 1227 if (status < 0) {
1145 mlog_errno(status); 1228 mlog_errno(status);
@@ -1161,7 +1244,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1161 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); 1244 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
1162 eb_el = &eb->h_list; 1245 eb_el = &eb->h_list;
1163 1246
1164 status = ocfs2_journal_access_eb(handle, inode, bh, 1247 status = ocfs2_journal_access_eb(handle, et->et_ci, bh,
1165 OCFS2_JOURNAL_ACCESS_CREATE); 1248 OCFS2_JOURNAL_ACCESS_CREATE);
1166 if (status < 0) { 1249 if (status < 0) {
1167 mlog_errno(status); 1250 mlog_errno(status);
@@ -1201,20 +1284,20 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1201 * journal_dirty erroring as it won't unless we've aborted the 1284 * journal_dirty erroring as it won't unless we've aborted the
1202 * handle (in which case we would never be here) so reserving 1285 * handle (in which case we would never be here) so reserving
1203 * the write with journal_access is all we need to do. */ 1286 * the write with journal_access is all we need to do. */
1204 status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh, 1287 status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh,
1205 OCFS2_JOURNAL_ACCESS_WRITE); 1288 OCFS2_JOURNAL_ACCESS_WRITE);
1206 if (status < 0) { 1289 if (status < 0) {
1207 mlog_errno(status); 1290 mlog_errno(status);
1208 goto bail; 1291 goto bail;
1209 } 1292 }
1210 status = ocfs2_et_root_journal_access(handle, inode, et, 1293 status = ocfs2_et_root_journal_access(handle, et,
1211 OCFS2_JOURNAL_ACCESS_WRITE); 1294 OCFS2_JOURNAL_ACCESS_WRITE);
1212 if (status < 0) { 1295 if (status < 0) {
1213 mlog_errno(status); 1296 mlog_errno(status);
1214 goto bail; 1297 goto bail;
1215 } 1298 }
1216 if (eb_bh) { 1299 if (eb_bh) {
1217 status = ocfs2_journal_access_eb(handle, inode, eb_bh, 1300 status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh,
1218 OCFS2_JOURNAL_ACCESS_WRITE); 1301 OCFS2_JOURNAL_ACCESS_WRITE);
1219 if (status < 0) { 1302 if (status < 0) {
1220 mlog_errno(status); 1303 mlog_errno(status);
@@ -1274,9 +1357,7 @@ bail:
1274 * returns back the new extent block so you can add a branch to it 1357 * returns back the new extent block so you can add a branch to it
1275 * after this call. 1358 * after this call.
1276 */ 1359 */
1277static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 1360static int ocfs2_shift_tree_depth(handle_t *handle,
1278 handle_t *handle,
1279 struct inode *inode,
1280 struct ocfs2_extent_tree *et, 1361 struct ocfs2_extent_tree *et,
1281 struct ocfs2_alloc_context *meta_ac, 1362 struct ocfs2_alloc_context *meta_ac,
1282 struct buffer_head **ret_new_eb_bh) 1363 struct buffer_head **ret_new_eb_bh)
@@ -1290,7 +1371,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1290 1371
1291 mlog_entry_void(); 1372 mlog_entry_void();
1292 1373
1293 status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, 1374 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
1294 &new_eb_bh); 1375 &new_eb_bh);
1295 if (status < 0) { 1376 if (status < 0) {
1296 mlog_errno(status); 1377 mlog_errno(status);
@@ -1304,7 +1385,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1304 eb_el = &eb->h_list; 1385 eb_el = &eb->h_list;
1305 root_el = et->et_root_el; 1386 root_el = et->et_root_el;
1306 1387
1307 status = ocfs2_journal_access_eb(handle, inode, new_eb_bh, 1388 status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh,
1308 OCFS2_JOURNAL_ACCESS_CREATE); 1389 OCFS2_JOURNAL_ACCESS_CREATE);
1309 if (status < 0) { 1390 if (status < 0) {
1310 mlog_errno(status); 1391 mlog_errno(status);
@@ -1323,7 +1404,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1323 goto bail; 1404 goto bail;
1324 } 1405 }
1325 1406
1326 status = ocfs2_et_root_journal_access(handle, inode, et, 1407 status = ocfs2_et_root_journal_access(handle, et,
1327 OCFS2_JOURNAL_ACCESS_WRITE); 1408 OCFS2_JOURNAL_ACCESS_WRITE);
1328 if (status < 0) { 1409 if (status < 0) {
1329 mlog_errno(status); 1410 mlog_errno(status);
@@ -1379,9 +1460,7 @@ bail:
1379 * 1460 *
1380 * return status < 0 indicates an error. 1461 * return status < 0 indicates an error.
1381 */ 1462 */
1382static int ocfs2_find_branch_target(struct ocfs2_super *osb, 1463static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
1383 struct inode *inode,
1384 struct ocfs2_extent_tree *et,
1385 struct buffer_head **target_bh) 1464 struct buffer_head **target_bh)
1386{ 1465{
1387 int status = 0, i; 1466 int status = 0, i;
@@ -1399,19 +1478,21 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
1399 1478
1400 while(le16_to_cpu(el->l_tree_depth) > 1) { 1479 while(le16_to_cpu(el->l_tree_depth) > 1) {
1401 if (le16_to_cpu(el->l_next_free_rec) == 0) { 1480 if (le16_to_cpu(el->l_next_free_rec) == 0) {
1402 ocfs2_error(inode->i_sb, "Dinode %llu has empty " 1481 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
1482 "Owner %llu has empty "
1403 "extent list (next_free_rec == 0)", 1483 "extent list (next_free_rec == 0)",
1404 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1484 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
1405 status = -EIO; 1485 status = -EIO;
1406 goto bail; 1486 goto bail;
1407 } 1487 }
1408 i = le16_to_cpu(el->l_next_free_rec) - 1; 1488 i = le16_to_cpu(el->l_next_free_rec) - 1;
1409 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 1489 blkno = le64_to_cpu(el->l_recs[i].e_blkno);
1410 if (!blkno) { 1490 if (!blkno) {
1411 ocfs2_error(inode->i_sb, "Dinode %llu has extent " 1491 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
1492 "Owner %llu has extent "
1412 "list where extent # %d has no physical " 1493 "list where extent # %d has no physical "
1413 "block start", 1494 "block start",
1414 (unsigned long long)OCFS2_I(inode)->ip_blkno, i); 1495 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
1415 status = -EIO; 1496 status = -EIO;
1416 goto bail; 1497 goto bail;
1417 } 1498 }
@@ -1419,7 +1500,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
1419 brelse(bh); 1500 brelse(bh);
1420 bh = NULL; 1501 bh = NULL;
1421 1502
1422 status = ocfs2_read_extent_block(inode, blkno, &bh); 1503 status = ocfs2_read_extent_block(et->et_ci, blkno, &bh);
1423 if (status < 0) { 1504 if (status < 0) {
1424 mlog_errno(status); 1505 mlog_errno(status);
1425 goto bail; 1506 goto bail;
@@ -1460,20 +1541,18 @@ bail:
1460 * 1541 *
1461 * *last_eb_bh will be updated by ocfs2_add_branch(). 1542 * *last_eb_bh will be updated by ocfs2_add_branch().
1462 */ 1543 */
1463static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, 1544static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
1464 struct ocfs2_extent_tree *et, int *final_depth, 1545 int *final_depth, struct buffer_head **last_eb_bh,
1465 struct buffer_head **last_eb_bh,
1466 struct ocfs2_alloc_context *meta_ac) 1546 struct ocfs2_alloc_context *meta_ac)
1467{ 1547{
1468 int ret, shift; 1548 int ret, shift;
1469 struct ocfs2_extent_list *el = et->et_root_el; 1549 struct ocfs2_extent_list *el = et->et_root_el;
1470 int depth = le16_to_cpu(el->l_tree_depth); 1550 int depth = le16_to_cpu(el->l_tree_depth);
1471 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1472 struct buffer_head *bh = NULL; 1551 struct buffer_head *bh = NULL;
1473 1552
1474 BUG_ON(meta_ac == NULL); 1553 BUG_ON(meta_ac == NULL);
1475 1554
1476 shift = ocfs2_find_branch_target(osb, inode, et, &bh); 1555 shift = ocfs2_find_branch_target(et, &bh);
1477 if (shift < 0) { 1556 if (shift < 0) {
1478 ret = shift; 1557 ret = shift;
1479 mlog_errno(ret); 1558 mlog_errno(ret);
@@ -1490,8 +1569,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
1490 /* ocfs2_shift_tree_depth will return us a buffer with 1569 /* ocfs2_shift_tree_depth will return us a buffer with
1491 * the new extent block (so we can pass that to 1570 * the new extent block (so we can pass that to
1492 * ocfs2_add_branch). */ 1571 * ocfs2_add_branch). */
1493 ret = ocfs2_shift_tree_depth(osb, handle, inode, et, 1572 ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh);
1494 meta_ac, &bh);
1495 if (ret < 0) { 1573 if (ret < 0) {
1496 mlog_errno(ret); 1574 mlog_errno(ret);
1497 goto out; 1575 goto out;
@@ -1517,7 +1595,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
1517 /* call ocfs2_add_branch to add the final part of the tree with 1595 /* call ocfs2_add_branch to add the final part of the tree with
1518 * the new data. */ 1596 * the new data. */
1519 mlog(0, "add branch. bh = %p\n", bh); 1597 mlog(0, "add branch. bh = %p\n", bh);
1520 ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh, 1598 ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
1521 meta_ac); 1599 meta_ac);
1522 if (ret < 0) { 1600 if (ret < 0) {
1523 mlog_errno(ret); 1601 mlog_errno(ret);
@@ -1687,7 +1765,7 @@ set_and_inc:
1687 * 1765 *
1688 * The array index of the subtree root is passed back. 1766 * The array index of the subtree root is passed back.
1689 */ 1767 */
1690static int ocfs2_find_subtree_root(struct inode *inode, 1768static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
1691 struct ocfs2_path *left, 1769 struct ocfs2_path *left,
1692 struct ocfs2_path *right) 1770 struct ocfs2_path *right)
1693{ 1771{
@@ -1705,10 +1783,10 @@ static int ocfs2_find_subtree_root(struct inode *inode,
1705 * The caller didn't pass two adjacent paths. 1783 * The caller didn't pass two adjacent paths.
1706 */ 1784 */
1707 mlog_bug_on_msg(i > left->p_tree_depth, 1785 mlog_bug_on_msg(i > left->p_tree_depth,
1708 "Inode %lu, left depth %u, right depth %u\n" 1786 "Owner %llu, left depth %u, right depth %u\n"
1709 "left leaf blk %llu, right leaf blk %llu\n", 1787 "left leaf blk %llu, right leaf blk %llu\n",
1710 inode->i_ino, left->p_tree_depth, 1788 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
1711 right->p_tree_depth, 1789 left->p_tree_depth, right->p_tree_depth,
1712 (unsigned long long)path_leaf_bh(left)->b_blocknr, 1790 (unsigned long long)path_leaf_bh(left)->b_blocknr,
1713 (unsigned long long)path_leaf_bh(right)->b_blocknr); 1791 (unsigned long long)path_leaf_bh(right)->b_blocknr);
1714 } while (left->p_node[i].bh->b_blocknr == 1792 } while (left->p_node[i].bh->b_blocknr ==
@@ -1725,7 +1803,7 @@ typedef void (path_insert_t)(void *, struct buffer_head *);
1725 * This code can be called with a cpos larger than the tree, in which 1803 * This code can be called with a cpos larger than the tree, in which
1726 * case it will return the rightmost path. 1804 * case it will return the rightmost path.
1727 */ 1805 */
1728static int __ocfs2_find_path(struct inode *inode, 1806static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
1729 struct ocfs2_extent_list *root_el, u32 cpos, 1807 struct ocfs2_extent_list *root_el, u32 cpos,
1730 path_insert_t *func, void *data) 1808 path_insert_t *func, void *data)
1731{ 1809{
@@ -1736,15 +1814,14 @@ static int __ocfs2_find_path(struct inode *inode,
1736 struct ocfs2_extent_block *eb; 1814 struct ocfs2_extent_block *eb;
1737 struct ocfs2_extent_list *el; 1815 struct ocfs2_extent_list *el;
1738 struct ocfs2_extent_rec *rec; 1816 struct ocfs2_extent_rec *rec;
1739 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1740 1817
1741 el = root_el; 1818 el = root_el;
1742 while (el->l_tree_depth) { 1819 while (el->l_tree_depth) {
1743 if (le16_to_cpu(el->l_next_free_rec) == 0) { 1820 if (le16_to_cpu(el->l_next_free_rec) == 0) {
1744 ocfs2_error(inode->i_sb, 1821 ocfs2_error(ocfs2_metadata_cache_get_super(ci),
1745 "Inode %llu has empty extent list at " 1822 "Owner %llu has empty extent list at "
1746 "depth %u\n", 1823 "depth %u\n",
1747 (unsigned long long)oi->ip_blkno, 1824 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1748 le16_to_cpu(el->l_tree_depth)); 1825 le16_to_cpu(el->l_tree_depth));
1749 ret = -EROFS; 1826 ret = -EROFS;
1750 goto out; 1827 goto out;
@@ -1767,10 +1844,10 @@ static int __ocfs2_find_path(struct inode *inode,
1767 1844
1768 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 1845 blkno = le64_to_cpu(el->l_recs[i].e_blkno);
1769 if (blkno == 0) { 1846 if (blkno == 0) {
1770 ocfs2_error(inode->i_sb, 1847 ocfs2_error(ocfs2_metadata_cache_get_super(ci),
1771 "Inode %llu has bad blkno in extent list " 1848 "Owner %llu has bad blkno in extent list "
1772 "at depth %u (index %d)\n", 1849 "at depth %u (index %d)\n",
1773 (unsigned long long)oi->ip_blkno, 1850 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1774 le16_to_cpu(el->l_tree_depth), i); 1851 le16_to_cpu(el->l_tree_depth), i);
1775 ret = -EROFS; 1852 ret = -EROFS;
1776 goto out; 1853 goto out;
@@ -1778,7 +1855,7 @@ static int __ocfs2_find_path(struct inode *inode,
1778 1855
1779 brelse(bh); 1856 brelse(bh);
1780 bh = NULL; 1857 bh = NULL;
1781 ret = ocfs2_read_extent_block(inode, blkno, &bh); 1858 ret = ocfs2_read_extent_block(ci, blkno, &bh);
1782 if (ret) { 1859 if (ret) {
1783 mlog_errno(ret); 1860 mlog_errno(ret);
1784 goto out; 1861 goto out;
@@ -1789,10 +1866,10 @@ static int __ocfs2_find_path(struct inode *inode,
1789 1866
1790 if (le16_to_cpu(el->l_next_free_rec) > 1867 if (le16_to_cpu(el->l_next_free_rec) >
1791 le16_to_cpu(el->l_count)) { 1868 le16_to_cpu(el->l_count)) {
1792 ocfs2_error(inode->i_sb, 1869 ocfs2_error(ocfs2_metadata_cache_get_super(ci),
1793 "Inode %llu has bad count in extent list " 1870 "Owner %llu has bad count in extent list "
1794 "at block %llu (next free=%u, count=%u)\n", 1871 "at block %llu (next free=%u, count=%u)\n",
1795 (unsigned long long)oi->ip_blkno, 1872 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1796 (unsigned long long)bh->b_blocknr, 1873 (unsigned long long)bh->b_blocknr,
1797 le16_to_cpu(el->l_next_free_rec), 1874 le16_to_cpu(el->l_next_free_rec),
1798 le16_to_cpu(el->l_count)); 1875 le16_to_cpu(el->l_count));
@@ -1836,14 +1913,14 @@ static void find_path_ins(void *data, struct buffer_head *bh)
1836 ocfs2_path_insert_eb(fp->path, fp->index, bh); 1913 ocfs2_path_insert_eb(fp->path, fp->index, bh);
1837 fp->index++; 1914 fp->index++;
1838} 1915}
1839static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, 1916int ocfs2_find_path(struct ocfs2_caching_info *ci,
1840 u32 cpos) 1917 struct ocfs2_path *path, u32 cpos)
1841{ 1918{
1842 struct find_path_data data; 1919 struct find_path_data data;
1843 1920
1844 data.index = 1; 1921 data.index = 1;
1845 data.path = path; 1922 data.path = path;
1846 return __ocfs2_find_path(inode, path_root_el(path), cpos, 1923 return __ocfs2_find_path(ci, path_root_el(path), cpos,
1847 find_path_ins, &data); 1924 find_path_ins, &data);
1848} 1925}
1849 1926
@@ -1868,13 +1945,14 @@ static void find_leaf_ins(void *data, struct buffer_head *bh)
1868 * 1945 *
1869 * This function doesn't handle non btree extent lists. 1946 * This function doesn't handle non btree extent lists.
1870 */ 1947 */
1871int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 1948int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
1872 u32 cpos, struct buffer_head **leaf_bh) 1949 struct ocfs2_extent_list *root_el, u32 cpos,
1950 struct buffer_head **leaf_bh)
1873{ 1951{
1874 int ret; 1952 int ret;
1875 struct buffer_head *bh = NULL; 1953 struct buffer_head *bh = NULL;
1876 1954
1877 ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh); 1955 ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh);
1878 if (ret) { 1956 if (ret) {
1879 mlog_errno(ret); 1957 mlog_errno(ret);
1880 goto out; 1958 goto out;
@@ -1980,7 +2058,7 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
1980 * - When we've adjusted the last extent record in the left path leaf and the 2058 * - When we've adjusted the last extent record in the left path leaf and the
1981 * 1st extent record in the right path leaf during cross extent block merge. 2059 * 1st extent record in the right path leaf during cross extent block merge.
1982 */ 2060 */
1983static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, 2061static void ocfs2_complete_edge_insert(handle_t *handle,
1984 struct ocfs2_path *left_path, 2062 struct ocfs2_path *left_path,
1985 struct ocfs2_path *right_path, 2063 struct ocfs2_path *right_path,
1986 int subtree_index) 2064 int subtree_index)
@@ -2058,8 +2136,8 @@ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
2058 mlog_errno(ret); 2136 mlog_errno(ret);
2059} 2137}
2060 2138
2061static int ocfs2_rotate_subtree_right(struct inode *inode, 2139static int ocfs2_rotate_subtree_right(handle_t *handle,
2062 handle_t *handle, 2140 struct ocfs2_extent_tree *et,
2063 struct ocfs2_path *left_path, 2141 struct ocfs2_path *left_path,
2064 struct ocfs2_path *right_path, 2142 struct ocfs2_path *right_path,
2065 int subtree_index) 2143 int subtree_index)
@@ -2075,10 +2153,10 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2075 left_el = path_leaf_el(left_path); 2153 left_el = path_leaf_el(left_path);
2076 2154
2077 if (left_el->l_next_free_rec != left_el->l_count) { 2155 if (left_el->l_next_free_rec != left_el->l_count) {
2078 ocfs2_error(inode->i_sb, 2156 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
2079 "Inode %llu has non-full interior leaf node %llu" 2157 "Inode %llu has non-full interior leaf node %llu"
2080 "(next free = %u)", 2158 "(next free = %u)",
2081 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2159 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2082 (unsigned long long)left_leaf_bh->b_blocknr, 2160 (unsigned long long)left_leaf_bh->b_blocknr,
2083 le16_to_cpu(left_el->l_next_free_rec)); 2161 le16_to_cpu(left_el->l_next_free_rec));
2084 return -EROFS; 2162 return -EROFS;
@@ -2094,7 +2172,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2094 root_bh = left_path->p_node[subtree_index].bh; 2172 root_bh = left_path->p_node[subtree_index].bh;
2095 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 2173 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
2096 2174
2097 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 2175 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
2098 subtree_index); 2176 subtree_index);
2099 if (ret) { 2177 if (ret) {
2100 mlog_errno(ret); 2178 mlog_errno(ret);
@@ -2102,14 +2180,14 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2102 } 2180 }
2103 2181
2104 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 2182 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
2105 ret = ocfs2_path_bh_journal_access(handle, inode, 2183 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2106 right_path, i); 2184 right_path, i);
2107 if (ret) { 2185 if (ret) {
2108 mlog_errno(ret); 2186 mlog_errno(ret);
2109 goto out; 2187 goto out;
2110 } 2188 }
2111 2189
2112 ret = ocfs2_path_bh_journal_access(handle, inode, 2190 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2113 left_path, i); 2191 left_path, i);
2114 if (ret) { 2192 if (ret) {
2115 mlog_errno(ret); 2193 mlog_errno(ret);
@@ -2123,7 +2201,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2123 /* This is a code error, not a disk corruption. */ 2201 /* This is a code error, not a disk corruption. */
2124 mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails " 2202 mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
2125 "because rightmost leaf block %llu is empty\n", 2203 "because rightmost leaf block %llu is empty\n",
2126 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2204 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2127 (unsigned long long)right_leaf_bh->b_blocknr); 2205 (unsigned long long)right_leaf_bh->b_blocknr);
2128 2206
2129 ocfs2_create_empty_extent(right_el); 2207 ocfs2_create_empty_extent(right_el);
@@ -2157,8 +2235,8 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2157 goto out; 2235 goto out;
2158 } 2236 }
2159 2237
2160 ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 2238 ocfs2_complete_edge_insert(handle, left_path, right_path,
2161 subtree_index); 2239 subtree_index);
2162 2240
2163out: 2241out:
2164 return ret; 2242 return ret;
@@ -2248,10 +2326,18 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
2248 int op_credits, 2326 int op_credits,
2249 struct ocfs2_path *path) 2327 struct ocfs2_path *path)
2250{ 2328{
2329 int ret;
2251 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; 2330 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
2252 2331
2253 if (handle->h_buffer_credits < credits) 2332 if (handle->h_buffer_credits < credits) {
2254 return ocfs2_extend_trans(handle, credits); 2333 ret = ocfs2_extend_trans(handle,
2334 credits - handle->h_buffer_credits);
2335 if (ret)
2336 return ret;
2337
2338 if (unlikely(handle->h_buffer_credits < credits))
2339 return ocfs2_extend_trans(handle, credits);
2340 }
2255 2341
2256 return 0; 2342 return 0;
2257} 2343}
@@ -2321,8 +2407,8 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
2321 * *ret_left_path will contain a valid path which can be passed to 2407 * *ret_left_path will contain a valid path which can be passed to
2322 * ocfs2_insert_path(). 2408 * ocfs2_insert_path().
2323 */ 2409 */
2324static int ocfs2_rotate_tree_right(struct inode *inode, 2410static int ocfs2_rotate_tree_right(handle_t *handle,
2325 handle_t *handle, 2411 struct ocfs2_extent_tree *et,
2326 enum ocfs2_split_type split, 2412 enum ocfs2_split_type split,
2327 u32 insert_cpos, 2413 u32 insert_cpos,
2328 struct ocfs2_path *right_path, 2414 struct ocfs2_path *right_path,
@@ -2331,6 +2417,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2331 int ret, start, orig_credits = handle->h_buffer_credits; 2417 int ret, start, orig_credits = handle->h_buffer_credits;
2332 u32 cpos; 2418 u32 cpos;
2333 struct ocfs2_path *left_path = NULL; 2419 struct ocfs2_path *left_path = NULL;
2420 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
2334 2421
2335 *ret_left_path = NULL; 2422 *ret_left_path = NULL;
2336 2423
@@ -2341,7 +2428,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2341 goto out; 2428 goto out;
2342 } 2429 }
2343 2430
2344 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos); 2431 ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
2345 if (ret) { 2432 if (ret) {
2346 mlog_errno(ret); 2433 mlog_errno(ret);
2347 goto out; 2434 goto out;
@@ -2379,7 +2466,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2379 mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", 2466 mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
2380 insert_cpos, cpos); 2467 insert_cpos, cpos);
2381 2468
2382 ret = ocfs2_find_path(inode, left_path, cpos); 2469 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
2383 if (ret) { 2470 if (ret) {
2384 mlog_errno(ret); 2471 mlog_errno(ret);
2385 goto out; 2472 goto out;
@@ -2387,10 +2474,11 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2387 2474
2388 mlog_bug_on_msg(path_leaf_bh(left_path) == 2475 mlog_bug_on_msg(path_leaf_bh(left_path) ==
2389 path_leaf_bh(right_path), 2476 path_leaf_bh(right_path),
2390 "Inode %lu: error during insert of %u " 2477 "Owner %llu: error during insert of %u "
2391 "(left path cpos %u) results in two identical " 2478 "(left path cpos %u) results in two identical "
2392 "paths ending at %llu\n", 2479 "paths ending at %llu\n",
2393 inode->i_ino, insert_cpos, cpos, 2480 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2481 insert_cpos, cpos,
2394 (unsigned long long) 2482 (unsigned long long)
2395 path_leaf_bh(left_path)->b_blocknr); 2483 path_leaf_bh(left_path)->b_blocknr);
2396 2484
@@ -2416,7 +2504,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2416 goto out_ret_path; 2504 goto out_ret_path;
2417 } 2505 }
2418 2506
2419 start = ocfs2_find_subtree_root(inode, left_path, right_path); 2507 start = ocfs2_find_subtree_root(et, left_path, right_path);
2420 2508
2421 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2509 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
2422 start, 2510 start,
@@ -2430,7 +2518,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2430 goto out; 2518 goto out;
2431 } 2519 }
2432 2520
2433 ret = ocfs2_rotate_subtree_right(inode, handle, left_path, 2521 ret = ocfs2_rotate_subtree_right(handle, et, left_path,
2434 right_path, start); 2522 right_path, start);
2435 if (ret) { 2523 if (ret) {
2436 mlog_errno(ret); 2524 mlog_errno(ret);
@@ -2462,8 +2550,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2462 */ 2550 */
2463 ocfs2_mv_path(right_path, left_path); 2551 ocfs2_mv_path(right_path, left_path);
2464 2552
2465 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 2553 ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
2466 &cpos);
2467 if (ret) { 2554 if (ret) {
2468 mlog_errno(ret); 2555 mlog_errno(ret);
2469 goto out; 2556 goto out;
@@ -2477,7 +2564,8 @@ out_ret_path:
2477 return ret; 2564 return ret;
2478} 2565}
2479 2566
2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2567static int ocfs2_update_edge_lengths(handle_t *handle,
2568 struct ocfs2_extent_tree *et,
2481 int subtree_index, struct ocfs2_path *path) 2569 int subtree_index, struct ocfs2_path *path)
2482{ 2570{
2483 int i, idx, ret; 2571 int i, idx, ret;
@@ -2502,7 +2590,7 @@ static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2502 goto out; 2590 goto out;
2503 } 2591 }
2504 2592
2505 ret = ocfs2_journal_access_path(inode, handle, path); 2593 ret = ocfs2_journal_access_path(et->et_ci, handle, path);
2506 if (ret) { 2594 if (ret) {
2507 mlog_errno(ret); 2595 mlog_errno(ret);
2508 goto out; 2596 goto out;
@@ -2532,7 +2620,8 @@ out:
2532 return ret; 2620 return ret;
2533} 2621}
2534 2622
2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2623static void ocfs2_unlink_path(handle_t *handle,
2624 struct ocfs2_extent_tree *et,
2536 struct ocfs2_cached_dealloc_ctxt *dealloc, 2625 struct ocfs2_cached_dealloc_ctxt *dealloc,
2537 struct ocfs2_path *path, int unlink_start) 2626 struct ocfs2_path *path, int unlink_start)
2538{ 2627{
@@ -2554,12 +2643,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
2554 mlog(ML_ERROR, 2643 mlog(ML_ERROR,
2555 "Inode %llu, attempted to remove extent block " 2644 "Inode %llu, attempted to remove extent block "
2556 "%llu with %u records\n", 2645 "%llu with %u records\n",
2557 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2646 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2558 (unsigned long long)le64_to_cpu(eb->h_blkno), 2647 (unsigned long long)le64_to_cpu(eb->h_blkno),
2559 le16_to_cpu(el->l_next_free_rec)); 2648 le16_to_cpu(el->l_next_free_rec));
2560 2649
2561 ocfs2_journal_dirty(handle, bh); 2650 ocfs2_journal_dirty(handle, bh);
2562 ocfs2_remove_from_cache(inode, bh); 2651 ocfs2_remove_from_cache(et->et_ci, bh);
2563 continue; 2652 continue;
2564 } 2653 }
2565 2654
@@ -2572,11 +2661,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
2572 if (ret) 2661 if (ret)
2573 mlog_errno(ret); 2662 mlog_errno(ret);
2574 2663
2575 ocfs2_remove_from_cache(inode, bh); 2664 ocfs2_remove_from_cache(et->et_ci, bh);
2576 } 2665 }
2577} 2666}
2578 2667
2579static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, 2668static void ocfs2_unlink_subtree(handle_t *handle,
2669 struct ocfs2_extent_tree *et,
2580 struct ocfs2_path *left_path, 2670 struct ocfs2_path *left_path,
2581 struct ocfs2_path *right_path, 2671 struct ocfs2_path *right_path,
2582 int subtree_index, 2672 int subtree_index,
@@ -2607,17 +2697,17 @@ static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
2607 ocfs2_journal_dirty(handle, root_bh); 2697 ocfs2_journal_dirty(handle, root_bh);
2608 ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 2698 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
2609 2699
2610 ocfs2_unlink_path(inode, handle, dealloc, right_path, 2700 ocfs2_unlink_path(handle, et, dealloc, right_path,
2611 subtree_index + 1); 2701 subtree_index + 1);
2612} 2702}
2613 2703
2614static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, 2704static int ocfs2_rotate_subtree_left(handle_t *handle,
2705 struct ocfs2_extent_tree *et,
2615 struct ocfs2_path *left_path, 2706 struct ocfs2_path *left_path,
2616 struct ocfs2_path *right_path, 2707 struct ocfs2_path *right_path,
2617 int subtree_index, 2708 int subtree_index,
2618 struct ocfs2_cached_dealloc_ctxt *dealloc, 2709 struct ocfs2_cached_dealloc_ctxt *dealloc,
2619 int *deleted, 2710 int *deleted)
2620 struct ocfs2_extent_tree *et)
2621{ 2711{
2622 int ret, i, del_right_subtree = 0, right_has_empty = 0; 2712 int ret, i, del_right_subtree = 0, right_has_empty = 0;
2623 struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); 2713 struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
@@ -2653,7 +2743,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2653 return -EAGAIN; 2743 return -EAGAIN;
2654 2744
2655 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { 2745 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
2656 ret = ocfs2_journal_access_eb(handle, inode, 2746 ret = ocfs2_journal_access_eb(handle, et->et_ci,
2657 path_leaf_bh(right_path), 2747 path_leaf_bh(right_path),
2658 OCFS2_JOURNAL_ACCESS_WRITE); 2748 OCFS2_JOURNAL_ACCESS_WRITE);
2659 if (ret) { 2749 if (ret) {
@@ -2672,7 +2762,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2672 * We have to update i_last_eb_blk during the meta 2762 * We have to update i_last_eb_blk during the meta
2673 * data delete. 2763 * data delete.
2674 */ 2764 */
2675 ret = ocfs2_et_root_journal_access(handle, inode, et, 2765 ret = ocfs2_et_root_journal_access(handle, et,
2676 OCFS2_JOURNAL_ACCESS_WRITE); 2766 OCFS2_JOURNAL_ACCESS_WRITE);
2677 if (ret) { 2767 if (ret) {
2678 mlog_errno(ret); 2768 mlog_errno(ret);
@@ -2688,7 +2778,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2688 */ 2778 */
2689 BUG_ON(right_has_empty && !del_right_subtree); 2779 BUG_ON(right_has_empty && !del_right_subtree);
2690 2780
2691 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 2781 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
2692 subtree_index); 2782 subtree_index);
2693 if (ret) { 2783 if (ret) {
2694 mlog_errno(ret); 2784 mlog_errno(ret);
@@ -2696,14 +2786,14 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2696 } 2786 }
2697 2787
2698 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 2788 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
2699 ret = ocfs2_path_bh_journal_access(handle, inode, 2789 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2700 right_path, i); 2790 right_path, i);
2701 if (ret) { 2791 if (ret) {
2702 mlog_errno(ret); 2792 mlog_errno(ret);
2703 goto out; 2793 goto out;
2704 } 2794 }
2705 2795
2706 ret = ocfs2_path_bh_journal_access(handle, inode, 2796 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2707 left_path, i); 2797 left_path, i);
2708 if (ret) { 2798 if (ret) {
2709 mlog_errno(ret); 2799 mlog_errno(ret);
@@ -2740,9 +2830,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2740 mlog_errno(ret); 2830 mlog_errno(ret);
2741 2831
2742 if (del_right_subtree) { 2832 if (del_right_subtree) {
2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2833 ocfs2_unlink_subtree(handle, et, left_path, right_path,
2744 subtree_index, dealloc); 2834 subtree_index, dealloc);
2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, 2835 ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
2746 left_path); 2836 left_path);
2747 if (ret) { 2837 if (ret) {
2748 mlog_errno(ret); 2838 mlog_errno(ret);
@@ -2766,7 +2856,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2766 2856
2767 *deleted = 1; 2857 *deleted = 1;
2768 } else 2858 } else
2769 ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 2859 ocfs2_complete_edge_insert(handle, left_path, right_path,
2770 subtree_index); 2860 subtree_index);
2771 2861
2772out: 2862out:
@@ -2852,8 +2942,8 @@ out:
2852 return ret; 2942 return ret;
2853} 2943}
2854 2944
2855static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, 2945static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
2856 handle_t *handle, 2946 struct ocfs2_extent_tree *et,
2857 struct ocfs2_path *path) 2947 struct ocfs2_path *path)
2858{ 2948{
2859 int ret; 2949 int ret;
@@ -2863,7 +2953,7 @@ static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
2863 if (!ocfs2_is_empty_extent(&el->l_recs[0])) 2953 if (!ocfs2_is_empty_extent(&el->l_recs[0]))
2864 return 0; 2954 return 0;
2865 2955
2866 ret = ocfs2_path_bh_journal_access(handle, inode, path, 2956 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
2867 path_num_items(path) - 1); 2957 path_num_items(path) - 1);
2868 if (ret) { 2958 if (ret) {
2869 mlog_errno(ret); 2959 mlog_errno(ret);
@@ -2880,24 +2970,24 @@ out:
2880 return ret; 2970 return ret;
2881} 2971}
2882 2972
2883static int __ocfs2_rotate_tree_left(struct inode *inode, 2973static int __ocfs2_rotate_tree_left(handle_t *handle,
2884 handle_t *handle, int orig_credits, 2974 struct ocfs2_extent_tree *et,
2975 int orig_credits,
2885 struct ocfs2_path *path, 2976 struct ocfs2_path *path,
2886 struct ocfs2_cached_dealloc_ctxt *dealloc, 2977 struct ocfs2_cached_dealloc_ctxt *dealloc,
2887 struct ocfs2_path **empty_extent_path, 2978 struct ocfs2_path **empty_extent_path)
2888 struct ocfs2_extent_tree *et)
2889{ 2979{
2890 int ret, subtree_root, deleted; 2980 int ret, subtree_root, deleted;
2891 u32 right_cpos; 2981 u32 right_cpos;
2892 struct ocfs2_path *left_path = NULL; 2982 struct ocfs2_path *left_path = NULL;
2893 struct ocfs2_path *right_path = NULL; 2983 struct ocfs2_path *right_path = NULL;
2984 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
2894 2985
2895 BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); 2986 BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
2896 2987
2897 *empty_extent_path = NULL; 2988 *empty_extent_path = NULL;
2898 2989
2899 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, 2990 ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
2900 &right_cpos);
2901 if (ret) { 2991 if (ret) {
2902 mlog_errno(ret); 2992 mlog_errno(ret);
2903 goto out; 2993 goto out;
@@ -2920,13 +3010,13 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2920 } 3010 }
2921 3011
2922 while (right_cpos) { 3012 while (right_cpos) {
2923 ret = ocfs2_find_path(inode, right_path, right_cpos); 3013 ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
2924 if (ret) { 3014 if (ret) {
2925 mlog_errno(ret); 3015 mlog_errno(ret);
2926 goto out; 3016 goto out;
2927 } 3017 }
2928 3018
2929 subtree_root = ocfs2_find_subtree_root(inode, left_path, 3019 subtree_root = ocfs2_find_subtree_root(et, left_path,
2930 right_path); 3020 right_path);
2931 3021
2932 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 3022 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
@@ -2946,16 +3036,16 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2946 * Caller might still want to make changes to the 3036 * Caller might still want to make changes to the
2947 * tree root, so re-add it to the journal here. 3037 * tree root, so re-add it to the journal here.
2948 */ 3038 */
2949 ret = ocfs2_path_bh_journal_access(handle, inode, 3039 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2950 left_path, 0); 3040 left_path, 0);
2951 if (ret) { 3041 if (ret) {
2952 mlog_errno(ret); 3042 mlog_errno(ret);
2953 goto out; 3043 goto out;
2954 } 3044 }
2955 3045
2956 ret = ocfs2_rotate_subtree_left(inode, handle, left_path, 3046 ret = ocfs2_rotate_subtree_left(handle, et, left_path,
2957 right_path, subtree_root, 3047 right_path, subtree_root,
2958 dealloc, &deleted, et); 3048 dealloc, &deleted);
2959 if (ret == -EAGAIN) { 3049 if (ret == -EAGAIN) {
2960 /* 3050 /*
2961 * The rotation has to temporarily stop due to 3051 * The rotation has to temporarily stop due to
@@ -2982,7 +3072,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2982 3072
2983 ocfs2_mv_path(left_path, right_path); 3073 ocfs2_mv_path(left_path, right_path);
2984 3074
2985 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, 3075 ret = ocfs2_find_cpos_for_right_leaf(sb, left_path,
2986 &right_cpos); 3076 &right_cpos);
2987 if (ret) { 3077 if (ret) {
2988 mlog_errno(ret); 3078 mlog_errno(ret);
@@ -2997,10 +3087,10 @@ out:
2997 return ret; 3087 return ret;
2998} 3088}
2999 3089
3000static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, 3090static int ocfs2_remove_rightmost_path(handle_t *handle,
3091 struct ocfs2_extent_tree *et,
3001 struct ocfs2_path *path, 3092 struct ocfs2_path *path,
3002 struct ocfs2_cached_dealloc_ctxt *dealloc, 3093 struct ocfs2_cached_dealloc_ctxt *dealloc)
3003 struct ocfs2_extent_tree *et)
3004{ 3094{
3005 int ret, subtree_index; 3095 int ret, subtree_index;
3006 u32 cpos; 3096 u32 cpos;
@@ -3009,7 +3099,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3009 struct ocfs2_extent_list *el; 3099 struct ocfs2_extent_list *el;
3010 3100
3011 3101
3012 ret = ocfs2_et_sanity_check(inode, et); 3102 ret = ocfs2_et_sanity_check(et);
3013 if (ret) 3103 if (ret)
3014 goto out; 3104 goto out;
3015 /* 3105 /*
@@ -3024,13 +3114,14 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3024 goto out; 3114 goto out;
3025 } 3115 }
3026 3116
3027 ret = ocfs2_journal_access_path(inode, handle, path); 3117 ret = ocfs2_journal_access_path(et->et_ci, handle, path);
3028 if (ret) { 3118 if (ret) {
3029 mlog_errno(ret); 3119 mlog_errno(ret);
3030 goto out; 3120 goto out;
3031 } 3121 }
3032 3122
3033 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); 3123 ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3124 path, &cpos);
3034 if (ret) { 3125 if (ret) {
3035 mlog_errno(ret); 3126 mlog_errno(ret);
3036 goto out; 3127 goto out;
@@ -3048,23 +3139,23 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3048 goto out; 3139 goto out;
3049 } 3140 }
3050 3141
3051 ret = ocfs2_find_path(inode, left_path, cpos); 3142 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
3052 if (ret) { 3143 if (ret) {
3053 mlog_errno(ret); 3144 mlog_errno(ret);
3054 goto out; 3145 goto out;
3055 } 3146 }
3056 3147
3057 ret = ocfs2_journal_access_path(inode, handle, left_path); 3148 ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
3058 if (ret) { 3149 if (ret) {
3059 mlog_errno(ret); 3150 mlog_errno(ret);
3060 goto out; 3151 goto out;
3061 } 3152 }
3062 3153
3063 subtree_index = ocfs2_find_subtree_root(inode, left_path, path); 3154 subtree_index = ocfs2_find_subtree_root(et, left_path, path);
3064 3155
3065 ocfs2_unlink_subtree(inode, handle, left_path, path, 3156 ocfs2_unlink_subtree(handle, et, left_path, path,
3066 subtree_index, dealloc); 3157 subtree_index, dealloc);
3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, 3158 ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
3068 left_path); 3159 left_path);
3069 if (ret) { 3160 if (ret) {
3070 mlog_errno(ret); 3161 mlog_errno(ret);
@@ -3078,10 +3169,10 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3078 * 'path' is also the leftmost path which 3169 * 'path' is also the leftmost path which
3079 * means it must be the only one. This gets 3170 * means it must be the only one. This gets
3080 * handled differently because we want to 3171 * handled differently because we want to
3081 * revert the inode back to having extents 3172 * revert the root back to having extents
3082 * in-line. 3173 * in-line.
3083 */ 3174 */
3084 ocfs2_unlink_path(inode, handle, dealloc, path, 1); 3175 ocfs2_unlink_path(handle, et, dealloc, path, 1);
3085 3176
3086 el = et->et_root_el; 3177 el = et->et_root_el;
3087 el->l_tree_depth = 0; 3178 el->l_tree_depth = 0;
@@ -3114,10 +3205,10 @@ out:
3114 * the rightmost tree leaf record is removed so the caller is 3205 * the rightmost tree leaf record is removed so the caller is
3115 * responsible for detecting and correcting that. 3206 * responsible for detecting and correcting that.
3116 */ 3207 */
3117static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, 3208static int ocfs2_rotate_tree_left(handle_t *handle,
3209 struct ocfs2_extent_tree *et,
3118 struct ocfs2_path *path, 3210 struct ocfs2_path *path,
3119 struct ocfs2_cached_dealloc_ctxt *dealloc, 3211 struct ocfs2_cached_dealloc_ctxt *dealloc)
3120 struct ocfs2_extent_tree *et)
3121{ 3212{
3122 int ret, orig_credits = handle->h_buffer_credits; 3213 int ret, orig_credits = handle->h_buffer_credits;
3123 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; 3214 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
@@ -3134,8 +3225,7 @@ rightmost_no_delete:
3134 * Inline extents. This is trivially handled, so do 3225 * Inline extents. This is trivially handled, so do
3135 * it up front. 3226 * it up front.
3136 */ 3227 */
3137 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, 3228 ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path);
3138 path);
3139 if (ret) 3229 if (ret)
3140 mlog_errno(ret); 3230 mlog_errno(ret);
3141 goto out; 3231 goto out;
@@ -3151,7 +3241,7 @@ rightmost_no_delete:
3151 * 3241 *
3152 * 1) is handled via ocfs2_rotate_rightmost_leaf_left() 3242 * 1) is handled via ocfs2_rotate_rightmost_leaf_left()
3153 * 2a) we need the left branch so that we can update it with the unlink 3243 * 2a) we need the left branch so that we can update it with the unlink
3154 * 2b) we need to bring the inode back to inline extents. 3244 * 2b) we need to bring the root back to inline extents.
3155 */ 3245 */
3156 3246
3157 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 3247 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
@@ -3167,9 +3257,9 @@ rightmost_no_delete:
3167 3257
3168 if (le16_to_cpu(el->l_next_free_rec) == 0) { 3258 if (le16_to_cpu(el->l_next_free_rec) == 0) {
3169 ret = -EIO; 3259 ret = -EIO;
3170 ocfs2_error(inode->i_sb, 3260 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
3171 "Inode %llu has empty extent block at %llu", 3261 "Owner %llu has empty extent block at %llu",
3172 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3262 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
3173 (unsigned long long)le64_to_cpu(eb->h_blkno)); 3263 (unsigned long long)le64_to_cpu(eb->h_blkno));
3174 goto out; 3264 goto out;
3175 } 3265 }
@@ -3183,8 +3273,8 @@ rightmost_no_delete:
3183 * nonempty list. 3273 * nonempty list.
3184 */ 3274 */
3185 3275
3186 ret = ocfs2_remove_rightmost_path(inode, handle, path, 3276 ret = ocfs2_remove_rightmost_path(handle, et, path,
3187 dealloc, et); 3277 dealloc);
3188 if (ret) 3278 if (ret)
3189 mlog_errno(ret); 3279 mlog_errno(ret);
3190 goto out; 3280 goto out;
@@ -3195,8 +3285,8 @@ rightmost_no_delete:
3195 * and restarting from there. 3285 * and restarting from there.
3196 */ 3286 */
3197try_rotate: 3287try_rotate:
3198 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, 3288 ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path,
3199 dealloc, &restart_path, et); 3289 dealloc, &restart_path);
3200 if (ret && ret != -EAGAIN) { 3290 if (ret && ret != -EAGAIN) {
3201 mlog_errno(ret); 3291 mlog_errno(ret);
3202 goto out; 3292 goto out;
@@ -3206,9 +3296,9 @@ try_rotate:
3206 tmp_path = restart_path; 3296 tmp_path = restart_path;
3207 restart_path = NULL; 3297 restart_path = NULL;
3208 3298
3209 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, 3299 ret = __ocfs2_rotate_tree_left(handle, et, orig_credits,
3210 tmp_path, dealloc, 3300 tmp_path, dealloc,
3211 &restart_path, et); 3301 &restart_path);
3212 if (ret && ret != -EAGAIN) { 3302 if (ret && ret != -EAGAIN) {
3213 mlog_errno(ret); 3303 mlog_errno(ret);
3214 goto out; 3304 goto out;
@@ -3259,7 +3349,7 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
3259 } 3349 }
3260} 3350}
3261 3351
3262static int ocfs2_get_right_path(struct inode *inode, 3352static int ocfs2_get_right_path(struct ocfs2_extent_tree *et,
3263 struct ocfs2_path *left_path, 3353 struct ocfs2_path *left_path,
3264 struct ocfs2_path **ret_right_path) 3354 struct ocfs2_path **ret_right_path)
3265{ 3355{
@@ -3276,8 +3366,8 @@ static int ocfs2_get_right_path(struct inode *inode,
3276 left_el = path_leaf_el(left_path); 3366 left_el = path_leaf_el(left_path);
3277 BUG_ON(left_el->l_next_free_rec != left_el->l_count); 3367 BUG_ON(left_el->l_next_free_rec != left_el->l_count);
3278 3368
3279 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, 3369 ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3280 &right_cpos); 3370 left_path, &right_cpos);
3281 if (ret) { 3371 if (ret) {
3282 mlog_errno(ret); 3372 mlog_errno(ret);
3283 goto out; 3373 goto out;
@@ -3293,7 +3383,7 @@ static int ocfs2_get_right_path(struct inode *inode,
3293 goto out; 3383 goto out;
3294 } 3384 }
3295 3385
3296 ret = ocfs2_find_path(inode, right_path, right_cpos); 3386 ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
3297 if (ret) { 3387 if (ret) {
3298 mlog_errno(ret); 3388 mlog_errno(ret);
3299 goto out; 3389 goto out;
@@ -3313,9 +3403,9 @@ out:
3313 * For index == l_count - 1, the "next" means the 1st extent rec of the 3403 * For index == l_count - 1, the "next" means the 1st extent rec of the
3314 * next extent block. 3404 * next extent block.
3315 */ 3405 */
3316static int ocfs2_merge_rec_right(struct inode *inode, 3406static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3317 struct ocfs2_path *left_path,
3318 handle_t *handle, 3407 handle_t *handle,
3408 struct ocfs2_extent_tree *et,
3319 struct ocfs2_extent_rec *split_rec, 3409 struct ocfs2_extent_rec *split_rec,
3320 int index) 3410 int index)
3321{ 3411{
@@ -3336,7 +3426,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3336 if (index == le16_to_cpu(el->l_next_free_rec) - 1 && 3426 if (index == le16_to_cpu(el->l_next_free_rec) - 1 &&
3337 le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { 3427 le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
3338 /* we meet with a cross extent block merge. */ 3428 /* we meet with a cross extent block merge. */
3339 ret = ocfs2_get_right_path(inode, left_path, &right_path); 3429 ret = ocfs2_get_right_path(et, left_path, &right_path);
3340 if (ret) { 3430 if (ret) {
3341 mlog_errno(ret); 3431 mlog_errno(ret);
3342 goto out; 3432 goto out;
@@ -3355,8 +3445,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3355 le16_to_cpu(left_rec->e_leaf_clusters) != 3445 le16_to_cpu(left_rec->e_leaf_clusters) !=
3356 le32_to_cpu(right_rec->e_cpos)); 3446 le32_to_cpu(right_rec->e_cpos));
3357 3447
3358 subtree_index = ocfs2_find_subtree_root(inode, 3448 subtree_index = ocfs2_find_subtree_root(et, left_path,
3359 left_path, right_path); 3449 right_path);
3360 3450
3361 ret = ocfs2_extend_rotate_transaction(handle, subtree_index, 3451 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
3362 handle->h_buffer_credits, 3452 handle->h_buffer_credits,
@@ -3369,7 +3459,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3369 root_bh = left_path->p_node[subtree_index].bh; 3459 root_bh = left_path->p_node[subtree_index].bh;
3370 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3460 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3371 3461
3372 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 3462 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
3373 subtree_index); 3463 subtree_index);
3374 if (ret) { 3464 if (ret) {
3375 mlog_errno(ret); 3465 mlog_errno(ret);
@@ -3378,14 +3468,14 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3378 3468
3379 for (i = subtree_index + 1; 3469 for (i = subtree_index + 1;
3380 i < path_num_items(right_path); i++) { 3470 i < path_num_items(right_path); i++) {
3381 ret = ocfs2_path_bh_journal_access(handle, inode, 3471 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3382 right_path, i); 3472 right_path, i);
3383 if (ret) { 3473 if (ret) {
3384 mlog_errno(ret); 3474 mlog_errno(ret);
3385 goto out; 3475 goto out;
3386 } 3476 }
3387 3477
3388 ret = ocfs2_path_bh_journal_access(handle, inode, 3478 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3389 left_path, i); 3479 left_path, i);
3390 if (ret) { 3480 if (ret) {
3391 mlog_errno(ret); 3481 mlog_errno(ret);
@@ -3398,7 +3488,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3398 right_rec = &el->l_recs[index + 1]; 3488 right_rec = &el->l_recs[index + 1];
3399 } 3489 }
3400 3490
3401 ret = ocfs2_path_bh_journal_access(handle, inode, left_path, 3491 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path,
3402 path_num_items(left_path) - 1); 3492 path_num_items(left_path) - 1);
3403 if (ret) { 3493 if (ret) {
3404 mlog_errno(ret); 3494 mlog_errno(ret);
@@ -3409,7 +3499,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3409 3499
3410 le32_add_cpu(&right_rec->e_cpos, -split_clusters); 3500 le32_add_cpu(&right_rec->e_cpos, -split_clusters);
3411 le64_add_cpu(&right_rec->e_blkno, 3501 le64_add_cpu(&right_rec->e_blkno,
3412 -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); 3502 -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
3503 split_clusters));
3413 le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); 3504 le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
3414 3505
3415 ocfs2_cleanup_merge(el, index); 3506 ocfs2_cleanup_merge(el, index);
@@ -3423,8 +3514,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3423 if (ret) 3514 if (ret)
3424 mlog_errno(ret); 3515 mlog_errno(ret);
3425 3516
3426 ocfs2_complete_edge_insert(inode, handle, left_path, 3517 ocfs2_complete_edge_insert(handle, left_path, right_path,
3427 right_path, subtree_index); 3518 subtree_index);
3428 } 3519 }
3429out: 3520out:
3430 if (right_path) 3521 if (right_path)
@@ -3432,7 +3523,7 @@ out:
3432 return ret; 3523 return ret;
3433} 3524}
3434 3525
3435static int ocfs2_get_left_path(struct inode *inode, 3526static int ocfs2_get_left_path(struct ocfs2_extent_tree *et,
3436 struct ocfs2_path *right_path, 3527 struct ocfs2_path *right_path,
3437 struct ocfs2_path **ret_left_path) 3528 struct ocfs2_path **ret_left_path)
3438{ 3529{
@@ -3445,7 +3536,7 @@ static int ocfs2_get_left_path(struct inode *inode,
3445 /* This function shouldn't be called for non-trees. */ 3536 /* This function shouldn't be called for non-trees. */
3446 BUG_ON(right_path->p_tree_depth == 0); 3537 BUG_ON(right_path->p_tree_depth == 0);
3447 3538
3448 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, 3539 ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3449 right_path, &left_cpos); 3540 right_path, &left_cpos);
3450 if (ret) { 3541 if (ret) {
3451 mlog_errno(ret); 3542 mlog_errno(ret);
@@ -3462,7 +3553,7 @@ static int ocfs2_get_left_path(struct inode *inode,
3462 goto out; 3553 goto out;
3463 } 3554 }
3464 3555
3465 ret = ocfs2_find_path(inode, left_path, left_cpos); 3556 ret = ocfs2_find_path(et->et_ci, left_path, left_cpos);
3466 if (ret) { 3557 if (ret) {
3467 mlog_errno(ret); 3558 mlog_errno(ret);
3468 goto out; 3559 goto out;
@@ -3485,12 +3576,11 @@ out:
3485 * remove the rightmost leaf extent block in the right_path and change 3576 * remove the rightmost leaf extent block in the right_path and change
3486 * the right path to indicate the new rightmost path. 3577 * the right path to indicate the new rightmost path.
3487 */ 3578 */
3488static int ocfs2_merge_rec_left(struct inode *inode, 3579static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3489 struct ocfs2_path *right_path,
3490 handle_t *handle, 3580 handle_t *handle,
3581 struct ocfs2_extent_tree *et,
3491 struct ocfs2_extent_rec *split_rec, 3582 struct ocfs2_extent_rec *split_rec,
3492 struct ocfs2_cached_dealloc_ctxt *dealloc, 3583 struct ocfs2_cached_dealloc_ctxt *dealloc,
3493 struct ocfs2_extent_tree *et,
3494 int index) 3584 int index)
3495{ 3585{
3496 int ret, i, subtree_index = 0, has_empty_extent = 0; 3586 int ret, i, subtree_index = 0, has_empty_extent = 0;
@@ -3508,7 +3598,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3508 right_rec = &el->l_recs[index]; 3598 right_rec = &el->l_recs[index];
3509 if (index == 0) { 3599 if (index == 0) {
3510 /* we meet with a cross extent block merge. */ 3600 /* we meet with a cross extent block merge. */
3511 ret = ocfs2_get_left_path(inode, right_path, &left_path); 3601 ret = ocfs2_get_left_path(et, right_path, &left_path);
3512 if (ret) { 3602 if (ret) {
3513 mlog_errno(ret); 3603 mlog_errno(ret);
3514 goto out; 3604 goto out;
@@ -3524,8 +3614,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3524 le16_to_cpu(left_rec->e_leaf_clusters) != 3614 le16_to_cpu(left_rec->e_leaf_clusters) !=
3525 le32_to_cpu(split_rec->e_cpos)); 3615 le32_to_cpu(split_rec->e_cpos));
3526 3616
3527 subtree_index = ocfs2_find_subtree_root(inode, 3617 subtree_index = ocfs2_find_subtree_root(et, left_path,
3528 left_path, right_path); 3618 right_path);
3529 3619
3530 ret = ocfs2_extend_rotate_transaction(handle, subtree_index, 3620 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
3531 handle->h_buffer_credits, 3621 handle->h_buffer_credits,
@@ -3538,7 +3628,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3538 root_bh = left_path->p_node[subtree_index].bh; 3628 root_bh = left_path->p_node[subtree_index].bh;
3539 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3629 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3540 3630
3541 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 3631 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
3542 subtree_index); 3632 subtree_index);
3543 if (ret) { 3633 if (ret) {
3544 mlog_errno(ret); 3634 mlog_errno(ret);
@@ -3547,14 +3637,14 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3547 3637
3548 for (i = subtree_index + 1; 3638 for (i = subtree_index + 1;
3549 i < path_num_items(right_path); i++) { 3639 i < path_num_items(right_path); i++) {
3550 ret = ocfs2_path_bh_journal_access(handle, inode, 3640 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3551 right_path, i); 3641 right_path, i);
3552 if (ret) { 3642 if (ret) {
3553 mlog_errno(ret); 3643 mlog_errno(ret);
3554 goto out; 3644 goto out;
3555 } 3645 }
3556 3646
3557 ret = ocfs2_path_bh_journal_access(handle, inode, 3647 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3558 left_path, i); 3648 left_path, i);
3559 if (ret) { 3649 if (ret) {
3560 mlog_errno(ret); 3650 mlog_errno(ret);
@@ -3567,7 +3657,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3567 has_empty_extent = 1; 3657 has_empty_extent = 1;
3568 } 3658 }
3569 3659
3570 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 3660 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
3571 path_num_items(right_path) - 1); 3661 path_num_items(right_path) - 1);
3572 if (ret) { 3662 if (ret) {
3573 mlog_errno(ret); 3663 mlog_errno(ret);
@@ -3586,7 +3676,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3586 3676
3587 le32_add_cpu(&right_rec->e_cpos, split_clusters); 3677 le32_add_cpu(&right_rec->e_cpos, split_clusters);
3588 le64_add_cpu(&right_rec->e_blkno, 3678 le64_add_cpu(&right_rec->e_blkno,
3589 ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); 3679 ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
3680 split_clusters));
3590 le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); 3681 le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
3591 3682
3592 ocfs2_cleanup_merge(el, index); 3683 ocfs2_cleanup_merge(el, index);
@@ -3608,9 +3699,9 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3608 if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && 3699 if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
3609 le16_to_cpu(el->l_next_free_rec) == 1) { 3700 le16_to_cpu(el->l_next_free_rec) == 1) {
3610 3701
3611 ret = ocfs2_remove_rightmost_path(inode, handle, 3702 ret = ocfs2_remove_rightmost_path(handle, et,
3612 right_path, 3703 right_path,
3613 dealloc, et); 3704 dealloc);
3614 if (ret) { 3705 if (ret) {
3615 mlog_errno(ret); 3706 mlog_errno(ret);
3616 goto out; 3707 goto out;
@@ -3622,7 +3713,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3622 ocfs2_mv_path(right_path, left_path); 3713 ocfs2_mv_path(right_path, left_path);
3623 left_path = NULL; 3714 left_path = NULL;
3624 } else 3715 } else
3625 ocfs2_complete_edge_insert(inode, handle, left_path, 3716 ocfs2_complete_edge_insert(handle, left_path,
3626 right_path, subtree_index); 3717 right_path, subtree_index);
3627 } 3718 }
3628out: 3719out:
@@ -3631,15 +3722,13 @@ out:
3631 return ret; 3722 return ret;
3632} 3723}
3633 3724
3634static int ocfs2_try_to_merge_extent(struct inode *inode, 3725static int ocfs2_try_to_merge_extent(handle_t *handle,
3635 handle_t *handle, 3726 struct ocfs2_extent_tree *et,
3636 struct ocfs2_path *path, 3727 struct ocfs2_path *path,
3637 int split_index, 3728 int split_index,
3638 struct ocfs2_extent_rec *split_rec, 3729 struct ocfs2_extent_rec *split_rec,
3639 struct ocfs2_cached_dealloc_ctxt *dealloc, 3730 struct ocfs2_cached_dealloc_ctxt *dealloc,
3640 struct ocfs2_merge_ctxt *ctxt, 3731 struct ocfs2_merge_ctxt *ctxt)
3641 struct ocfs2_extent_tree *et)
3642
3643{ 3732{
3644 int ret = 0; 3733 int ret = 0;
3645 struct ocfs2_extent_list *el = path_leaf_el(path); 3734 struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -3655,8 +3744,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3655 * extents - having more than one in a leaf is 3744 * extents - having more than one in a leaf is
3656 * illegal. 3745 * illegal.
3657 */ 3746 */
3658 ret = ocfs2_rotate_tree_left(inode, handle, path, 3747 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
3659 dealloc, et);
3660 if (ret) { 3748 if (ret) {
3661 mlog_errno(ret); 3749 mlog_errno(ret);
3662 goto out; 3750 goto out;
@@ -3685,8 +3773,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3685 * prevoius extent block. It is more efficient and easier 3773 * prevoius extent block. It is more efficient and easier
3686 * if we do merge_right first and merge_left later. 3774 * if we do merge_right first and merge_left later.
3687 */ 3775 */
3688 ret = ocfs2_merge_rec_right(inode, path, 3776 ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
3689 handle, split_rec,
3690 split_index); 3777 split_index);
3691 if (ret) { 3778 if (ret) {
3692 mlog_errno(ret); 3779 mlog_errno(ret);
@@ -3699,8 +3786,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3699 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); 3786 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
3700 3787
3701 /* The merge left us with an empty extent, remove it. */ 3788 /* The merge left us with an empty extent, remove it. */
3702 ret = ocfs2_rotate_tree_left(inode, handle, path, 3789 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
3703 dealloc, et);
3704 if (ret) { 3790 if (ret) {
3705 mlog_errno(ret); 3791 mlog_errno(ret);
3706 goto out; 3792 goto out;
@@ -3712,18 +3798,15 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3712 * Note that we don't pass split_rec here on purpose - 3798 * Note that we don't pass split_rec here on purpose -
3713 * we've merged it into the rec already. 3799 * we've merged it into the rec already.
3714 */ 3800 */
3715 ret = ocfs2_merge_rec_left(inode, path, 3801 ret = ocfs2_merge_rec_left(path, handle, et, rec,
3716 handle, rec, 3802 dealloc, split_index);
3717 dealloc, et,
3718 split_index);
3719 3803
3720 if (ret) { 3804 if (ret) {
3721 mlog_errno(ret); 3805 mlog_errno(ret);
3722 goto out; 3806 goto out;
3723 } 3807 }
3724 3808
3725 ret = ocfs2_rotate_tree_left(inode, handle, path, 3809 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
3726 dealloc, et);
3727 /* 3810 /*
3728 * Error from this last rotate is not critical, so 3811 * Error from this last rotate is not critical, so
3729 * print but don't bubble it up. 3812 * print but don't bubble it up.
@@ -3740,19 +3823,16 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3740 * the record on the left (hence the left merge). 3823 * the record on the left (hence the left merge).
3741 */ 3824 */
3742 if (ctxt->c_contig_type == CONTIG_RIGHT) { 3825 if (ctxt->c_contig_type == CONTIG_RIGHT) {
3743 ret = ocfs2_merge_rec_left(inode, 3826 ret = ocfs2_merge_rec_left(path, handle, et,
3744 path, 3827 split_rec, dealloc,
3745 handle, split_rec,
3746 dealloc, et,
3747 split_index); 3828 split_index);
3748 if (ret) { 3829 if (ret) {
3749 mlog_errno(ret); 3830 mlog_errno(ret);
3750 goto out; 3831 goto out;
3751 } 3832 }
3752 } else { 3833 } else {
3753 ret = ocfs2_merge_rec_right(inode, 3834 ret = ocfs2_merge_rec_right(path, handle,
3754 path, 3835 et, split_rec,
3755 handle, split_rec,
3756 split_index); 3836 split_index);
3757 if (ret) { 3837 if (ret) {
3758 mlog_errno(ret); 3838 mlog_errno(ret);
@@ -3765,8 +3845,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3765 * The merge may have left an empty extent in 3845 * The merge may have left an empty extent in
3766 * our leaf. Try to rotate it away. 3846 * our leaf. Try to rotate it away.
3767 */ 3847 */
3768 ret = ocfs2_rotate_tree_left(inode, handle, path, 3848 ret = ocfs2_rotate_tree_left(handle, et, path,
3769 dealloc, et); 3849 dealloc);
3770 if (ret) 3850 if (ret)
3771 mlog_errno(ret); 3851 mlog_errno(ret);
3772 ret = 0; 3852 ret = 0;
@@ -3812,10 +3892,10 @@ static void ocfs2_subtract_from_rec(struct super_block *sb,
3812 * list. If this leaf is part of an allocation tree, it is assumed 3892 * list. If this leaf is part of an allocation tree, it is assumed
3813 * that the tree above has been prepared. 3893 * that the tree above has been prepared.
3814 */ 3894 */
3815static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, 3895static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
3896 struct ocfs2_extent_rec *insert_rec,
3816 struct ocfs2_extent_list *el, 3897 struct ocfs2_extent_list *el,
3817 struct ocfs2_insert_type *insert, 3898 struct ocfs2_insert_type *insert)
3818 struct inode *inode)
3819{ 3899{
3820 int i = insert->ins_contig_index; 3900 int i = insert->ins_contig_index;
3821 unsigned int range; 3901 unsigned int range;
@@ -3827,7 +3907,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
3827 i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); 3907 i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
3828 BUG_ON(i == -1); 3908 BUG_ON(i == -1);
3829 rec = &el->l_recs[i]; 3909 rec = &el->l_recs[i];
3830 ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, 3910 ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
3911 insert->ins_split, rec,
3831 insert_rec); 3912 insert_rec);
3832 goto rotate; 3913 goto rotate;
3833 } 3914 }
@@ -3869,10 +3950,10 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
3869 3950
3870 mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= 3951 mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
3871 le16_to_cpu(el->l_count), 3952 le16_to_cpu(el->l_count),
3872 "inode %lu, depth %u, count %u, next free %u, " 3953 "owner %llu, depth %u, count %u, next free %u, "
3873 "rec.cpos %u, rec.clusters %u, " 3954 "rec.cpos %u, rec.clusters %u, "
3874 "insert.cpos %u, insert.clusters %u\n", 3955 "insert.cpos %u, insert.clusters %u\n",
3875 inode->i_ino, 3956 ocfs2_metadata_cache_owner(et->et_ci),
3876 le16_to_cpu(el->l_tree_depth), 3957 le16_to_cpu(el->l_tree_depth),
3877 le16_to_cpu(el->l_count), 3958 le16_to_cpu(el->l_count),
3878 le16_to_cpu(el->l_next_free_rec), 3959 le16_to_cpu(el->l_next_free_rec),
@@ -3900,8 +3981,8 @@ rotate:
3900 ocfs2_rotate_leaf(el, insert_rec); 3981 ocfs2_rotate_leaf(el, insert_rec);
3901} 3982}
3902 3983
3903static void ocfs2_adjust_rightmost_records(struct inode *inode, 3984static void ocfs2_adjust_rightmost_records(handle_t *handle,
3904 handle_t *handle, 3985 struct ocfs2_extent_tree *et,
3905 struct ocfs2_path *path, 3986 struct ocfs2_path *path,
3906 struct ocfs2_extent_rec *insert_rec) 3987 struct ocfs2_extent_rec *insert_rec)
3907{ 3988{
@@ -3919,9 +4000,9 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
3919 4000
3920 next_free = le16_to_cpu(el->l_next_free_rec); 4001 next_free = le16_to_cpu(el->l_next_free_rec);
3921 if (next_free == 0) { 4002 if (next_free == 0) {
3922 ocfs2_error(inode->i_sb, 4003 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
3923 "Dinode %llu has a bad extent list", 4004 "Owner %llu has a bad extent list",
3924 (unsigned long long)OCFS2_I(inode)->ip_blkno); 4005 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
3925 ret = -EIO; 4006 ret = -EIO;
3926 return; 4007 return;
3927 } 4008 }
@@ -3941,7 +4022,8 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
3941 } 4022 }
3942} 4023}
3943 4024
3944static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, 4025static int ocfs2_append_rec_to_path(handle_t *handle,
4026 struct ocfs2_extent_tree *et,
3945 struct ocfs2_extent_rec *insert_rec, 4027 struct ocfs2_extent_rec *insert_rec,
3946 struct ocfs2_path *right_path, 4028 struct ocfs2_path *right_path,
3947 struct ocfs2_path **ret_left_path) 4029 struct ocfs2_path **ret_left_path)
@@ -3969,8 +4051,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
3969 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { 4051 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
3970 u32 left_cpos; 4052 u32 left_cpos;
3971 4053
3972 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 4054 ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3973 &left_cpos); 4055 right_path, &left_cpos);
3974 if (ret) { 4056 if (ret) {
3975 mlog_errno(ret); 4057 mlog_errno(ret);
3976 goto out; 4058 goto out;
@@ -3992,7 +4074,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
3992 goto out; 4074 goto out;
3993 } 4075 }
3994 4076
3995 ret = ocfs2_find_path(inode, left_path, left_cpos); 4077 ret = ocfs2_find_path(et->et_ci, left_path,
4078 left_cpos);
3996 if (ret) { 4079 if (ret) {
3997 mlog_errno(ret); 4080 mlog_errno(ret);
3998 goto out; 4081 goto out;
@@ -4005,13 +4088,13 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
4005 } 4088 }
4006 } 4089 }
4007 4090
4008 ret = ocfs2_journal_access_path(inode, handle, right_path); 4091 ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
4009 if (ret) { 4092 if (ret) {
4010 mlog_errno(ret); 4093 mlog_errno(ret);
4011 goto out; 4094 goto out;
4012 } 4095 }
4013 4096
4014 ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec); 4097 ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec);
4015 4098
4016 *ret_left_path = left_path; 4099 *ret_left_path = left_path;
4017 ret = 0; 4100 ret = 0;
@@ -4022,7 +4105,7 @@ out:
4022 return ret; 4105 return ret;
4023} 4106}
4024 4107
4025static void ocfs2_split_record(struct inode *inode, 4108static void ocfs2_split_record(struct ocfs2_extent_tree *et,
4026 struct ocfs2_path *left_path, 4109 struct ocfs2_path *left_path,
4027 struct ocfs2_path *right_path, 4110 struct ocfs2_path *right_path,
4028 struct ocfs2_extent_rec *split_rec, 4111 struct ocfs2_extent_rec *split_rec,
@@ -4095,7 +4178,8 @@ static void ocfs2_split_record(struct inode *inode,
4095 } 4178 }
4096 4179
4097 rec = &el->l_recs[index]; 4180 rec = &el->l_recs[index];
4098 ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); 4181 ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
4182 split, rec, split_rec);
4099 ocfs2_rotate_leaf(insert_el, split_rec); 4183 ocfs2_rotate_leaf(insert_el, split_rec);
4100} 4184}
4101 4185
@@ -4107,8 +4191,8 @@ static void ocfs2_split_record(struct inode *inode,
4107 * in. left_path should only be passed in if we need to update that 4191 * in. left_path should only be passed in if we need to update that
4108 * portion of the tree after an edge insert. 4192 * portion of the tree after an edge insert.
4109 */ 4193 */
4110static int ocfs2_insert_path(struct inode *inode, 4194static int ocfs2_insert_path(handle_t *handle,
4111 handle_t *handle, 4195 struct ocfs2_extent_tree *et,
4112 struct ocfs2_path *left_path, 4196 struct ocfs2_path *left_path,
4113 struct ocfs2_path *right_path, 4197 struct ocfs2_path *right_path,
4114 struct ocfs2_extent_rec *insert_rec, 4198 struct ocfs2_extent_rec *insert_rec,
@@ -4134,7 +4218,7 @@ static int ocfs2_insert_path(struct inode *inode,
4134 goto out; 4218 goto out;
4135 } 4219 }
4136 4220
4137 ret = ocfs2_journal_access_path(inode, handle, left_path); 4221 ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
4138 if (ret < 0) { 4222 if (ret < 0) {
4139 mlog_errno(ret); 4223 mlog_errno(ret);
4140 goto out; 4224 goto out;
@@ -4145,7 +4229,7 @@ static int ocfs2_insert_path(struct inode *inode,
4145 * Pass both paths to the journal. The majority of inserts 4229 * Pass both paths to the journal. The majority of inserts
4146 * will be touching all components anyway. 4230 * will be touching all components anyway.
4147 */ 4231 */
4148 ret = ocfs2_journal_access_path(inode, handle, right_path); 4232 ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
4149 if (ret < 0) { 4233 if (ret < 0) {
4150 mlog_errno(ret); 4234 mlog_errno(ret);
4151 goto out; 4235 goto out;
@@ -4157,7 +4241,7 @@ static int ocfs2_insert_path(struct inode *inode,
4157 * of splits, but it's easier to just let one separate 4241 * of splits, but it's easier to just let one separate
4158 * function sort it all out. 4242 * function sort it all out.
4159 */ 4243 */
4160 ocfs2_split_record(inode, left_path, right_path, 4244 ocfs2_split_record(et, left_path, right_path,
4161 insert_rec, insert->ins_split); 4245 insert_rec, insert->ins_split);
4162 4246
4163 /* 4247 /*
@@ -4171,8 +4255,8 @@ static int ocfs2_insert_path(struct inode *inode,
4171 if (ret) 4255 if (ret)
4172 mlog_errno(ret); 4256 mlog_errno(ret);
4173 } else 4257 } else
4174 ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), 4258 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
4175 insert, inode); 4259 insert);
4176 4260
4177 ret = ocfs2_journal_dirty(handle, leaf_bh); 4261 ret = ocfs2_journal_dirty(handle, leaf_bh);
4178 if (ret) 4262 if (ret)
@@ -4185,10 +4269,10 @@ static int ocfs2_insert_path(struct inode *inode,
4185 * 4269 *
4186 * XXX: Should we extend the transaction here? 4270 * XXX: Should we extend the transaction here?
4187 */ 4271 */
4188 subtree_index = ocfs2_find_subtree_root(inode, left_path, 4272 subtree_index = ocfs2_find_subtree_root(et, left_path,
4189 right_path); 4273 right_path);
4190 ocfs2_complete_edge_insert(inode, handle, left_path, 4274 ocfs2_complete_edge_insert(handle, left_path, right_path,
4191 right_path, subtree_index); 4275 subtree_index);
4192 } 4276 }
4193 4277
4194 ret = 0; 4278 ret = 0;
@@ -4196,8 +4280,7 @@ out:
4196 return ret; 4280 return ret;
4197} 4281}
4198 4282
4199static int ocfs2_do_insert_extent(struct inode *inode, 4283static int ocfs2_do_insert_extent(handle_t *handle,
4200 handle_t *handle,
4201 struct ocfs2_extent_tree *et, 4284 struct ocfs2_extent_tree *et,
4202 struct ocfs2_extent_rec *insert_rec, 4285 struct ocfs2_extent_rec *insert_rec,
4203 struct ocfs2_insert_type *type) 4286 struct ocfs2_insert_type *type)
@@ -4210,7 +4293,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4210 4293
4211 el = et->et_root_el; 4294 el = et->et_root_el;
4212 4295
4213 ret = ocfs2_et_root_journal_access(handle, inode, et, 4296 ret = ocfs2_et_root_journal_access(handle, et,
4214 OCFS2_JOURNAL_ACCESS_WRITE); 4297 OCFS2_JOURNAL_ACCESS_WRITE);
4215 if (ret) { 4298 if (ret) {
4216 mlog_errno(ret); 4299 mlog_errno(ret);
@@ -4218,7 +4301,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4218 } 4301 }
4219 4302
4220 if (le16_to_cpu(el->l_tree_depth) == 0) { 4303 if (le16_to_cpu(el->l_tree_depth) == 0) {
4221 ocfs2_insert_at_leaf(insert_rec, el, type, inode); 4304 ocfs2_insert_at_leaf(et, insert_rec, el, type);
4222 goto out_update_clusters; 4305 goto out_update_clusters;
4223 } 4306 }
4224 4307
@@ -4241,7 +4324,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4241 cpos = UINT_MAX; 4324 cpos = UINT_MAX;
4242 } 4325 }
4243 4326
4244 ret = ocfs2_find_path(inode, right_path, cpos); 4327 ret = ocfs2_find_path(et->et_ci, right_path, cpos);
4245 if (ret) { 4328 if (ret) {
4246 mlog_errno(ret); 4329 mlog_errno(ret);
4247 goto out; 4330 goto out;
@@ -4260,7 +4343,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4260 * can wind up skipping both of these two special cases... 4343 * can wind up skipping both of these two special cases...
4261 */ 4344 */
4262 if (rotate) { 4345 if (rotate) {
4263 ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, 4346 ret = ocfs2_rotate_tree_right(handle, et, type->ins_split,
4264 le32_to_cpu(insert_rec->e_cpos), 4347 le32_to_cpu(insert_rec->e_cpos),
4265 right_path, &left_path); 4348 right_path, &left_path);
4266 if (ret) { 4349 if (ret) {
@@ -4272,7 +4355,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4272 * ocfs2_rotate_tree_right() might have extended the 4355 * ocfs2_rotate_tree_right() might have extended the
4273 * transaction without re-journaling our tree root. 4356 * transaction without re-journaling our tree root.
4274 */ 4357 */
4275 ret = ocfs2_et_root_journal_access(handle, inode, et, 4358 ret = ocfs2_et_root_journal_access(handle, et,
4276 OCFS2_JOURNAL_ACCESS_WRITE); 4359 OCFS2_JOURNAL_ACCESS_WRITE);
4277 if (ret) { 4360 if (ret) {
4278 mlog_errno(ret); 4361 mlog_errno(ret);
@@ -4280,7 +4363,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4280 } 4363 }
4281 } else if (type->ins_appending == APPEND_TAIL 4364 } else if (type->ins_appending == APPEND_TAIL
4282 && type->ins_contig != CONTIG_LEFT) { 4365 && type->ins_contig != CONTIG_LEFT) {
4283 ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, 4366 ret = ocfs2_append_rec_to_path(handle, et, insert_rec,
4284 right_path, &left_path); 4367 right_path, &left_path);
4285 if (ret) { 4368 if (ret) {
4286 mlog_errno(ret); 4369 mlog_errno(ret);
@@ -4288,7 +4371,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4288 } 4371 }
4289 } 4372 }
4290 4373
4291 ret = ocfs2_insert_path(inode, handle, left_path, right_path, 4374 ret = ocfs2_insert_path(handle, et, left_path, right_path,
4292 insert_rec, type); 4375 insert_rec, type);
4293 if (ret) { 4376 if (ret) {
4294 mlog_errno(ret); 4377 mlog_errno(ret);
@@ -4297,7 +4380,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4297 4380
4298out_update_clusters: 4381out_update_clusters:
4299 if (type->ins_split == SPLIT_NONE) 4382 if (type->ins_split == SPLIT_NONE)
4300 ocfs2_et_update_clusters(inode, et, 4383 ocfs2_et_update_clusters(et,
4301 le16_to_cpu(insert_rec->e_leaf_clusters)); 4384 le16_to_cpu(insert_rec->e_leaf_clusters));
4302 4385
4303 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 4386 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4312,7 +4395,8 @@ out:
4312} 4395}
4313 4396
4314static enum ocfs2_contig_type 4397static enum ocfs2_contig_type
4315ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, 4398ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4399 struct ocfs2_path *path,
4316 struct ocfs2_extent_list *el, int index, 4400 struct ocfs2_extent_list *el, int index,
4317 struct ocfs2_extent_rec *split_rec) 4401 struct ocfs2_extent_rec *split_rec)
4318{ 4402{
@@ -4324,12 +4408,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4324 struct ocfs2_path *left_path = NULL, *right_path = NULL; 4408 struct ocfs2_path *left_path = NULL, *right_path = NULL;
4325 struct buffer_head *bh; 4409 struct buffer_head *bh;
4326 struct ocfs2_extent_block *eb; 4410 struct ocfs2_extent_block *eb;
4411 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
4327 4412
4328 if (index > 0) { 4413 if (index > 0) {
4329 rec = &el->l_recs[index - 1]; 4414 rec = &el->l_recs[index - 1];
4330 } else if (path->p_tree_depth > 0) { 4415 } else if (path->p_tree_depth > 0) {
4331 status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, 4416 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
4332 path, &left_cpos);
4333 if (status) 4417 if (status)
4334 goto out; 4418 goto out;
4335 4419
@@ -4338,7 +4422,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4338 if (!left_path) 4422 if (!left_path)
4339 goto out; 4423 goto out;
4340 4424
4341 status = ocfs2_find_path(inode, left_path, left_cpos); 4425 status = ocfs2_find_path(et->et_ci, left_path,
4426 left_cpos);
4342 if (status) 4427 if (status)
4343 goto out; 4428 goto out;
4344 4429
@@ -4348,7 +4433,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4348 le16_to_cpu(new_el->l_count)) { 4433 le16_to_cpu(new_el->l_count)) {
4349 bh = path_leaf_bh(left_path); 4434 bh = path_leaf_bh(left_path);
4350 eb = (struct ocfs2_extent_block *)bh->b_data; 4435 eb = (struct ocfs2_extent_block *)bh->b_data;
4351 ocfs2_error(inode->i_sb, 4436 ocfs2_error(sb,
4352 "Extent block #%llu has an " 4437 "Extent block #%llu has an "
4353 "invalid l_next_free_rec of " 4438 "invalid l_next_free_rec of "
4354 "%d. It should have " 4439 "%d. It should have "
@@ -4373,7 +4458,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4373 if (split_rec->e_cpos == el->l_recs[index].e_cpos) 4458 if (split_rec->e_cpos == el->l_recs[index].e_cpos)
4374 ret = CONTIG_RIGHT; 4459 ret = CONTIG_RIGHT;
4375 } else { 4460 } else {
4376 ret = ocfs2_extent_contig(inode, rec, split_rec); 4461 ret = ocfs2_et_extent_contig(et, rec, split_rec);
4377 } 4462 }
4378 } 4463 }
4379 4464
@@ -4382,8 +4467,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4382 rec = &el->l_recs[index + 1]; 4467 rec = &el->l_recs[index + 1];
4383 else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && 4468 else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
4384 path->p_tree_depth > 0) { 4469 path->p_tree_depth > 0) {
4385 status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, 4470 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
4386 path, &right_cpos);
4387 if (status) 4471 if (status)
4388 goto out; 4472 goto out;
4389 4473
@@ -4394,7 +4478,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4394 if (!right_path) 4478 if (!right_path)
4395 goto out; 4479 goto out;
4396 4480
4397 status = ocfs2_find_path(inode, right_path, right_cpos); 4481 status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
4398 if (status) 4482 if (status)
4399 goto out; 4483 goto out;
4400 4484
@@ -4404,7 +4488,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4404 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { 4488 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
4405 bh = path_leaf_bh(right_path); 4489 bh = path_leaf_bh(right_path);
4406 eb = (struct ocfs2_extent_block *)bh->b_data; 4490 eb = (struct ocfs2_extent_block *)bh->b_data;
4407 ocfs2_error(inode->i_sb, 4491 ocfs2_error(sb,
4408 "Extent block #%llu has an " 4492 "Extent block #%llu has an "
4409 "invalid l_next_free_rec of %d", 4493 "invalid l_next_free_rec of %d",
4410 (unsigned long long)le64_to_cpu(eb->h_blkno), 4494 (unsigned long long)le64_to_cpu(eb->h_blkno),
@@ -4419,7 +4503,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4419 if (rec) { 4503 if (rec) {
4420 enum ocfs2_contig_type contig_type; 4504 enum ocfs2_contig_type contig_type;
4421 4505
4422 contig_type = ocfs2_extent_contig(inode, rec, split_rec); 4506 contig_type = ocfs2_et_extent_contig(et, rec, split_rec);
4423 4507
4424 if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) 4508 if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
4425 ret = CONTIG_LEFTRIGHT; 4509 ret = CONTIG_LEFTRIGHT;
@@ -4436,11 +4520,10 @@ out:
4436 return ret; 4520 return ret;
4437} 4521}
4438 4522
4439static void ocfs2_figure_contig_type(struct inode *inode, 4523static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
4440 struct ocfs2_insert_type *insert, 4524 struct ocfs2_insert_type *insert,
4441 struct ocfs2_extent_list *el, 4525 struct ocfs2_extent_list *el,
4442 struct ocfs2_extent_rec *insert_rec, 4526 struct ocfs2_extent_rec *insert_rec)
4443 struct ocfs2_extent_tree *et)
4444{ 4527{
4445 int i; 4528 int i;
4446 enum ocfs2_contig_type contig_type = CONTIG_NONE; 4529 enum ocfs2_contig_type contig_type = CONTIG_NONE;
@@ -4448,8 +4531,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
4448 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); 4531 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
4449 4532
4450 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 4533 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
4451 contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], 4534 contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i],
4452 insert_rec); 4535 insert_rec);
4453 if (contig_type != CONTIG_NONE) { 4536 if (contig_type != CONTIG_NONE) {
4454 insert->ins_contig_index = i; 4537 insert->ins_contig_index = i;
4455 break; 4538 break;
@@ -4530,8 +4613,7 @@ set_tail_append:
4530 * All of the information is stored on the ocfs2_insert_type 4613 * All of the information is stored on the ocfs2_insert_type
4531 * structure. 4614 * structure.
4532 */ 4615 */
4533static int ocfs2_figure_insert_type(struct inode *inode, 4616static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
4534 struct ocfs2_extent_tree *et,
4535 struct buffer_head **last_eb_bh, 4617 struct buffer_head **last_eb_bh,
4536 struct ocfs2_extent_rec *insert_rec, 4618 struct ocfs2_extent_rec *insert_rec,
4537 int *free_records, 4619 int *free_records,
@@ -4555,7 +4637,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4555 * ocfs2_figure_insert_type() and ocfs2_add_branch() 4637 * ocfs2_figure_insert_type() and ocfs2_add_branch()
4556 * may want it later. 4638 * may want it later.
4557 */ 4639 */
4558 ret = ocfs2_read_extent_block(inode, 4640 ret = ocfs2_read_extent_block(et->et_ci,
4559 ocfs2_et_get_last_eb_blk(et), 4641 ocfs2_et_get_last_eb_blk(et),
4560 &bh); 4642 &bh);
4561 if (ret) { 4643 if (ret) {
@@ -4578,7 +4660,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4578 le16_to_cpu(el->l_next_free_rec); 4660 le16_to_cpu(el->l_next_free_rec);
4579 4661
4580 if (!insert->ins_tree_depth) { 4662 if (!insert->ins_tree_depth) {
4581 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); 4663 ocfs2_figure_contig_type(et, insert, el, insert_rec);
4582 ocfs2_figure_appending_type(insert, el, insert_rec); 4664 ocfs2_figure_appending_type(insert, el, insert_rec);
4583 return 0; 4665 return 0;
4584 } 4666 }
@@ -4596,7 +4678,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4596 * us the rightmost tree path. This is accounted for below in 4678 * us the rightmost tree path. This is accounted for below in
4597 * the appending code. 4679 * the appending code.
4598 */ 4680 */
4599 ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos)); 4681 ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos));
4600 if (ret) { 4682 if (ret) {
4601 mlog_errno(ret); 4683 mlog_errno(ret);
4602 goto out; 4684 goto out;
@@ -4612,7 +4694,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4612 * into two types of appends: simple record append, or a 4694 * into two types of appends: simple record append, or a
4613 * rotate inside the tail leaf. 4695 * rotate inside the tail leaf.
4614 */ 4696 */
4615 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); 4697 ocfs2_figure_contig_type(et, insert, el, insert_rec);
4616 4698
4617 /* 4699 /*
4618 * The insert code isn't quite ready to deal with all cases of 4700 * The insert code isn't quite ready to deal with all cases of
@@ -4657,13 +4739,11 @@ out:
4657} 4739}
4658 4740
4659/* 4741/*
4660 * Insert an extent into an inode btree. 4742 * Insert an extent into a btree.
4661 * 4743 *
4662 * The caller needs to update fe->i_clusters 4744 * The caller needs to update the owning btree's cluster count.
4663 */ 4745 */
4664int ocfs2_insert_extent(struct ocfs2_super *osb, 4746int ocfs2_insert_extent(handle_t *handle,
4665 handle_t *handle,
4666 struct inode *inode,
4667 struct ocfs2_extent_tree *et, 4747 struct ocfs2_extent_tree *et,
4668 u32 cpos, 4748 u32 cpos,
4669 u64 start_blk, 4749 u64 start_blk,
@@ -4677,21 +4757,22 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4677 struct ocfs2_insert_type insert = {0, }; 4757 struct ocfs2_insert_type insert = {0, };
4678 struct ocfs2_extent_rec rec; 4758 struct ocfs2_extent_rec rec;
4679 4759
4680 mlog(0, "add %u clusters at position %u to inode %llu\n", 4760 mlog(0, "add %u clusters at position %u to owner %llu\n",
4681 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4761 new_clusters, cpos,
4762 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
4682 4763
4683 memset(&rec, 0, sizeof(rec)); 4764 memset(&rec, 0, sizeof(rec));
4684 rec.e_cpos = cpu_to_le32(cpos); 4765 rec.e_cpos = cpu_to_le32(cpos);
4685 rec.e_blkno = cpu_to_le64(start_blk); 4766 rec.e_blkno = cpu_to_le64(start_blk);
4686 rec.e_leaf_clusters = cpu_to_le16(new_clusters); 4767 rec.e_leaf_clusters = cpu_to_le16(new_clusters);
4687 rec.e_flags = flags; 4768 rec.e_flags = flags;
4688 status = ocfs2_et_insert_check(inode, et, &rec); 4769 status = ocfs2_et_insert_check(et, &rec);
4689 if (status) { 4770 if (status) {
4690 mlog_errno(status); 4771 mlog_errno(status);
4691 goto bail; 4772 goto bail;
4692 } 4773 }
4693 4774
4694 status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec, 4775 status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec,
4695 &free_records, &insert); 4776 &free_records, &insert);
4696 if (status < 0) { 4777 if (status < 0) {
4697 mlog_errno(status); 4778 mlog_errno(status);
@@ -4705,7 +4786,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4705 free_records, insert.ins_tree_depth); 4786 free_records, insert.ins_tree_depth);
4706 4787
4707 if (insert.ins_contig == CONTIG_NONE && free_records == 0) { 4788 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
4708 status = ocfs2_grow_tree(inode, handle, et, 4789 status = ocfs2_grow_tree(handle, et,
4709 &insert.ins_tree_depth, &last_eb_bh, 4790 &insert.ins_tree_depth, &last_eb_bh,
4710 meta_ac); 4791 meta_ac);
4711 if (status) { 4792 if (status) {
@@ -4715,11 +4796,11 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4715 } 4796 }
4716 4797
4717 /* Finally, we can add clusters. This might rotate the tree for us. */ 4798 /* Finally, we can add clusters. This might rotate the tree for us. */
4718 status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert); 4799 status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
4719 if (status < 0) 4800 if (status < 0)
4720 mlog_errno(status); 4801 mlog_errno(status);
4721 else if (et->et_ops == &ocfs2_dinode_et_ops) 4802 else
4722 ocfs2_extent_map_insert_rec(inode, &rec); 4803 ocfs2_et_extent_map_insert(et, &rec);
4723 4804
4724bail: 4805bail:
4725 brelse(last_eb_bh); 4806 brelse(last_eb_bh);
@@ -4735,13 +4816,11 @@ bail:
4735 * it is not limited to the file storage. Any extent tree can use this 4816 * it is not limited to the file storage. Any extent tree can use this
4736 * function if it implements the proper ocfs2_extent_tree. 4817 * function if it implements the proper ocfs2_extent_tree.
4737 */ 4818 */
4738int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, 4819int ocfs2_add_clusters_in_btree(handle_t *handle,
4739 struct inode *inode, 4820 struct ocfs2_extent_tree *et,
4740 u32 *logical_offset, 4821 u32 *logical_offset,
4741 u32 clusters_to_add, 4822 u32 clusters_to_add,
4742 int mark_unwritten, 4823 int mark_unwritten,
4743 struct ocfs2_extent_tree *et,
4744 handle_t *handle,
4745 struct ocfs2_alloc_context *data_ac, 4824 struct ocfs2_alloc_context *data_ac,
4746 struct ocfs2_alloc_context *meta_ac, 4825 struct ocfs2_alloc_context *meta_ac,
4747 enum ocfs2_alloc_restarted *reason_ret) 4826 enum ocfs2_alloc_restarted *reason_ret)
@@ -4752,13 +4831,15 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4752 u32 bit_off, num_bits; 4831 u32 bit_off, num_bits;
4753 u64 block; 4832 u64 block;
4754 u8 flags = 0; 4833 u8 flags = 0;
4834 struct ocfs2_super *osb =
4835 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
4755 4836
4756 BUG_ON(!clusters_to_add); 4837 BUG_ON(!clusters_to_add);
4757 4838
4758 if (mark_unwritten) 4839 if (mark_unwritten)
4759 flags = OCFS2_EXT_UNWRITTEN; 4840 flags = OCFS2_EXT_UNWRITTEN;
4760 4841
4761 free_extents = ocfs2_num_free_extents(osb, inode, et); 4842 free_extents = ocfs2_num_free_extents(osb, et);
4762 if (free_extents < 0) { 4843 if (free_extents < 0) {
4763 status = free_extents; 4844 status = free_extents;
4764 mlog_errno(status); 4845 mlog_errno(status);
@@ -4795,7 +4876,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4795 BUG_ON(num_bits > clusters_to_add); 4876 BUG_ON(num_bits > clusters_to_add);
4796 4877
4797 /* reserve our write early -- insert_extent may update the tree root */ 4878 /* reserve our write early -- insert_extent may update the tree root */
4798 status = ocfs2_et_root_journal_access(handle, inode, et, 4879 status = ocfs2_et_root_journal_access(handle, et,
4799 OCFS2_JOURNAL_ACCESS_WRITE); 4880 OCFS2_JOURNAL_ACCESS_WRITE);
4800 if (status < 0) { 4881 if (status < 0) {
4801 mlog_errno(status); 4882 mlog_errno(status);
@@ -4803,10 +4884,10 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4803 } 4884 }
4804 4885
4805 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 4886 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4806 mlog(0, "Allocating %u clusters at block %u for inode %llu\n", 4887 mlog(0, "Allocating %u clusters at block %u for owner %llu\n",
4807 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4888 num_bits, bit_off,
4808 status = ocfs2_insert_extent(osb, handle, inode, et, 4889 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
4809 *logical_offset, block, 4890 status = ocfs2_insert_extent(handle, et, *logical_offset, block,
4810 num_bits, flags, meta_ac); 4891 num_bits, flags, meta_ac);
4811 if (status < 0) { 4892 if (status < 0) {
4812 mlog_errno(status); 4893 mlog_errno(status);
@@ -4856,10 +4937,9 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
4856 split_rec->e_flags = rec->e_flags; 4937 split_rec->e_flags = rec->e_flags;
4857} 4938}
4858 4939
4859static int ocfs2_split_and_insert(struct inode *inode, 4940static int ocfs2_split_and_insert(handle_t *handle,
4860 handle_t *handle,
4861 struct ocfs2_path *path,
4862 struct ocfs2_extent_tree *et, 4941 struct ocfs2_extent_tree *et,
4942 struct ocfs2_path *path,
4863 struct buffer_head **last_eb_bh, 4943 struct buffer_head **last_eb_bh,
4864 int split_index, 4944 int split_index,
4865 struct ocfs2_extent_rec *orig_split_rec, 4945 struct ocfs2_extent_rec *orig_split_rec,
@@ -4892,7 +4972,7 @@ leftright:
4892 4972
4893 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4973 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4894 le16_to_cpu(rightmost_el->l_count)) { 4974 le16_to_cpu(rightmost_el->l_count)) {
4895 ret = ocfs2_grow_tree(inode, handle, et, 4975 ret = ocfs2_grow_tree(handle, et,
4896 &depth, last_eb_bh, meta_ac); 4976 &depth, last_eb_bh, meta_ac);
4897 if (ret) { 4977 if (ret) {
4898 mlog_errno(ret); 4978 mlog_errno(ret);
@@ -4921,8 +5001,8 @@ leftright:
4921 */ 5001 */
4922 insert.ins_split = SPLIT_RIGHT; 5002 insert.ins_split = SPLIT_RIGHT;
4923 5003
4924 ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range, 5004 ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
4925 &rec); 5005 &tmprec, insert_range, &rec);
4926 5006
4927 split_rec = tmprec; 5007 split_rec = tmprec;
4928 5008
@@ -4930,7 +5010,7 @@ leftright:
4930 do_leftright = 1; 5010 do_leftright = 1;
4931 } 5011 }
4932 5012
4933 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); 5013 ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
4934 if (ret) { 5014 if (ret) {
4935 mlog_errno(ret); 5015 mlog_errno(ret);
4936 goto out; 5016 goto out;
@@ -4946,7 +5026,7 @@ leftright:
4946 ocfs2_reinit_path(path, 1); 5026 ocfs2_reinit_path(path, 1);
4947 5027
4948 cpos = le32_to_cpu(split_rec.e_cpos); 5028 cpos = le32_to_cpu(split_rec.e_cpos);
4949 ret = ocfs2_find_path(inode, path, cpos); 5029 ret = ocfs2_find_path(et->et_ci, path, cpos);
4950 if (ret) { 5030 if (ret) {
4951 mlog_errno(ret); 5031 mlog_errno(ret);
4952 goto out; 5032 goto out;
@@ -4961,8 +5041,8 @@ out:
4961 return ret; 5041 return ret;
4962} 5042}
4963 5043
4964static int ocfs2_replace_extent_rec(struct inode *inode, 5044static int ocfs2_replace_extent_rec(handle_t *handle,
4965 handle_t *handle, 5045 struct ocfs2_extent_tree *et,
4966 struct ocfs2_path *path, 5046 struct ocfs2_path *path,
4967 struct ocfs2_extent_list *el, 5047 struct ocfs2_extent_list *el,
4968 int split_index, 5048 int split_index,
@@ -4970,7 +5050,7 @@ static int ocfs2_replace_extent_rec(struct inode *inode,
4970{ 5050{
4971 int ret; 5051 int ret;
4972 5052
4973 ret = ocfs2_path_bh_journal_access(handle, inode, path, 5053 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
4974 path_num_items(path) - 1); 5054 path_num_items(path) - 1);
4975 if (ret) { 5055 if (ret) {
4976 mlog_errno(ret); 5056 mlog_errno(ret);
@@ -4985,9 +5065,8 @@ out:
4985} 5065}
4986 5066
4987/* 5067/*
4988 * Mark part or all of the extent record at split_index in the leaf 5068 * Split part or all of the extent record at split_index in the leaf
4989 * pointed to by path as written. This removes the unwritten 5069 * pointed to by path. Merge with the contiguous extent record if needed.
4990 * extent flag.
4991 * 5070 *
4992 * Care is taken to handle contiguousness so as to not grow the tree. 5071 * Care is taken to handle contiguousness so as to not grow the tree.
4993 * 5072 *
@@ -5004,14 +5083,13 @@ out:
5004 * have been brought into cache (and pinned via the journal), so the 5083 * have been brought into cache (and pinned via the journal), so the
5005 * extra overhead is not expressed in terms of disk reads. 5084 * extra overhead is not expressed in terms of disk reads.
5006 */ 5085 */
5007static int __ocfs2_mark_extent_written(struct inode *inode, 5086int ocfs2_split_extent(handle_t *handle,
5008 struct ocfs2_extent_tree *et, 5087 struct ocfs2_extent_tree *et,
5009 handle_t *handle, 5088 struct ocfs2_path *path,
5010 struct ocfs2_path *path, 5089 int split_index,
5011 int split_index, 5090 struct ocfs2_extent_rec *split_rec,
5012 struct ocfs2_extent_rec *split_rec, 5091 struct ocfs2_alloc_context *meta_ac,
5013 struct ocfs2_alloc_context *meta_ac, 5092 struct ocfs2_cached_dealloc_ctxt *dealloc)
5014 struct ocfs2_cached_dealloc_ctxt *dealloc)
5015{ 5093{
5016 int ret = 0; 5094 int ret = 0;
5017 struct ocfs2_extent_list *el = path_leaf_el(path); 5095 struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5020,12 +5098,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
5020 struct ocfs2_merge_ctxt ctxt; 5098 struct ocfs2_merge_ctxt ctxt;
5021 struct ocfs2_extent_list *rightmost_el; 5099 struct ocfs2_extent_list *rightmost_el;
5022 5100
5023 if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
5024 ret = -EIO;
5025 mlog_errno(ret);
5026 goto out;
5027 }
5028
5029 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || 5101 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
5030 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < 5102 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
5031 (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { 5103 (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
@@ -5034,19 +5106,19 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
5034 goto out; 5106 goto out;
5035 } 5107 }
5036 5108
5037 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, 5109 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(et, path, el,
5038 split_index, 5110 split_index,
5039 split_rec); 5111 split_rec);
5040 5112
5041 /* 5113 /*
5042 * The core merge / split code wants to know how much room is 5114 * The core merge / split code wants to know how much room is
5043 * left in this inodes allocation tree, so we pass the 5115 * left in this allocation tree, so we pass the
5044 * rightmost extent list. 5116 * rightmost extent list.
5045 */ 5117 */
5046 if (path->p_tree_depth) { 5118 if (path->p_tree_depth) {
5047 struct ocfs2_extent_block *eb; 5119 struct ocfs2_extent_block *eb;
5048 5120
5049 ret = ocfs2_read_extent_block(inode, 5121 ret = ocfs2_read_extent_block(et->et_ci,
5050 ocfs2_et_get_last_eb_blk(et), 5122 ocfs2_et_get_last_eb_blk(et),
5051 &last_eb_bh); 5123 &last_eb_bh);
5052 if (ret) { 5124 if (ret) {
@@ -5073,19 +5145,18 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
5073 5145
5074 if (ctxt.c_contig_type == CONTIG_NONE) { 5146 if (ctxt.c_contig_type == CONTIG_NONE) {
5075 if (ctxt.c_split_covers_rec) 5147 if (ctxt.c_split_covers_rec)
5076 ret = ocfs2_replace_extent_rec(inode, handle, 5148 ret = ocfs2_replace_extent_rec(handle, et, path, el,
5077 path, el,
5078 split_index, split_rec); 5149 split_index, split_rec);
5079 else 5150 else
5080 ret = ocfs2_split_and_insert(inode, handle, path, et, 5151 ret = ocfs2_split_and_insert(handle, et, path,
5081 &last_eb_bh, split_index, 5152 &last_eb_bh, split_index,
5082 split_rec, meta_ac); 5153 split_rec, meta_ac);
5083 if (ret) 5154 if (ret)
5084 mlog_errno(ret); 5155 mlog_errno(ret);
5085 } else { 5156 } else {
5086 ret = ocfs2_try_to_merge_extent(inode, handle, path, 5157 ret = ocfs2_try_to_merge_extent(handle, et, path,
5087 split_index, split_rec, 5158 split_index, split_rec,
5088 dealloc, &ctxt, et); 5159 dealloc, &ctxt);
5089 if (ret) 5160 if (ret)
5090 mlog_errno(ret); 5161 mlog_errno(ret);
5091 } 5162 }
@@ -5096,46 +5167,31 @@ out:
5096} 5167}
5097 5168
5098/* 5169/*
5099 * Mark the already-existing extent at cpos as written for len clusters. 5170 * Change the flags of the already-existing extent at cpos for len clusters.
5171 *
5172 * new_flags: the flags we want to set.
5173 * clear_flags: the flags we want to clear.
5174 * phys: the new physical offset we want this new extent starts from.
5100 * 5175 *
5101 * If the existing extent is larger than the request, initiate a 5176 * If the existing extent is larger than the request, initiate a
5102 * split. An attempt will be made at merging with adjacent extents. 5177 * split. An attempt will be made at merging with adjacent extents.
5103 * 5178 *
5104 * The caller is responsible for passing down meta_ac if we'll need it. 5179 * The caller is responsible for passing down meta_ac if we'll need it.
5105 */ 5180 */
5106int ocfs2_mark_extent_written(struct inode *inode, 5181int ocfs2_change_extent_flag(handle_t *handle,
5107 struct ocfs2_extent_tree *et, 5182 struct ocfs2_extent_tree *et,
5108 handle_t *handle, u32 cpos, u32 len, u32 phys, 5183 u32 cpos, u32 len, u32 phys,
5109 struct ocfs2_alloc_context *meta_ac, 5184 struct ocfs2_alloc_context *meta_ac,
5110 struct ocfs2_cached_dealloc_ctxt *dealloc) 5185 struct ocfs2_cached_dealloc_ctxt *dealloc,
5186 int new_flags, int clear_flags)
5111{ 5187{
5112 int ret, index; 5188 int ret, index;
5113 u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); 5189 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
5190 u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys);
5114 struct ocfs2_extent_rec split_rec; 5191 struct ocfs2_extent_rec split_rec;
5115 struct ocfs2_path *left_path = NULL; 5192 struct ocfs2_path *left_path = NULL;
5116 struct ocfs2_extent_list *el; 5193 struct ocfs2_extent_list *el;
5117 5194 struct ocfs2_extent_rec *rec;
5118 mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
5119 inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
5120
5121 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
5122 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
5123 "that are being written to, but the feature bit "
5124 "is not set in the super block.",
5125 (unsigned long long)OCFS2_I(inode)->ip_blkno);
5126 ret = -EROFS;
5127 goto out;
5128 }
5129
5130 /*
5131 * XXX: This should be fixed up so that we just re-insert the
5132 * next extent records.
5133 *
5134 * XXX: This is a hack on the extent tree, maybe it should be
5135 * an op?
5136 */
5137 if (et->et_ops == &ocfs2_dinode_et_ops)
5138 ocfs2_extent_map_trunc(inode, 0);
5139 5195
5140 left_path = ocfs2_new_path_from_et(et); 5196 left_path = ocfs2_new_path_from_et(et);
5141 if (!left_path) { 5197 if (!left_path) {
@@ -5144,7 +5200,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
5144 goto out; 5200 goto out;
5145 } 5201 }
5146 5202
5147 ret = ocfs2_find_path(inode, left_path, cpos); 5203 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
5148 if (ret) { 5204 if (ret) {
5149 mlog_errno(ret); 5205 mlog_errno(ret);
5150 goto out; 5206 goto out;
@@ -5153,34 +5209,102 @@ int ocfs2_mark_extent_written(struct inode *inode,
5153 5209
5154 index = ocfs2_search_extent_list(el, cpos); 5210 index = ocfs2_search_extent_list(el, cpos);
5155 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5211 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
5156 ocfs2_error(inode->i_sb, 5212 ocfs2_error(sb,
5157 "Inode %llu has an extent at cpos %u which can no " 5213 "Owner %llu has an extent at cpos %u which can no "
5158 "longer be found.\n", 5214 "longer be found.\n",
5159 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); 5215 (unsigned long long)
5216 ocfs2_metadata_cache_owner(et->et_ci), cpos);
5160 ret = -EROFS; 5217 ret = -EROFS;
5161 goto out; 5218 goto out;
5162 } 5219 }
5163 5220
5221 ret = -EIO;
5222 rec = &el->l_recs[index];
5223 if (new_flags && (rec->e_flags & new_flags)) {
5224 mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
5225 "extent that already had them",
5226 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5227 new_flags);
5228 goto out;
5229 }
5230
5231 if (clear_flags && !(rec->e_flags & clear_flags)) {
5232 mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
5233 "extent that didn't have them",
5234 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5235 clear_flags);
5236 goto out;
5237 }
5238
5164 memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); 5239 memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
5165 split_rec.e_cpos = cpu_to_le32(cpos); 5240 split_rec.e_cpos = cpu_to_le32(cpos);
5166 split_rec.e_leaf_clusters = cpu_to_le16(len); 5241 split_rec.e_leaf_clusters = cpu_to_le16(len);
5167 split_rec.e_blkno = cpu_to_le64(start_blkno); 5242 split_rec.e_blkno = cpu_to_le64(start_blkno);
5168 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; 5243 split_rec.e_flags = rec->e_flags;
5169 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; 5244 if (new_flags)
5170 5245 split_rec.e_flags |= new_flags;
5171 ret = __ocfs2_mark_extent_written(inode, et, handle, left_path, 5246 if (clear_flags)
5172 index, &split_rec, meta_ac, 5247 split_rec.e_flags &= ~clear_flags;
5173 dealloc); 5248
5249 ret = ocfs2_split_extent(handle, et, left_path,
5250 index, &split_rec, meta_ac,
5251 dealloc);
5174 if (ret) 5252 if (ret)
5175 mlog_errno(ret); 5253 mlog_errno(ret);
5176 5254
5177out: 5255out:
5178 ocfs2_free_path(left_path); 5256 ocfs2_free_path(left_path);
5179 return ret; 5257 return ret;
5258
5180} 5259}
5181 5260
5182static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, 5261/*
5183 handle_t *handle, struct ocfs2_path *path, 5262 * Mark the already-existing extent at cpos as written for len clusters.
5263 * This removes the unwritten extent flag.
5264 *
5265 * If the existing extent is larger than the request, initiate a
5266 * split. An attempt will be made at merging with adjacent extents.
5267 *
5268 * The caller is responsible for passing down meta_ac if we'll need it.
5269 */
5270int ocfs2_mark_extent_written(struct inode *inode,
5271 struct ocfs2_extent_tree *et,
5272 handle_t *handle, u32 cpos, u32 len, u32 phys,
5273 struct ocfs2_alloc_context *meta_ac,
5274 struct ocfs2_cached_dealloc_ctxt *dealloc)
5275{
5276 int ret;
5277
5278 mlog(0, "Inode %lu cpos %u, len %u, phys clusters %u\n",
5279 inode->i_ino, cpos, len, phys);
5280
5281 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
5282 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
5283 "that are being written to, but the feature bit "
5284 "is not set in the super block.",
5285 (unsigned long long)OCFS2_I(inode)->ip_blkno);
5286 ret = -EROFS;
5287 goto out;
5288 }
5289
5290 /*
5291 * XXX: This should be fixed up so that we just re-insert the
5292 * next extent records.
5293 */
5294 ocfs2_et_extent_map_truncate(et, 0);
5295
5296 ret = ocfs2_change_extent_flag(handle, et, cpos,
5297 len, phys, meta_ac, dealloc,
5298 0, OCFS2_EXT_UNWRITTEN);
5299 if (ret)
5300 mlog_errno(ret);
5301
5302out:
5303 return ret;
5304}
5305
5306static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5307 struct ocfs2_path *path,
5184 int index, u32 new_range, 5308 int index, u32 new_range,
5185 struct ocfs2_alloc_context *meta_ac) 5309 struct ocfs2_alloc_context *meta_ac)
5186{ 5310{
@@ -5197,11 +5321,12 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
5197 */ 5321 */
5198 el = path_leaf_el(path); 5322 el = path_leaf_el(path);
5199 rec = &el->l_recs[index]; 5323 rec = &el->l_recs[index];
5200 ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec); 5324 ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
5325 &split_rec, new_range, rec);
5201 5326
5202 depth = path->p_tree_depth; 5327 depth = path->p_tree_depth;
5203 if (depth > 0) { 5328 if (depth > 0) {
5204 ret = ocfs2_read_extent_block(inode, 5329 ret = ocfs2_read_extent_block(et->et_ci,
5205 ocfs2_et_get_last_eb_blk(et), 5330 ocfs2_et_get_last_eb_blk(et),
5206 &last_eb_bh); 5331 &last_eb_bh);
5207 if (ret < 0) { 5332 if (ret < 0) {
@@ -5224,7 +5349,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
5224 5349
5225 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 5350 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
5226 le16_to_cpu(rightmost_el->l_count)) { 5351 le16_to_cpu(rightmost_el->l_count)) {
5227 ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh, 5352 ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh,
5228 meta_ac); 5353 meta_ac);
5229 if (ret) { 5354 if (ret) {
5230 mlog_errno(ret); 5355 mlog_errno(ret);
@@ -5238,7 +5363,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
5238 insert.ins_split = SPLIT_RIGHT; 5363 insert.ins_split = SPLIT_RIGHT;
5239 insert.ins_tree_depth = depth; 5364 insert.ins_tree_depth = depth;
5240 5365
5241 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); 5366 ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
5242 if (ret) 5367 if (ret)
5243 mlog_errno(ret); 5368 mlog_errno(ret);
5244 5369
@@ -5247,23 +5372,23 @@ out:
5247 return ret; 5372 return ret;
5248} 5373}
5249 5374
5250static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, 5375static int ocfs2_truncate_rec(handle_t *handle,
5376 struct ocfs2_extent_tree *et,
5251 struct ocfs2_path *path, int index, 5377 struct ocfs2_path *path, int index,
5252 struct ocfs2_cached_dealloc_ctxt *dealloc, 5378 struct ocfs2_cached_dealloc_ctxt *dealloc,
5253 u32 cpos, u32 len, 5379 u32 cpos, u32 len)
5254 struct ocfs2_extent_tree *et)
5255{ 5380{
5256 int ret; 5381 int ret;
5257 u32 left_cpos, rec_range, trunc_range; 5382 u32 left_cpos, rec_range, trunc_range;
5258 int wants_rotate = 0, is_rightmost_tree_rec = 0; 5383 int wants_rotate = 0, is_rightmost_tree_rec = 0;
5259 struct super_block *sb = inode->i_sb; 5384 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
5260 struct ocfs2_path *left_path = NULL; 5385 struct ocfs2_path *left_path = NULL;
5261 struct ocfs2_extent_list *el = path_leaf_el(path); 5386 struct ocfs2_extent_list *el = path_leaf_el(path);
5262 struct ocfs2_extent_rec *rec; 5387 struct ocfs2_extent_rec *rec;
5263 struct ocfs2_extent_block *eb; 5388 struct ocfs2_extent_block *eb;
5264 5389
5265 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { 5390 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
5266 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); 5391 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
5267 if (ret) { 5392 if (ret) {
5268 mlog_errno(ret); 5393 mlog_errno(ret);
5269 goto out; 5394 goto out;
@@ -5295,14 +5420,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5295 * by this leaf and the one to it's left. 5420 * by this leaf and the one to it's left.
5296 * 5421 *
5297 * There are two cases we can skip: 5422 * There are two cases we can skip:
5298 * 1) Path is the leftmost one in our inode tree. 5423 * 1) Path is the leftmost one in our btree.
5299 * 2) The leaf is rightmost and will be empty after 5424 * 2) The leaf is rightmost and will be empty after
5300 * we remove the extent record - the rotate code 5425 * we remove the extent record - the rotate code
5301 * knows how to update the newly formed edge. 5426 * knows how to update the newly formed edge.
5302 */ 5427 */
5303 5428
5304 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, 5429 ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
5305 &left_cpos);
5306 if (ret) { 5430 if (ret) {
5307 mlog_errno(ret); 5431 mlog_errno(ret);
5308 goto out; 5432 goto out;
@@ -5316,7 +5440,8 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5316 goto out; 5440 goto out;
5317 } 5441 }
5318 5442
5319 ret = ocfs2_find_path(inode, left_path, left_cpos); 5443 ret = ocfs2_find_path(et->et_ci, left_path,
5444 left_cpos);
5320 if (ret) { 5445 if (ret) {
5321 mlog_errno(ret); 5446 mlog_errno(ret);
5322 goto out; 5447 goto out;
@@ -5332,13 +5457,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5332 goto out; 5457 goto out;
5333 } 5458 }
5334 5459
5335 ret = ocfs2_journal_access_path(inode, handle, path); 5460 ret = ocfs2_journal_access_path(et->et_ci, handle, path);
5336 if (ret) { 5461 if (ret) {
5337 mlog_errno(ret); 5462 mlog_errno(ret);
5338 goto out; 5463 goto out;
5339 } 5464 }
5340 5465
5341 ret = ocfs2_journal_access_path(inode, handle, left_path); 5466 ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
5342 if (ret) { 5467 if (ret) {
5343 mlog_errno(ret); 5468 mlog_errno(ret);
5344 goto out; 5469 goto out;
@@ -5361,7 +5486,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5361 * be deleted by the rotate code. 5486 * be deleted by the rotate code.
5362 */ 5487 */
5363 rec = &el->l_recs[next_free - 1]; 5488 rec = &el->l_recs[next_free - 1];
5364 ocfs2_adjust_rightmost_records(inode, handle, path, 5489 ocfs2_adjust_rightmost_records(handle, et, path,
5365 rec); 5490 rec);
5366 } 5491 }
5367 } else if (le32_to_cpu(rec->e_cpos) == cpos) { 5492 } else if (le32_to_cpu(rec->e_cpos) == cpos) {
@@ -5373,11 +5498,12 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5373 /* Remove rightmost portion of the record */ 5498 /* Remove rightmost portion of the record */
5374 le16_add_cpu(&rec->e_leaf_clusters, -len); 5499 le16_add_cpu(&rec->e_leaf_clusters, -len);
5375 if (is_rightmost_tree_rec) 5500 if (is_rightmost_tree_rec)
5376 ocfs2_adjust_rightmost_records(inode, handle, path, rec); 5501 ocfs2_adjust_rightmost_records(handle, et, path, rec);
5377 } else { 5502 } else {
5378 /* Caller should have trapped this. */ 5503 /* Caller should have trapped this. */
5379 mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) " 5504 mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) "
5380 "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, 5505 "(%u, %u)\n",
5506 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5381 le32_to_cpu(rec->e_cpos), 5507 le32_to_cpu(rec->e_cpos),
5382 le16_to_cpu(rec->e_leaf_clusters), cpos, len); 5508 le16_to_cpu(rec->e_leaf_clusters), cpos, len);
5383 BUG(); 5509 BUG();
@@ -5386,14 +5512,14 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5386 if (left_path) { 5512 if (left_path) {
5387 int subtree_index; 5513 int subtree_index;
5388 5514
5389 subtree_index = ocfs2_find_subtree_root(inode, left_path, path); 5515 subtree_index = ocfs2_find_subtree_root(et, left_path, path);
5390 ocfs2_complete_edge_insert(inode, handle, left_path, path, 5516 ocfs2_complete_edge_insert(handle, left_path, path,
5391 subtree_index); 5517 subtree_index);
5392 } 5518 }
5393 5519
5394 ocfs2_journal_dirty(handle, path_leaf_bh(path)); 5520 ocfs2_journal_dirty(handle, path_leaf_bh(path));
5395 5521
5396 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); 5522 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
5397 if (ret) { 5523 if (ret) {
5398 mlog_errno(ret); 5524 mlog_errno(ret);
5399 goto out; 5525 goto out;
@@ -5404,9 +5530,9 @@ out:
5404 return ret; 5530 return ret;
5405} 5531}
5406 5532
5407int ocfs2_remove_extent(struct inode *inode, 5533int ocfs2_remove_extent(handle_t *handle,
5408 struct ocfs2_extent_tree *et, 5534 struct ocfs2_extent_tree *et,
5409 u32 cpos, u32 len, handle_t *handle, 5535 u32 cpos, u32 len,
5410 struct ocfs2_alloc_context *meta_ac, 5536 struct ocfs2_alloc_context *meta_ac,
5411 struct ocfs2_cached_dealloc_ctxt *dealloc) 5537 struct ocfs2_cached_dealloc_ctxt *dealloc)
5412{ 5538{
@@ -5416,7 +5542,11 @@ int ocfs2_remove_extent(struct inode *inode,
5416 struct ocfs2_extent_list *el; 5542 struct ocfs2_extent_list *el;
5417 struct ocfs2_path *path = NULL; 5543 struct ocfs2_path *path = NULL;
5418 5544
5419 ocfs2_extent_map_trunc(inode, 0); 5545 /*
5546 * XXX: Why are we truncating to 0 instead of wherever this
5547 * affects us?
5548 */
5549 ocfs2_et_extent_map_truncate(et, 0);
5420 5550
5421 path = ocfs2_new_path_from_et(et); 5551 path = ocfs2_new_path_from_et(et);
5422 if (!path) { 5552 if (!path) {
@@ -5425,7 +5555,7 @@ int ocfs2_remove_extent(struct inode *inode,
5425 goto out; 5555 goto out;
5426 } 5556 }
5427 5557
5428 ret = ocfs2_find_path(inode, path, cpos); 5558 ret = ocfs2_find_path(et->et_ci, path, cpos);
5429 if (ret) { 5559 if (ret) {
5430 mlog_errno(ret); 5560 mlog_errno(ret);
5431 goto out; 5561 goto out;
@@ -5434,10 +5564,11 @@ int ocfs2_remove_extent(struct inode *inode,
5434 el = path_leaf_el(path); 5564 el = path_leaf_el(path);
5435 index = ocfs2_search_extent_list(el, cpos); 5565 index = ocfs2_search_extent_list(el, cpos);
5436 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5566 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
5437 ocfs2_error(inode->i_sb, 5567 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5438 "Inode %llu has an extent at cpos %u which can no " 5568 "Owner %llu has an extent at cpos %u which can no "
5439 "longer be found.\n", 5569 "longer be found.\n",
5440 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); 5570 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5571 cpos);
5441 ret = -EROFS; 5572 ret = -EROFS;
5442 goto out; 5573 goto out;
5443 } 5574 }
@@ -5464,20 +5595,21 @@ int ocfs2_remove_extent(struct inode *inode,
5464 5595
5465 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); 5596 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
5466 5597
5467 mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d " 5598 mlog(0, "Owner %llu, remove (cpos %u, len %u). Existing index %d "
5468 "(cpos %u, len %u)\n", 5599 "(cpos %u, len %u)\n",
5469 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index, 5600 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5601 cpos, len, index,
5470 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec)); 5602 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
5471 5603
5472 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { 5604 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
5473 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5605 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
5474 cpos, len, et); 5606 cpos, len);
5475 if (ret) { 5607 if (ret) {
5476 mlog_errno(ret); 5608 mlog_errno(ret);
5477 goto out; 5609 goto out;
5478 } 5610 }
5479 } else { 5611 } else {
5480 ret = ocfs2_split_tree(inode, et, handle, path, index, 5612 ret = ocfs2_split_tree(handle, et, path, index,
5481 trunc_range, meta_ac); 5613 trunc_range, meta_ac);
5482 if (ret) { 5614 if (ret) {
5483 mlog_errno(ret); 5615 mlog_errno(ret);
@@ -5490,7 +5622,7 @@ int ocfs2_remove_extent(struct inode *inode,
5490 */ 5622 */
5491 ocfs2_reinit_path(path, 1); 5623 ocfs2_reinit_path(path, 1);
5492 5624
5493 ret = ocfs2_find_path(inode, path, cpos); 5625 ret = ocfs2_find_path(et->et_ci, path, cpos);
5494 if (ret) { 5626 if (ret) {
5495 mlog_errno(ret); 5627 mlog_errno(ret);
5496 goto out; 5628 goto out;
@@ -5499,9 +5631,9 @@ int ocfs2_remove_extent(struct inode *inode,
5499 el = path_leaf_el(path); 5631 el = path_leaf_el(path);
5500 index = ocfs2_search_extent_list(el, cpos); 5632 index = ocfs2_search_extent_list(el, cpos);
5501 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5633 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
5502 ocfs2_error(inode->i_sb, 5634 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5503 "Inode %llu: split at cpos %u lost record.", 5635 "Owner %llu: split at cpos %u lost record.",
5504 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5636 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5505 cpos); 5637 cpos);
5506 ret = -EROFS; 5638 ret = -EROFS;
5507 goto out; 5639 goto out;
@@ -5515,18 +5647,18 @@ int ocfs2_remove_extent(struct inode *inode,
5515 rec_range = le32_to_cpu(rec->e_cpos) + 5647 rec_range = le32_to_cpu(rec->e_cpos) +
5516 ocfs2_rec_clusters(el, rec); 5648 ocfs2_rec_clusters(el, rec);
5517 if (rec_range != trunc_range) { 5649 if (rec_range != trunc_range) {
5518 ocfs2_error(inode->i_sb, 5650 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5519 "Inode %llu: error after split at cpos %u" 5651 "Owner %llu: error after split at cpos %u"
5520 "trunc len %u, existing record is (%u,%u)", 5652 "trunc len %u, existing record is (%u,%u)",
5521 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5653 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5522 cpos, len, le32_to_cpu(rec->e_cpos), 5654 cpos, len, le32_to_cpu(rec->e_cpos),
5523 ocfs2_rec_clusters(el, rec)); 5655 ocfs2_rec_clusters(el, rec));
5524 ret = -EROFS; 5656 ret = -EROFS;
5525 goto out; 5657 goto out;
5526 } 5658 }
5527 5659
5528 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5660 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
5529 cpos, len, et); 5661 cpos, len);
5530 if (ret) { 5662 if (ret) {
5531 mlog_errno(ret); 5663 mlog_errno(ret);
5532 goto out; 5664 goto out;
@@ -5573,7 +5705,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
5573 goto out; 5705 goto out;
5574 } 5706 }
5575 5707
5576 ret = ocfs2_et_root_journal_access(handle, inode, et, 5708 ret = ocfs2_et_root_journal_access(handle, et,
5577 OCFS2_JOURNAL_ACCESS_WRITE); 5709 OCFS2_JOURNAL_ACCESS_WRITE);
5578 if (ret) { 5710 if (ret) {
5579 mlog_errno(ret); 5711 mlog_errno(ret);
@@ -5583,14 +5715,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
5583 vfs_dq_free_space_nodirty(inode, 5715 vfs_dq_free_space_nodirty(inode,
5584 ocfs2_clusters_to_bytes(inode->i_sb, len)); 5716 ocfs2_clusters_to_bytes(inode->i_sb, len));
5585 5717
5586 ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac, 5718 ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
5587 dealloc);
5588 if (ret) { 5719 if (ret) {
5589 mlog_errno(ret); 5720 mlog_errno(ret);
5590 goto out_commit; 5721 goto out_commit;
5591 } 5722 }
5592 5723
5593 ocfs2_et_update_clusters(inode, et, -len); 5724 ocfs2_et_update_clusters(et, -len);
5594 5725
5595 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 5726 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
5596 if (ret) { 5727 if (ret) {
@@ -5690,7 +5821,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5690 goto bail; 5821 goto bail;
5691 } 5822 }
5692 5823
5693 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, 5824 status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
5694 OCFS2_JOURNAL_ACCESS_WRITE); 5825 OCFS2_JOURNAL_ACCESS_WRITE);
5695 if (status < 0) { 5826 if (status < 0) {
5696 mlog_errno(status); 5827 mlog_errno(status);
@@ -5752,7 +5883,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5752 while (i >= 0) { 5883 while (i >= 0) {
5753 /* Caller has given us at least enough credits to 5884 /* Caller has given us at least enough credits to
5754 * update the truncate log dinode */ 5885 * update the truncate log dinode */
5755 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, 5886 status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
5756 OCFS2_JOURNAL_ACCESS_WRITE); 5887 OCFS2_JOURNAL_ACCESS_WRITE);
5757 if (status < 0) { 5888 if (status < 0) {
5758 mlog_errno(status); 5889 mlog_errno(status);
@@ -6010,7 +6141,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
6010 tl->tl_used = 0; 6141 tl->tl_used = 0;
6011 6142
6012 ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check); 6143 ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
6013 status = ocfs2_write_block(osb, tl_bh, tl_inode); 6144 status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode));
6014 if (status < 0) { 6145 if (status < 0) {
6015 mlog_errno(status); 6146 mlog_errno(status);
6016 goto bail; 6147 goto bail;
@@ -6400,9 +6531,9 @@ ocfs2_find_per_slot_free_list(int type,
6400 return fl; 6531 return fl;
6401} 6532}
6402 6533
6403static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 6534int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6404 int type, int slot, u64 blkno, 6535 int type, int slot, u64 blkno,
6405 unsigned int bit) 6536 unsigned int bit)
6406{ 6537{
6407 int ret; 6538 int ret;
6408 struct ocfs2_per_slot_free_list *fl; 6539 struct ocfs2_per_slot_free_list *fl;
@@ -6518,7 +6649,7 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6518 goto out; 6649 goto out;
6519 } 6650 }
6520 6651
6521 ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh); 6652 ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
6522 if (ret) { 6653 if (ret) {
6523 mlog_errno(ret); 6654 mlog_errno(ret);
6524 goto out; 6655 goto out;
@@ -6551,7 +6682,7 @@ out:
6551 */ 6682 */
6552static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, 6683static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6553 handle_t *handle, struct ocfs2_truncate_context *tc, 6684 handle_t *handle, struct ocfs2_truncate_context *tc,
6554 u32 clusters_to_del, u64 *delete_start) 6685 u32 clusters_to_del, u64 *delete_start, u8 *flags)
6555{ 6686{
6556 int ret, i, index = path->p_tree_depth; 6687 int ret, i, index = path->p_tree_depth;
6557 u32 new_edge = 0; 6688 u32 new_edge = 0;
@@ -6561,6 +6692,7 @@ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6561 struct ocfs2_extent_rec *rec; 6692 struct ocfs2_extent_rec *rec;
6562 6693
6563 *delete_start = 0; 6694 *delete_start = 0;
6695 *flags = 0;
6564 6696
6565 while (index >= 0) { 6697 while (index >= 0) {
6566 bh = path->p_node[index].bh; 6698 bh = path->p_node[index].bh;
@@ -6648,6 +6780,7 @@ find_tail_record:
6648 *delete_start = le64_to_cpu(rec->e_blkno) 6780 *delete_start = le64_to_cpu(rec->e_blkno)
6649 + ocfs2_clusters_to_blocks(inode->i_sb, 6781 + ocfs2_clusters_to_blocks(inode->i_sb,
6650 le16_to_cpu(rec->e_leaf_clusters)); 6782 le16_to_cpu(rec->e_leaf_clusters));
6783 *flags = rec->e_flags;
6651 6784
6652 /* 6785 /*
6653 * If it's now empty, remove this record. 6786 * If it's now empty, remove this record.
@@ -6719,7 +6852,7 @@ delete:
6719 6852
6720 mlog(0, "deleting this extent block.\n"); 6853 mlog(0, "deleting this extent block.\n");
6721 6854
6722 ocfs2_remove_from_cache(inode, bh); 6855 ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
6723 6856
6724 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); 6857 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
6725 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); 6858 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
@@ -6747,7 +6880,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6747 struct buffer_head *fe_bh, 6880 struct buffer_head *fe_bh,
6748 handle_t *handle, 6881 handle_t *handle,
6749 struct ocfs2_truncate_context *tc, 6882 struct ocfs2_truncate_context *tc,
6750 struct ocfs2_path *path) 6883 struct ocfs2_path *path,
6884 struct ocfs2_alloc_context *meta_ac)
6751{ 6885{
6752 int status; 6886 int status;
6753 struct ocfs2_dinode *fe; 6887 struct ocfs2_dinode *fe;
@@ -6755,6 +6889,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6755 struct ocfs2_extent_list *el; 6889 struct ocfs2_extent_list *el;
6756 struct buffer_head *last_eb_bh = NULL; 6890 struct buffer_head *last_eb_bh = NULL;
6757 u64 delete_blk = 0; 6891 u64 delete_blk = 0;
6892 u8 rec_flags;
6758 6893
6759 fe = (struct ocfs2_dinode *) fe_bh->b_data; 6894 fe = (struct ocfs2_dinode *) fe_bh->b_data;
6760 6895
@@ -6769,14 +6904,14 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6769 * Each component will be touched, so we might as well journal 6904 * Each component will be touched, so we might as well journal
6770 * here to avoid having to handle errors later. 6905 * here to avoid having to handle errors later.
6771 */ 6906 */
6772 status = ocfs2_journal_access_path(inode, handle, path); 6907 status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
6773 if (status < 0) { 6908 if (status < 0) {
6774 mlog_errno(status); 6909 mlog_errno(status);
6775 goto bail; 6910 goto bail;
6776 } 6911 }
6777 6912
6778 if (last_eb_bh) { 6913 if (last_eb_bh) {
6779 status = ocfs2_journal_access_eb(handle, inode, last_eb_bh, 6914 status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
6780 OCFS2_JOURNAL_ACCESS_WRITE); 6915 OCFS2_JOURNAL_ACCESS_WRITE);
6781 if (status < 0) { 6916 if (status < 0) {
6782 mlog_errno(status); 6917 mlog_errno(status);
@@ -6810,7 +6945,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6810 inode->i_blocks = ocfs2_inode_sector_count(inode); 6945 inode->i_blocks = ocfs2_inode_sector_count(inode);
6811 6946
6812 status = ocfs2_trim_tree(inode, path, handle, tc, 6947 status = ocfs2_trim_tree(inode, path, handle, tc,
6813 clusters_to_del, &delete_blk); 6948 clusters_to_del, &delete_blk, &rec_flags);
6814 if (status) { 6949 if (status) {
6815 mlog_errno(status); 6950 mlog_errno(status);
6816 goto bail; 6951 goto bail;
@@ -6842,8 +6977,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6842 } 6977 }
6843 6978
6844 if (delete_blk) { 6979 if (delete_blk) {
6845 status = ocfs2_truncate_log_append(osb, handle, delete_blk, 6980 if (rec_flags & OCFS2_EXT_REFCOUNTED)
6846 clusters_to_del); 6981 status = ocfs2_decrease_refcount(inode, handle,
6982 ocfs2_blocks_to_clusters(osb->sb,
6983 delete_blk),
6984 clusters_to_del, meta_ac,
6985 &tc->tc_dealloc, 1);
6986 else
6987 status = ocfs2_truncate_log_append(osb, handle,
6988 delete_blk,
6989 clusters_to_del);
6847 if (status < 0) { 6990 if (status < 0) {
6848 mlog_errno(status); 6991 mlog_errno(status);
6849 goto bail; 6992 goto bail;
@@ -6863,9 +7006,9 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
6863 return 0; 7006 return 0;
6864} 7007}
6865 7008
6866static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, 7009void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6867 unsigned int from, unsigned int to, 7010 unsigned int from, unsigned int to,
6868 struct page *page, int zero, u64 *phys) 7011 struct page *page, int zero, u64 *phys)
6869{ 7012{
6870 int ret, partial = 0; 7013 int ret, partial = 0;
6871 7014
@@ -6933,20 +7076,16 @@ out:
6933 ocfs2_unlock_and_free_pages(pages, numpages); 7076 ocfs2_unlock_and_free_pages(pages, numpages);
6934} 7077}
6935 7078
6936static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, 7079int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
6937 struct page **pages, int *num) 7080 struct page **pages, int *num)
6938{ 7081{
6939 int numpages, ret = 0; 7082 int numpages, ret = 0;
6940 struct super_block *sb = inode->i_sb;
6941 struct address_space *mapping = inode->i_mapping; 7083 struct address_space *mapping = inode->i_mapping;
6942 unsigned long index; 7084 unsigned long index;
6943 loff_t last_page_bytes; 7085 loff_t last_page_bytes;
6944 7086
6945 BUG_ON(start > end); 7087 BUG_ON(start > end);
6946 7088
6947 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
6948 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
6949
6950 numpages = 0; 7089 numpages = 0;
6951 last_page_bytes = PAGE_ALIGN(end); 7090 last_page_bytes = PAGE_ALIGN(end);
6952 index = start >> PAGE_CACHE_SHIFT; 7091 index = start >> PAGE_CACHE_SHIFT;
@@ -6974,6 +7113,17 @@ out:
6974 return ret; 7113 return ret;
6975} 7114}
6976 7115
7116static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
7117 struct page **pages, int *num)
7118{
7119 struct super_block *sb = inode->i_sb;
7120
7121 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
7122 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
7123
7124 return ocfs2_grab_pages(inode, start, end, pages, num);
7125}
7126
6977/* 7127/*
6978 * Zero the area past i_size but still within an allocated 7128 * Zero the area past i_size but still within an allocated
6979 * cluster. This avoids exposing nonzero data on subsequent file 7129 * cluster. This avoids exposing nonzero data on subsequent file
@@ -7138,7 +7288,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7138 goto out_unlock; 7288 goto out_unlock;
7139 } 7289 }
7140 7290
7141 ret = ocfs2_journal_access_di(handle, inode, di_bh, 7291 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
7142 OCFS2_JOURNAL_ACCESS_WRITE); 7292 OCFS2_JOURNAL_ACCESS_WRITE);
7143 if (ret) { 7293 if (ret) {
7144 mlog_errno(ret); 7294 mlog_errno(ret);
@@ -7218,9 +7368,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7218 * this proves to be false, we could always re-build 7368 * this proves to be false, we could always re-build
7219 * the in-inode data from our pages. 7369 * the in-inode data from our pages.
7220 */ 7370 */
7221 ocfs2_init_dinode_extent_tree(&et, inode, di_bh); 7371 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7222 ret = ocfs2_insert_extent(osb, handle, inode, &et, 7372 ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
7223 0, block, 1, 0, NULL);
7224 if (ret) { 7373 if (ret) {
7225 mlog_errno(ret); 7374 mlog_errno(ret);
7226 goto out_commit; 7375 goto out_commit;
@@ -7262,11 +7411,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
7262{ 7411{
7263 int status, i, credits, tl_sem = 0; 7412 int status, i, credits, tl_sem = 0;
7264 u32 clusters_to_del, new_highest_cpos, range; 7413 u32 clusters_to_del, new_highest_cpos, range;
7414 u64 blkno = 0;
7265 struct ocfs2_extent_list *el; 7415 struct ocfs2_extent_list *el;
7266 handle_t *handle = NULL; 7416 handle_t *handle = NULL;
7267 struct inode *tl_inode = osb->osb_tl_inode; 7417 struct inode *tl_inode = osb->osb_tl_inode;
7268 struct ocfs2_path *path = NULL; 7418 struct ocfs2_path *path = NULL;
7269 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 7419 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
7420 struct ocfs2_alloc_context *meta_ac = NULL;
7421 struct ocfs2_refcount_tree *ref_tree = NULL;
7270 7422
7271 mlog_entry_void(); 7423 mlog_entry_void();
7272 7424
@@ -7292,10 +7444,12 @@ start:
7292 goto bail; 7444 goto bail;
7293 } 7445 }
7294 7446
7447 credits = 0;
7448
7295 /* 7449 /*
7296 * Truncate always works against the rightmost tree branch. 7450 * Truncate always works against the rightmost tree branch.
7297 */ 7451 */
7298 status = ocfs2_find_path(inode, path, UINT_MAX); 7452 status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX);
7299 if (status) { 7453 if (status) {
7300 mlog_errno(status); 7454 mlog_errno(status);
7301 goto bail; 7455 goto bail;
@@ -7332,10 +7486,15 @@ start:
7332 clusters_to_del = 0; 7486 clusters_to_del = 0;
7333 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 7487 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
7334 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 7488 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
7489 blkno = le64_to_cpu(el->l_recs[i].e_blkno);
7335 } else if (range > new_highest_cpos) { 7490 } else if (range > new_highest_cpos) {
7336 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 7491 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
7337 le32_to_cpu(el->l_recs[i].e_cpos)) - 7492 le32_to_cpu(el->l_recs[i].e_cpos)) -
7338 new_highest_cpos; 7493 new_highest_cpos;
7494 blkno = le64_to_cpu(el->l_recs[i].e_blkno) +
7495 ocfs2_clusters_to_blocks(inode->i_sb,
7496 ocfs2_rec_clusters(el, &el->l_recs[i]) -
7497 clusters_to_del);
7339 } else { 7498 } else {
7340 status = 0; 7499 status = 0;
7341 goto bail; 7500 goto bail;
@@ -7344,6 +7503,29 @@ start:
7344 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 7503 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n",
7345 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); 7504 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
7346 7505
7506 if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7507 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7508 OCFS2_HAS_REFCOUNT_FL));
7509
7510 status = ocfs2_lock_refcount_tree(osb,
7511 le64_to_cpu(di->i_refcount_loc),
7512 1, &ref_tree, NULL);
7513 if (status) {
7514 mlog_errno(status);
7515 goto bail;
7516 }
7517
7518 status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7519 blkno,
7520 clusters_to_del,
7521 &credits,
7522 &meta_ac);
7523 if (status < 0) {
7524 mlog_errno(status);
7525 goto bail;
7526 }
7527 }
7528
7347 mutex_lock(&tl_inode->i_mutex); 7529 mutex_lock(&tl_inode->i_mutex);
7348 tl_sem = 1; 7530 tl_sem = 1;
7349 /* ocfs2_truncate_log_needs_flush guarantees us at least one 7531 /* ocfs2_truncate_log_needs_flush guarantees us at least one
@@ -7357,7 +7539,7 @@ start:
7357 } 7539 }
7358 } 7540 }
7359 7541
7360 credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 7542 credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
7361 (struct ocfs2_dinode *)fe_bh->b_data, 7543 (struct ocfs2_dinode *)fe_bh->b_data,
7362 el); 7544 el);
7363 handle = ocfs2_start_trans(osb, credits); 7545 handle = ocfs2_start_trans(osb, credits);
@@ -7369,7 +7551,7 @@ start:
7369 } 7551 }
7370 7552
7371 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, 7553 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
7372 tc, path); 7554 tc, path, meta_ac);
7373 if (status < 0) { 7555 if (status < 0) {
7374 mlog_errno(status); 7556 mlog_errno(status);
7375 goto bail; 7557 goto bail;
@@ -7383,6 +7565,16 @@ start:
7383 7565
7384 ocfs2_reinit_path(path, 1); 7566 ocfs2_reinit_path(path, 1);
7385 7567
7568 if (meta_ac) {
7569 ocfs2_free_alloc_context(meta_ac);
7570 meta_ac = NULL;
7571 }
7572
7573 if (ref_tree) {
7574 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7575 ref_tree = NULL;
7576 }
7577
7386 /* 7578 /*
7387 * The check above will catch the case where we've truncated 7579 * The check above will catch the case where we've truncated
7388 * away all allocation. 7580 * away all allocation.
@@ -7399,6 +7591,12 @@ bail:
7399 if (handle) 7591 if (handle)
7400 ocfs2_commit_trans(osb, handle); 7592 ocfs2_commit_trans(osb, handle);
7401 7593
7594 if (meta_ac)
7595 ocfs2_free_alloc_context(meta_ac);
7596
7597 if (ref_tree)
7598 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7599
7402 ocfs2_run_deallocs(osb, &tc->tc_dealloc); 7600 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7403 7601
7404 ocfs2_free_path(path); 7602 ocfs2_free_path(path);
@@ -7445,7 +7643,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
7445 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 7643 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
7446 7644
7447 if (fe->id2.i_list.l_tree_depth) { 7645 if (fe->id2.i_list.l_tree_depth) {
7448 status = ocfs2_read_extent_block(inode, 7646 status = ocfs2_read_extent_block(INODE_CACHE(inode),
7449 le64_to_cpu(fe->i_last_eb_blk), 7647 le64_to_cpu(fe->i_last_eb_blk),
7450 &last_eb_bh); 7648 &last_eb_bh);
7451 if (status < 0) { 7649 if (status < 0) {
@@ -7507,7 +7705,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
7507 goto out; 7705 goto out;
7508 } 7706 }
7509 7707
7510 ret = ocfs2_journal_access_di(handle, inode, di_bh, 7708 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
7511 OCFS2_JOURNAL_ACCESS_WRITE); 7709 OCFS2_JOURNAL_ACCESS_WRITE);
7512 if (ret) { 7710 if (ret) {
7513 mlog_errno(ret); 7711 mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 353254ba29e1..9c122d574464 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -45,7 +45,8 @@
45 * 45 *
46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a 46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree 47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
48 * functions. With metadata ecc, we now call different journal_access 48 * functions. It needs the ocfs2_caching_info structure associated with
49 * I/O on the tree. With metadata ecc, we now call different journal_access
49 * functions for each type of metadata, so it must have the 50 * functions for each type of metadata, so it must have the
50 * root_journal_access function. 51 * root_journal_access function.
51 * ocfs2_extent_tree_operations abstract the normal operations we do for 52 * ocfs2_extent_tree_operations abstract the normal operations we do for
@@ -56,6 +57,7 @@ struct ocfs2_extent_tree {
56 struct ocfs2_extent_tree_operations *et_ops; 57 struct ocfs2_extent_tree_operations *et_ops;
57 struct buffer_head *et_root_bh; 58 struct buffer_head *et_root_bh;
58 struct ocfs2_extent_list *et_root_el; 59 struct ocfs2_extent_list *et_root_el;
60 struct ocfs2_caching_info *et_ci;
59 ocfs2_journal_access_func et_root_journal_access; 61 ocfs2_journal_access_func et_root_journal_access;
60 void *et_object; 62 void *et_object;
61 unsigned int et_max_leaf_clusters; 63 unsigned int et_max_leaf_clusters;
@@ -66,31 +68,32 @@ struct ocfs2_extent_tree {
66 * specified object buffer. 68 * specified object buffer.
67 */ 69 */
68void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, 70void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
69 struct inode *inode, 71 struct ocfs2_caching_info *ci,
70 struct buffer_head *bh); 72 struct buffer_head *bh);
71void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, 73void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
72 struct inode *inode, 74 struct ocfs2_caching_info *ci,
73 struct buffer_head *bh); 75 struct buffer_head *bh);
74struct ocfs2_xattr_value_buf; 76struct ocfs2_xattr_value_buf;
75void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, 77void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
76 struct inode *inode, 78 struct ocfs2_caching_info *ci,
77 struct ocfs2_xattr_value_buf *vb); 79 struct ocfs2_xattr_value_buf *vb);
78void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, 80void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
79 struct inode *inode, 81 struct ocfs2_caching_info *ci,
80 struct buffer_head *bh); 82 struct buffer_head *bh);
83void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
84 struct ocfs2_caching_info *ci,
85 struct buffer_head *bh);
81 86
82/* 87/*
83 * Read an extent block into *bh. If *bh is NULL, a bh will be 88 * Read an extent block into *bh. If *bh is NULL, a bh will be
84 * allocated. This is a cached read. The extent block will be validated 89 * allocated. This is a cached read. The extent block will be validated
85 * with ocfs2_validate_extent_block(). 90 * with ocfs2_validate_extent_block().
86 */ 91 */
87int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, 92int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
88 struct buffer_head **bh); 93 struct buffer_head **bh);
89 94
90struct ocfs2_alloc_context; 95struct ocfs2_alloc_context;
91int ocfs2_insert_extent(struct ocfs2_super *osb, 96int ocfs2_insert_extent(handle_t *handle,
92 handle_t *handle,
93 struct inode *inode,
94 struct ocfs2_extent_tree *et, 97 struct ocfs2_extent_tree *et,
95 u32 cpos, 98 u32 cpos,
96 u64 start_blk, 99 u64 start_blk,
@@ -103,25 +106,36 @@ enum ocfs2_alloc_restarted {
103 RESTART_TRANS, 106 RESTART_TRANS,
104 RESTART_META 107 RESTART_META
105}; 108};
106int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, 109int ocfs2_add_clusters_in_btree(handle_t *handle,
107 struct inode *inode, 110 struct ocfs2_extent_tree *et,
108 u32 *logical_offset, 111 u32 *logical_offset,
109 u32 clusters_to_add, 112 u32 clusters_to_add,
110 int mark_unwritten, 113 int mark_unwritten,
111 struct ocfs2_extent_tree *et,
112 handle_t *handle,
113 struct ocfs2_alloc_context *data_ac, 114 struct ocfs2_alloc_context *data_ac,
114 struct ocfs2_alloc_context *meta_ac, 115 struct ocfs2_alloc_context *meta_ac,
115 enum ocfs2_alloc_restarted *reason_ret); 116 enum ocfs2_alloc_restarted *reason_ret);
116struct ocfs2_cached_dealloc_ctxt; 117struct ocfs2_cached_dealloc_ctxt;
118struct ocfs2_path;
119int ocfs2_split_extent(handle_t *handle,
120 struct ocfs2_extent_tree *et,
121 struct ocfs2_path *path,
122 int split_index,
123 struct ocfs2_extent_rec *split_rec,
124 struct ocfs2_alloc_context *meta_ac,
125 struct ocfs2_cached_dealloc_ctxt *dealloc);
117int ocfs2_mark_extent_written(struct inode *inode, 126int ocfs2_mark_extent_written(struct inode *inode,
118 struct ocfs2_extent_tree *et, 127 struct ocfs2_extent_tree *et,
119 handle_t *handle, u32 cpos, u32 len, u32 phys, 128 handle_t *handle, u32 cpos, u32 len, u32 phys,
120 struct ocfs2_alloc_context *meta_ac, 129 struct ocfs2_alloc_context *meta_ac,
121 struct ocfs2_cached_dealloc_ctxt *dealloc); 130 struct ocfs2_cached_dealloc_ctxt *dealloc);
122int ocfs2_remove_extent(struct inode *inode, 131int ocfs2_change_extent_flag(handle_t *handle,
123 struct ocfs2_extent_tree *et, 132 struct ocfs2_extent_tree *et,
124 u32 cpos, u32 len, handle_t *handle, 133 u32 cpos, u32 len, u32 phys,
134 struct ocfs2_alloc_context *meta_ac,
135 struct ocfs2_cached_dealloc_ctxt *dealloc,
136 int new_flags, int clear_flags);
137int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
138 u32 cpos, u32 len,
125 struct ocfs2_alloc_context *meta_ac, 139 struct ocfs2_alloc_context *meta_ac,
126 struct ocfs2_cached_dealloc_ctxt *dealloc); 140 struct ocfs2_cached_dealloc_ctxt *dealloc);
127int ocfs2_remove_btree_range(struct inode *inode, 141int ocfs2_remove_btree_range(struct inode *inode,
@@ -130,7 +144,6 @@ int ocfs2_remove_btree_range(struct inode *inode,
130 struct ocfs2_cached_dealloc_ctxt *dealloc); 144 struct ocfs2_cached_dealloc_ctxt *dealloc);
131 145
132int ocfs2_num_free_extents(struct ocfs2_super *osb, 146int ocfs2_num_free_extents(struct ocfs2_super *osb,
133 struct inode *inode,
134 struct ocfs2_extent_tree *et); 147 struct ocfs2_extent_tree *et);
135 148
136/* 149/*
@@ -195,6 +208,9 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
195} 208}
196int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 209int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
197 u64 blkno, unsigned int bit); 210 u64 blkno, unsigned int bit);
211int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
212 int type, int slot, u64 blkno,
213 unsigned int bit);
198static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) 214static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
199{ 215{
200 return c->c_global_allocator != NULL; 216 return c->c_global_allocator != NULL;
@@ -222,8 +238,9 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
222int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
223 unsigned int start, unsigned int end, int trunc); 239 unsigned int start, unsigned int end, int trunc);
224 240
225int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 241int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
226 u32 cpos, struct buffer_head **leaf_bh); 242 struct ocfs2_extent_list *root_el, u32 cpos,
243 struct buffer_head **leaf_bh);
227int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); 244int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
228 245
229/* 246/*
@@ -254,4 +271,50 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
254 return !rec->e_leaf_clusters; 271 return !rec->e_leaf_clusters;
255} 272}
256 273
274int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
275 struct page **pages, int *num);
276void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
277 unsigned int from, unsigned int to,
278 struct page *page, int zero, u64 *phys);
279/*
280 * Structures which describe a path through a btree, and functions to
281 * manipulate them.
282 *
283 * The idea here is to be as generic as possible with the tree
284 * manipulation code.
285 */
286struct ocfs2_path_item {
287 struct buffer_head *bh;
288 struct ocfs2_extent_list *el;
289};
290
291#define OCFS2_MAX_PATH_DEPTH 5
292
293struct ocfs2_path {
294 int p_tree_depth;
295 ocfs2_journal_access_func p_root_access;
296 struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
297};
298
299#define path_root_bh(_path) ((_path)->p_node[0].bh)
300#define path_root_el(_path) ((_path)->p_node[0].el)
301#define path_root_access(_path)((_path)->p_root_access)
302#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
303#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
304#define path_num_items(_path) ((_path)->p_tree_depth + 1)
305
306void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root);
307void ocfs2_free_path(struct ocfs2_path *path);
308int ocfs2_find_path(struct ocfs2_caching_info *ci,
309 struct ocfs2_path *path,
310 u32 cpos);
311struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path);
312struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et);
313int ocfs2_path_bh_journal_access(handle_t *handle,
314 struct ocfs2_caching_info *ci,
315 struct ocfs2_path *path,
316 int idx);
317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
318 handle_t *handle,
319 struct ocfs2_path *path);
257#endif /* OCFS2_ALLOC_H */ 320#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8a1e61545f41..72e76062a900 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -44,6 +44,7 @@
44#include "suballoc.h" 44#include "suballoc.h"
45#include "super.h" 45#include "super.h"
46#include "symlink.h" 46#include "symlink.h"
47#include "refcounttree.h"
47 48
48#include "buffer_head_io.h" 49#include "buffer_head_io.h"
49 50
@@ -126,8 +127,8 @@ bail:
126 return err; 127 return err;
127} 128}
128 129
129static int ocfs2_get_block(struct inode *inode, sector_t iblock, 130int ocfs2_get_block(struct inode *inode, sector_t iblock,
130 struct buffer_head *bh_result, int create) 131 struct buffer_head *bh_result, int create)
131{ 132{
132 int err = 0; 133 int err = 0;
133 unsigned int ext_flags; 134 unsigned int ext_flags;
@@ -590,6 +591,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
590 goto bail; 591 goto bail;
591 } 592 }
592 593
594 /* We should already CoW the refcounted extent. */
595 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
593 /* 596 /*
594 * get_more_blocks() expects us to describe a hole by clearing 597 * get_more_blocks() expects us to describe a hole by clearing
595 * the mapped bit on bh_result(). 598 * the mapped bit on bh_result().
@@ -687,6 +690,10 @@ static ssize_t ocfs2_direct_IO(int rw,
687 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 690 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
688 return 0; 691 return 0;
689 692
693 /* Fallback to buffered I/O if we are appending. */
694 if (i_size_read(inode) <= offset)
695 return 0;
696
690 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 697 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
691 inode->i_sb->s_bdev, iov, offset, 698 inode->i_sb->s_bdev, iov, offset,
692 nr_segs, 699 nr_segs,
@@ -1259,7 +1266,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1259 goto out; 1266 goto out;
1260 } 1267 }
1261 } else if (unwritten) { 1268 } else if (unwritten) {
1262 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); 1269 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
1270 wc->w_di_bh);
1263 ret = ocfs2_mark_extent_written(inode, &et, 1271 ret = ocfs2_mark_extent_written(inode, &et,
1264 wc->w_handle, cpos, 1, phys, 1272 wc->w_handle, cpos, 1, phys,
1265 meta_ac, &wc->w_dealloc); 1273 meta_ac, &wc->w_dealloc);
@@ -1448,6 +1456,9 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1448 goto out; 1456 goto out;
1449 } 1457 }
1450 1458
1459 /* We should already CoW the refcountd extent. */
1460 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1461
1451 /* 1462 /*
1452 * Assume worst case - that we're writing in 1463 * Assume worst case - that we're writing in
1453 * the middle of the extent. 1464 * the middle of the extent.
@@ -1528,7 +1539,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
1528 goto out; 1539 goto out;
1529 } 1540 }
1530 1541
1531 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, 1542 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
1532 OCFS2_JOURNAL_ACCESS_WRITE); 1543 OCFS2_JOURNAL_ACCESS_WRITE);
1533 if (ret) { 1544 if (ret) {
1534 ocfs2_commit_trans(osb, handle); 1545 ocfs2_commit_trans(osb, handle);
@@ -1699,6 +1710,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1699 goto out; 1710 goto out;
1700 } 1711 }
1701 1712
1713 ret = ocfs2_check_range_for_refcount(inode, pos, len);
1714 if (ret < 0) {
1715 mlog_errno(ret);
1716 goto out;
1717 } else if (ret == 1) {
1718 ret = ocfs2_refcount_cow(inode, di_bh,
1719 wc->w_cpos, wc->w_clen, UINT_MAX);
1720 if (ret) {
1721 mlog_errno(ret);
1722 goto out;
1723 }
1724 }
1725
1702 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, 1726 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
1703 &extents_to_split); 1727 &extents_to_split);
1704 if (ret) { 1728 if (ret) {
@@ -1726,7 +1750,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1726 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), 1750 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
1727 clusters_to_alloc, extents_to_split); 1751 clusters_to_alloc, extents_to_split);
1728 1752
1729 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); 1753 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
1754 wc->w_di_bh);
1730 ret = ocfs2_lock_allocators(inode, &et, 1755 ret = ocfs2_lock_allocators(inode, &et,
1731 clusters_to_alloc, extents_to_split, 1756 clusters_to_alloc, extents_to_split,
1732 &data_ac, &meta_ac); 1757 &data_ac, &meta_ac);
@@ -1773,7 +1798,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1773 * We don't want this to fail in ocfs2_write_end(), so do it 1798 * We don't want this to fail in ocfs2_write_end(), so do it
1774 * here. 1799 * here.
1775 */ 1800 */
1776 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, 1801 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
1777 OCFS2_JOURNAL_ACCESS_WRITE); 1802 OCFS2_JOURNAL_ACCESS_WRITE);
1778 if (ret) { 1803 if (ret) {
1779 mlog_errno(ret); 1804 mlog_errno(ret);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 503e49232e11..c48e93ffc513 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -57,6 +57,8 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
57 struct buffer_head *di_bh); 57 struct buffer_head *di_bh);
58int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size); 58int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
59 59
60int ocfs2_get_block(struct inode *inode, sector_t iblock,
61 struct buffer_head *bh_result, int create);
60/* all ocfs2_dio_end_io()'s fault */ 62/* all ocfs2_dio_end_io()'s fault */
61#define ocfs2_iocb_is_rw_locked(iocb) \ 63#define ocfs2_iocb_is_rw_locked(iocb) \
62 test_bit(0, (unsigned long *)&iocb->private) 64 test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 15c8e6deee2e..d43d34a1dd31 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -52,12 +52,12 @@ enum ocfs2_state_bits {
52BUFFER_FNS(NeedsValidate, needs_validate); 52BUFFER_FNS(NeedsValidate, needs_validate);
53 53
54int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, 54int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
55 struct inode *inode) 55 struct ocfs2_caching_info *ci)
56{ 56{
57 int ret = 0; 57 int ret = 0;
58 58
59 mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n", 59 mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n",
60 (unsigned long long)bh->b_blocknr, inode); 60 (unsigned long long)bh->b_blocknr, ci);
61 61
62 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); 62 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
63 BUG_ON(buffer_jbd(bh)); 63 BUG_ON(buffer_jbd(bh));
@@ -70,7 +70,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
70 goto out; 70 goto out;
71 } 71 }
72 72
73 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 73 ocfs2_metadata_cache_io_lock(ci);
74 74
75 lock_buffer(bh); 75 lock_buffer(bh);
76 set_buffer_uptodate(bh); 76 set_buffer_uptodate(bh);
@@ -85,7 +85,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
85 wait_on_buffer(bh); 85 wait_on_buffer(bh);
86 86
87 if (buffer_uptodate(bh)) { 87 if (buffer_uptodate(bh)) {
88 ocfs2_set_buffer_uptodate(inode, bh); 88 ocfs2_set_buffer_uptodate(ci, bh);
89 } else { 89 } else {
90 /* We don't need to remove the clustered uptodate 90 /* We don't need to remove the clustered uptodate
91 * information for this bh as it's not marked locally 91 * information for this bh as it's not marked locally
@@ -94,7 +94,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
94 put_bh(bh); 94 put_bh(bh);
95 } 95 }
96 96
97 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 97 ocfs2_metadata_cache_io_unlock(ci);
98out: 98out:
99 mlog_exit(ret); 99 mlog_exit(ret);
100 return ret; 100 return ret;
@@ -177,7 +177,7 @@ bail:
177 return status; 177 return status;
178} 178}
179 179
180int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, 180int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
181 struct buffer_head *bhs[], int flags, 181 struct buffer_head *bhs[], int flags,
182 int (*validate)(struct super_block *sb, 182 int (*validate)(struct super_block *sb,
183 struct buffer_head *bh)) 183 struct buffer_head *bh))
@@ -185,11 +185,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
185 int status = 0; 185 int status = 0;
186 int i, ignore_cache = 0; 186 int i, ignore_cache = 0;
187 struct buffer_head *bh; 187 struct buffer_head *bh;
188 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
188 189
189 mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n", 190 mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n",
190 inode, (unsigned long long)block, nr, flags); 191 ci, (unsigned long long)block, nr, flags);
191 192
192 BUG_ON(!inode); 193 BUG_ON(!ci);
193 BUG_ON((flags & OCFS2_BH_READAHEAD) && 194 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
194 (flags & OCFS2_BH_IGNORE_CACHE)); 195 (flags & OCFS2_BH_IGNORE_CACHE));
195 196
@@ -212,12 +213,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
212 goto bail; 213 goto bail;
213 } 214 }
214 215
215 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 216 ocfs2_metadata_cache_io_lock(ci);
216 for (i = 0 ; i < nr ; i++) { 217 for (i = 0 ; i < nr ; i++) {
217 if (bhs[i] == NULL) { 218 if (bhs[i] == NULL) {
218 bhs[i] = sb_getblk(inode->i_sb, block++); 219 bhs[i] = sb_getblk(sb, block++);
219 if (bhs[i] == NULL) { 220 if (bhs[i] == NULL) {
220 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 221 ocfs2_metadata_cache_io_unlock(ci);
221 status = -EIO; 222 status = -EIO;
222 mlog_errno(status); 223 mlog_errno(status);
223 goto bail; 224 goto bail;
@@ -250,11 +251,11 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
250 * before our is-it-in-flight check. 251 * before our is-it-in-flight check.
251 */ 252 */
252 253
253 if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) { 254 if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
254 mlog(ML_UPTODATE, 255 mlog(ML_UPTODATE,
255 "bh (%llu), inode %llu not uptodate\n", 256 "bh (%llu), owner %llu not uptodate\n",
256 (unsigned long long)bh->b_blocknr, 257 (unsigned long long)bh->b_blocknr,
257 (unsigned long long)OCFS2_I(inode)->ip_blkno); 258 (unsigned long long)ocfs2_metadata_cache_owner(ci));
258 /* We're using ignore_cache here to say 259 /* We're using ignore_cache here to say
259 * "go to disk" */ 260 * "go to disk" */
260 ignore_cache = 1; 261 ignore_cache = 1;
@@ -283,7 +284,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
283 * previously submitted request than we are 284 * previously submitted request than we are
284 * done here. */ 285 * done here. */
285 if ((flags & OCFS2_BH_READAHEAD) 286 if ((flags & OCFS2_BH_READAHEAD)
286 && ocfs2_buffer_read_ahead(inode, bh)) 287 && ocfs2_buffer_read_ahead(ci, bh))
287 continue; 288 continue;
288 289
289 lock_buffer(bh); 290 lock_buffer(bh);
@@ -305,7 +306,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
305 * buffer lock. */ 306 * buffer lock. */
306 if (!(flags & OCFS2_BH_IGNORE_CACHE) 307 if (!(flags & OCFS2_BH_IGNORE_CACHE)
307 && !(flags & OCFS2_BH_READAHEAD) 308 && !(flags & OCFS2_BH_READAHEAD)
308 && ocfs2_buffer_uptodate(inode, bh)) { 309 && ocfs2_buffer_uptodate(ci, bh)) {
309 unlock_buffer(bh); 310 unlock_buffer(bh);
310 continue; 311 continue;
311 } 312 }
@@ -327,7 +328,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
327 328
328 if (!(flags & OCFS2_BH_READAHEAD)) { 329 if (!(flags & OCFS2_BH_READAHEAD)) {
329 /* We know this can't have changed as we hold the 330 /* We know this can't have changed as we hold the
330 * inode sem. Avoid doing any work on the bh if the 331 * owner sem. Avoid doing any work on the bh if the
331 * journal has it. */ 332 * journal has it. */
332 if (!buffer_jbd(bh)) 333 if (!buffer_jbd(bh))
333 wait_on_buffer(bh); 334 wait_on_buffer(bh);
@@ -351,7 +352,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
351 * that better not have changed */ 352 * that better not have changed */
352 BUG_ON(buffer_jbd(bh)); 353 BUG_ON(buffer_jbd(bh));
353 clear_buffer_needs_validate(bh); 354 clear_buffer_needs_validate(bh);
354 status = validate(inode->i_sb, bh); 355 status = validate(sb, bh);
355 if (status) { 356 if (status) {
356 put_bh(bh); 357 put_bh(bh);
357 bhs[i] = NULL; 358 bhs[i] = NULL;
@@ -363,9 +364,9 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
363 /* Always set the buffer in the cache, even if it was 364 /* Always set the buffer in the cache, even if it was
364 * a forced read, or read-ahead which hasn't yet 365 * a forced read, or read-ahead which hasn't yet
365 * completed. */ 366 * completed. */
366 ocfs2_set_buffer_uptodate(inode, bh); 367 ocfs2_set_buffer_uptodate(ci, bh);
367 } 368 }
368 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 369 ocfs2_metadata_cache_io_unlock(ci);
369 370
370 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 371 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
371 (unsigned long long)block, nr, 372 (unsigned long long)block, nr,
@@ -399,7 +400,7 @@ static void ocfs2_check_super_or_backup(struct super_block *sb,
399 400
400/* 401/*
401 * Write super block and backups doesn't need to collaborate with journal, 402 * Write super block and backups doesn't need to collaborate with journal,
402 * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed 403 * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed
403 * into this function. 404 * into this function.
404 */ 405 */
405int ocfs2_write_super_or_backup(struct ocfs2_super *osb, 406int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index c75d682dadd8..b97bcc6dde7c 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -33,7 +33,7 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
33 33
34int ocfs2_write_block(struct ocfs2_super *osb, 34int ocfs2_write_block(struct ocfs2_super *osb,
35 struct buffer_head *bh, 35 struct buffer_head *bh,
36 struct inode *inode); 36 struct ocfs2_caching_info *ci);
37int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, 37int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
38 unsigned int nr, struct buffer_head *bhs[]); 38 unsigned int nr, struct buffer_head *bhs[]);
39 39
@@ -44,7 +44,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
44 * be set even for a READAHEAD call, as it marks the buffer for later 44 * be set even for a READAHEAD call, as it marks the buffer for later
45 * validation. 45 * validation.
46 */ 46 */
47int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, 47int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
48 struct buffer_head *bhs[], int flags, 48 struct buffer_head *bhs[], int flags,
49 int (*validate)(struct super_block *sb, 49 int (*validate)(struct super_block *sb,
50 struct buffer_head *bh)); 50 struct buffer_head *bh));
@@ -55,7 +55,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
55#define OCFS2_BH_IGNORE_CACHE 1 55#define OCFS2_BH_IGNORE_CACHE 1
56#define OCFS2_BH_READAHEAD 8 56#define OCFS2_BH_READAHEAD 8
57 57
58static inline int ocfs2_read_block(struct inode *inode, u64 off, 58static inline int ocfs2_read_block(struct ocfs2_caching_info *ci, u64 off,
59 struct buffer_head **bh, 59 struct buffer_head **bh,
60 int (*validate)(struct super_block *sb, 60 int (*validate)(struct super_block *sb,
61 struct buffer_head *bh)) 61 struct buffer_head *bh))
@@ -68,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
68 goto bail; 68 goto bail;
69 } 69 }
70 70
71 status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate); 71 status = ocfs2_read_blocks(ci, off, 1, bh, 0, validate);
72 72
73bail: 73bail:
74 return status; 74 return status;
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 96df5416993e..1cd2934de615 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -111,6 +111,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
111 define_mask(EXPORT), 111 define_mask(EXPORT),
112 define_mask(XATTR), 112 define_mask(XATTR),
113 define_mask(QUOTA), 113 define_mask(QUOTA),
114 define_mask(REFCOUNT),
114 define_mask(ERROR), 115 define_mask(ERROR),
115 define_mask(NOTICE), 116 define_mask(NOTICE),
116 define_mask(KTHREAD), 117 define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 696c32e50716..9b4d11726cf2 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
113#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ 113#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ 115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
116/* bits that are infrequently given and frequently matched in the high word */ 117/* bits that are infrequently given and frequently matched in the high word */
117#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 118#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
118#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 119#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index f8424874fa07..cfb2be708abe 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -163,7 +163,7 @@ static void nst_seq_stop(struct seq_file *seq, void *v)
163{ 163{
164} 164}
165 165
166static struct seq_operations nst_seq_ops = { 166static const struct seq_operations nst_seq_ops = {
167 .start = nst_seq_start, 167 .start = nst_seq_start,
168 .next = nst_seq_next, 168 .next = nst_seq_next,
169 .stop = nst_seq_stop, 169 .stop = nst_seq_stop,
@@ -344,7 +344,7 @@ static void sc_seq_stop(struct seq_file *seq, void *v)
344{ 344{
345} 345}
346 346
347static struct seq_operations sc_seq_ops = { 347static const struct seq_operations sc_seq_ops = {
348 .start = sc_seq_start, 348 .start = sc_seq_start,
349 .next = sc_seq_next, 349 .next = sc_seq_next,
350 .stop = sc_seq_stop, 350 .stop = sc_seq_stop,
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b358f3bf896d..28c3ec238796 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -176,7 +176,7 @@ static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
176 struct ocfs2_dx_root_block *dx_root; 176 struct ocfs2_dx_root_block *dx_root;
177 struct ocfs2_dir_block_trailer *trailer; 177 struct ocfs2_dir_block_trailer *trailer;
178 178
179 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 179 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
180 OCFS2_JOURNAL_ACCESS_WRITE); 180 OCFS2_JOURNAL_ACCESS_WRITE);
181 if (ret) { 181 if (ret) {
182 mlog_errno(ret); 182 mlog_errno(ret);
@@ -564,7 +564,8 @@ static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
564 int ret; 564 int ret;
565 struct buffer_head *tmp = *bh; 565 struct buffer_head *tmp = *bh;
566 566
567 ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block); 567 ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp,
568 ocfs2_validate_dir_block);
568 if (ret) { 569 if (ret) {
569 mlog_errno(ret); 570 mlog_errno(ret);
570 goto out; 571 goto out;
@@ -622,7 +623,8 @@ static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
622 u64 blkno = le64_to_cpu(di->i_dx_root); 623 u64 blkno = le64_to_cpu(di->i_dx_root);
623 struct buffer_head *tmp = *dx_root_bh; 624 struct buffer_head *tmp = *dx_root_bh;
624 625
625 ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root); 626 ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
627 ocfs2_validate_dx_root);
626 628
627 /* If ocfs2_read_block() got us a new bh, pass it up. */ 629 /* If ocfs2_read_block() got us a new bh, pass it up. */
628 if (!ret && !*dx_root_bh) 630 if (!ret && !*dx_root_bh)
@@ -662,7 +664,8 @@ static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
662 int ret; 664 int ret;
663 struct buffer_head *tmp = *dx_leaf_bh; 665 struct buffer_head *tmp = *dx_leaf_bh;
664 666
665 ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf); 667 ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
668 ocfs2_validate_dx_leaf);
666 669
667 /* If ocfs2_read_block() got us a new bh, pass it up. */ 670 /* If ocfs2_read_block() got us a new bh, pass it up. */
668 if (!ret && !*dx_leaf_bh) 671 if (!ret && !*dx_leaf_bh)
@@ -680,7 +683,7 @@ static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
680{ 683{
681 int ret; 684 int ret;
682 685
683 ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0, 686 ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0,
684 ocfs2_validate_dx_leaf); 687 ocfs2_validate_dx_leaf);
685 if (ret) 688 if (ret)
686 mlog_errno(ret); 689 mlog_errno(ret);
@@ -802,7 +805,8 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
802 struct ocfs2_extent_rec *rec = NULL; 805 struct ocfs2_extent_rec *rec = NULL;
803 806
804 if (el->l_tree_depth) { 807 if (el->l_tree_depth) {
805 ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh); 808 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
809 &eb_bh);
806 if (ret) { 810 if (ret) {
807 mlog_errno(ret); 811 mlog_errno(ret);
808 goto out; 812 goto out;
@@ -1133,7 +1137,8 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
1133 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 1137 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1134 access = ocfs2_journal_access_di; 1138 access = ocfs2_journal_access_di;
1135 1139
1136 ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1140 ret = access(handle, INODE_CACHE(dir), de_bh,
1141 OCFS2_JOURNAL_ACCESS_WRITE);
1137 if (ret) { 1142 if (ret) {
1138 mlog_errno(ret); 1143 mlog_errno(ret);
1139 goto out; 1144 goto out;
@@ -1176,7 +1181,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1176 goto bail; 1181 goto bail;
1177 } 1182 }
1178 if (de == de_del) { 1183 if (de == de_del) {
1179 status = access(handle, dir, bh, 1184 status = access(handle, INODE_CACHE(dir), bh,
1180 OCFS2_JOURNAL_ACCESS_WRITE); 1185 OCFS2_JOURNAL_ACCESS_WRITE);
1181 if (status < 0) { 1186 if (status < 0) {
1182 status = -EIO; 1187 status = -EIO;
@@ -1326,7 +1331,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1326 * the entry count needs to be updated. Also, we might be 1331 * the entry count needs to be updated. Also, we might be
1327 * adding to the start of the free list. 1332 * adding to the start of the free list.
1328 */ 1333 */
1329 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 1334 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
1330 OCFS2_JOURNAL_ACCESS_WRITE); 1335 OCFS2_JOURNAL_ACCESS_WRITE);
1331 if (ret) { 1336 if (ret) {
1332 mlog_errno(ret); 1337 mlog_errno(ret);
@@ -1334,7 +1339,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1334 } 1339 }
1335 1340
1336 if (!ocfs2_dx_root_inline(dx_root)) { 1341 if (!ocfs2_dx_root_inline(dx_root)) {
1337 ret = ocfs2_journal_access_dl(handle, dir, 1342 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
1338 lookup->dl_dx_leaf_bh, 1343 lookup->dl_dx_leaf_bh,
1339 OCFS2_JOURNAL_ACCESS_WRITE); 1344 OCFS2_JOURNAL_ACCESS_WRITE);
1340 if (ret) { 1345 if (ret) {
@@ -1493,7 +1498,7 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
1493 int ret; 1498 int ret;
1494 struct ocfs2_dx_leaf *dx_leaf; 1499 struct ocfs2_dx_leaf *dx_leaf;
1495 1500
1496 ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, 1501 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
1497 OCFS2_JOURNAL_ACCESS_WRITE); 1502 OCFS2_JOURNAL_ACCESS_WRITE);
1498 if (ret) { 1503 if (ret) {
1499 mlog_errno(ret); 1504 mlog_errno(ret);
@@ -1523,7 +1528,7 @@ static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
1523 struct ocfs2_dx_root_block *dx_root; 1528 struct ocfs2_dx_root_block *dx_root;
1524 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; 1529 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
1525 1530
1526 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 1531 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
1527 OCFS2_JOURNAL_ACCESS_WRITE); 1532 OCFS2_JOURNAL_ACCESS_WRITE);
1528 if (ret) { 1533 if (ret) {
1529 mlog_errno(ret); 1534 mlog_errno(ret);
@@ -1645,11 +1650,13 @@ int __ocfs2_add_entry(handle_t *handle,
1645 */ 1650 */
1646 if (ocfs2_free_list_at_root(lookup)) { 1651 if (ocfs2_free_list_at_root(lookup)) {
1647 bh = lookup->dl_dx_root_bh; 1652 bh = lookup->dl_dx_root_bh;
1648 retval = ocfs2_journal_access_dr(handle, dir, bh, 1653 retval = ocfs2_journal_access_dr(handle,
1654 INODE_CACHE(dir), bh,
1649 OCFS2_JOURNAL_ACCESS_WRITE); 1655 OCFS2_JOURNAL_ACCESS_WRITE);
1650 } else { 1656 } else {
1651 bh = lookup->dl_prev_leaf_bh; 1657 bh = lookup->dl_prev_leaf_bh;
1652 retval = ocfs2_journal_access_db(handle, dir, bh, 1658 retval = ocfs2_journal_access_db(handle,
1659 INODE_CACHE(dir), bh,
1653 OCFS2_JOURNAL_ACCESS_WRITE); 1660 OCFS2_JOURNAL_ACCESS_WRITE);
1654 } 1661 }
1655 if (retval) { 1662 if (retval) {
@@ -1700,11 +1707,13 @@ int __ocfs2_add_entry(handle_t *handle,
1700 } 1707 }
1701 1708
1702 if (insert_bh == parent_fe_bh) 1709 if (insert_bh == parent_fe_bh)
1703 status = ocfs2_journal_access_di(handle, dir, 1710 status = ocfs2_journal_access_di(handle,
1711 INODE_CACHE(dir),
1704 insert_bh, 1712 insert_bh,
1705 OCFS2_JOURNAL_ACCESS_WRITE); 1713 OCFS2_JOURNAL_ACCESS_WRITE);
1706 else { 1714 else {
1707 status = ocfs2_journal_access_db(handle, dir, 1715 status = ocfs2_journal_access_db(handle,
1716 INODE_CACHE(dir),
1708 insert_bh, 1717 insert_bh,
1709 OCFS2_JOURNAL_ACCESS_WRITE); 1718 OCFS2_JOURNAL_ACCESS_WRITE);
1710 1719
@@ -2280,7 +2289,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2280 struct ocfs2_inline_data *data = &di->id2.i_data; 2289 struct ocfs2_inline_data *data = &di->id2.i_data;
2281 unsigned int size = le16_to_cpu(data->id_count); 2290 unsigned int size = le16_to_cpu(data->id_count);
2282 2291
2283 ret = ocfs2_journal_access_di(handle, inode, di_bh, 2292 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2284 OCFS2_JOURNAL_ACCESS_WRITE); 2293 OCFS2_JOURNAL_ACCESS_WRITE);
2285 if (ret) { 2294 if (ret) {
2286 mlog_errno(ret); 2295 mlog_errno(ret);
@@ -2332,9 +2341,9 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2332 goto bail; 2341 goto bail;
2333 } 2342 }
2334 2343
2335 ocfs2_set_new_buffer_uptodate(inode, new_bh); 2344 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2336 2345
2337 status = ocfs2_journal_access_db(handle, inode, new_bh, 2346 status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), new_bh,
2338 OCFS2_JOURNAL_ACCESS_CREATE); 2347 OCFS2_JOURNAL_ACCESS_CREATE);
2339 if (status < 0) { 2348 if (status < 0) {
2340 mlog_errno(status); 2349 mlog_errno(status);
@@ -2418,9 +2427,9 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2418 ret = -EIO; 2427 ret = -EIO;
2419 goto out; 2428 goto out;
2420 } 2429 }
2421 ocfs2_set_new_buffer_uptodate(dir, dx_root_bh); 2430 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
2422 2431
2423 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 2432 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
2424 OCFS2_JOURNAL_ACCESS_CREATE); 2433 OCFS2_JOURNAL_ACCESS_CREATE);
2425 if (ret < 0) { 2434 if (ret < 0) {
2426 mlog_errno(ret); 2435 mlog_errno(ret);
@@ -2454,7 +2463,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2454 if (ret) 2463 if (ret)
2455 mlog_errno(ret); 2464 mlog_errno(ret);
2456 2465
2457 ret = ocfs2_journal_access_di(handle, dir, di_bh, 2466 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
2458 OCFS2_JOURNAL_ACCESS_CREATE); 2467 OCFS2_JOURNAL_ACCESS_CREATE);
2459 if (ret) { 2468 if (ret) {
2460 mlog_errno(ret); 2469 mlog_errno(ret);
@@ -2495,9 +2504,9 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
2495 } 2504 }
2496 dx_leaves[i] = bh; 2505 dx_leaves[i] = bh;
2497 2506
2498 ocfs2_set_new_buffer_uptodate(dir, bh); 2507 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh);
2499 2508
2500 ret = ocfs2_journal_access_dl(handle, dir, bh, 2509 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), bh,
2501 OCFS2_JOURNAL_ACCESS_CREATE); 2510 OCFS2_JOURNAL_ACCESS_CREATE);
2502 if (ret < 0) { 2511 if (ret < 0) {
2503 mlog_errno(ret); 2512 mlog_errno(ret);
@@ -2582,7 +2591,6 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
2582{ 2591{
2583 int ret; 2592 int ret;
2584 u64 phys_blkno; 2593 u64 phys_blkno;
2585 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2586 2594
2587 ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves, 2595 ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves,
2588 num_dx_leaves, &phys_blkno); 2596 num_dx_leaves, &phys_blkno);
@@ -2591,7 +2599,7 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
2591 goto out; 2599 goto out;
2592 } 2600 }
2593 2601
2594 ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0, 2602 ret = ocfs2_insert_extent(handle, et, cpos, phys_blkno, 1, 0,
2595 meta_ac); 2603 meta_ac);
2596 if (ret) 2604 if (ret)
2597 mlog_errno(ret); 2605 mlog_errno(ret);
@@ -2895,7 +2903,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2895 struct ocfs2_extent_tree dx_et; 2903 struct ocfs2_extent_tree dx_et;
2896 int did_quota = 0, bytes_allocated = 0; 2904 int did_quota = 0, bytes_allocated = 0;
2897 2905
2898 ocfs2_init_dinode_extent_tree(&et, dir, di_bh); 2906 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh);
2899 2907
2900 alloc = ocfs2_clusters_for_bytes(sb, bytes); 2908 alloc = ocfs2_clusters_for_bytes(sb, bytes);
2901 dx_alloc = 0; 2909 dx_alloc = 0;
@@ -3005,9 +3013,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3005 goto out_commit; 3013 goto out_commit;
3006 } 3014 }
3007 3015
3008 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); 3016 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
3009 3017
3010 ret = ocfs2_journal_access_db(handle, dir, dirdata_bh, 3018 ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh,
3011 OCFS2_JOURNAL_ACCESS_CREATE); 3019 OCFS2_JOURNAL_ACCESS_CREATE);
3012 if (ret) { 3020 if (ret) {
3013 mlog_errno(ret); 3021 mlog_errno(ret);
@@ -3060,7 +3068,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3060 * We let the later dirent insert modify c/mtime - to the user 3068 * We let the later dirent insert modify c/mtime - to the user
3061 * the data hasn't changed. 3069 * the data hasn't changed.
3062 */ 3070 */
3063 ret = ocfs2_journal_access_di(handle, dir, di_bh, 3071 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
3064 OCFS2_JOURNAL_ACCESS_CREATE); 3072 OCFS2_JOURNAL_ACCESS_CREATE);
3065 if (ret) { 3073 if (ret) {
3066 mlog_errno(ret); 3074 mlog_errno(ret);
@@ -3085,7 +3093,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3085 * This should never fail as our extent list is empty and all 3093 * This should never fail as our extent list is empty and all
3086 * related blocks have been journaled already. 3094 * related blocks have been journaled already.
3087 */ 3095 */
3088 ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len, 3096 ret = ocfs2_insert_extent(handle, &et, 0, blkno, len,
3089 0, NULL); 3097 0, NULL);
3090 if (ret) { 3098 if (ret) {
3091 mlog_errno(ret); 3099 mlog_errno(ret);
@@ -3117,8 +3125,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3117 ocfs2_dx_dir_index_root_block(dir, dx_root_bh, 3125 ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
3118 dirdata_bh); 3126 dirdata_bh);
3119 } else { 3127 } else {
3120 ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh); 3128 ocfs2_init_dx_root_extent_tree(&dx_et,
3121 ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0, 3129 INODE_CACHE(dir),
3130 dx_root_bh);
3131 ret = ocfs2_insert_extent(handle, &dx_et, 0,
3122 dx_insert_blkno, 1, 0, NULL); 3132 dx_insert_blkno, 1, 0, NULL);
3123 if (ret) 3133 if (ret)
3124 mlog_errno(ret); 3134 mlog_errno(ret);
@@ -3138,7 +3148,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3138 } 3148 }
3139 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); 3149 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
3140 3150
3141 ret = ocfs2_insert_extent(osb, handle, dir, &et, 1, 3151 ret = ocfs2_insert_extent(handle, &et, 1,
3142 blkno, len, 0, NULL); 3152 blkno, len, 0, NULL);
3143 if (ret) { 3153 if (ret) {
3144 mlog_errno(ret); 3154 mlog_errno(ret);
@@ -3337,8 +3347,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3337 spin_lock(&OCFS2_I(dir)->ip_lock); 3347 spin_lock(&OCFS2_I(dir)->ip_lock);
3338 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { 3348 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
3339 spin_unlock(&OCFS2_I(dir)->ip_lock); 3349 spin_unlock(&OCFS2_I(dir)->ip_lock);
3340 ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh); 3350 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
3341 num_free_extents = ocfs2_num_free_extents(osb, dir, &et); 3351 parent_fe_bh);
3352 num_free_extents = ocfs2_num_free_extents(osb, &et);
3342 if (num_free_extents < 0) { 3353 if (num_free_extents < 0) {
3343 status = num_free_extents; 3354 status = num_free_extents;
3344 mlog_errno(status); 3355 mlog_errno(status);
@@ -3387,9 +3398,9 @@ do_extend:
3387 goto bail; 3398 goto bail;
3388 } 3399 }
3389 3400
3390 ocfs2_set_new_buffer_uptodate(dir, new_bh); 3401 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
3391 3402
3392 status = ocfs2_journal_access_db(handle, dir, new_bh, 3403 status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh,
3393 OCFS2_JOURNAL_ACCESS_CREATE); 3404 OCFS2_JOURNAL_ACCESS_CREATE);
3394 if (status < 0) { 3405 if (status < 0) {
3395 mlog_errno(status); 3406 mlog_errno(status);
@@ -3829,7 +3840,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3829 (unsigned long long)OCFS2_I(dir)->ip_blkno, 3840 (unsigned long long)OCFS2_I(dir)->ip_blkno,
3830 (unsigned long long)leaf_blkno, insert_hash); 3841 (unsigned long long)leaf_blkno, insert_hash);
3831 3842
3832 ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); 3843 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
3833 3844
3834 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; 3845 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3835 /* 3846 /*
@@ -3885,7 +3896,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3885 } 3896 }
3886 did_quota = 1; 3897 did_quota = 1;
3887 3898
3888 ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, 3899 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
3889 OCFS2_JOURNAL_ACCESS_WRITE); 3900 OCFS2_JOURNAL_ACCESS_WRITE);
3890 if (ret) { 3901 if (ret) {
3891 mlog_errno(ret); 3902 mlog_errno(ret);
@@ -3949,7 +3960,8 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3949 } 3960 }
3950 3961
3951 for (i = 0; i < num_dx_leaves; i++) { 3962 for (i = 0; i < num_dx_leaves; i++) {
3952 ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i], 3963 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3964 orig_dx_leaves[i],
3953 OCFS2_JOURNAL_ACCESS_WRITE); 3965 OCFS2_JOURNAL_ACCESS_WRITE);
3954 if (ret) { 3966 if (ret) {
3955 mlog_errno(ret); 3967 mlog_errno(ret);
@@ -4165,7 +4177,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
4165 * failure to add the dx_root_bh to the journal won't result 4177 * failure to add the dx_root_bh to the journal won't result
4166 * us losing clusters. 4178 * us losing clusters.
4167 */ 4179 */
4168 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 4180 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
4169 OCFS2_JOURNAL_ACCESS_WRITE); 4181 OCFS2_JOURNAL_ACCESS_WRITE);
4170 if (ret) { 4182 if (ret) {
4171 mlog_errno(ret); 4183 mlog_errno(ret);
@@ -4207,9 +4219,8 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
4207 4219
4208 /* This should never fail considering we start with an empty 4220 /* This should never fail considering we start with an empty
4209 * dx_root. */ 4221 * dx_root. */
4210 ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); 4222 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
4211 ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, 4223 ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
4212 insert_blkno, 1, 0, NULL);
4213 if (ret) 4224 if (ret)
4214 mlog_errno(ret); 4225 mlog_errno(ret);
4215 did_quota = 0; 4226 did_quota = 0;
@@ -4469,7 +4480,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4469 goto out_unlock; 4480 goto out_unlock;
4470 } 4481 }
4471 4482
4472 ret = ocfs2_journal_access_di(handle, dir, di_bh, 4483 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
4473 OCFS2_JOURNAL_ACCESS_WRITE); 4484 OCFS2_JOURNAL_ACCESS_WRITE);
4474 if (ret) { 4485 if (ret) {
4475 mlog_errno(ret); 4486 mlog_errno(ret);
@@ -4532,7 +4543,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4532 if (ocfs2_dx_root_inline(dx_root)) 4543 if (ocfs2_dx_root_inline(dx_root))
4533 goto remove_index; 4544 goto remove_index;
4534 4545
4535 ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); 4546 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
4536 4547
4537 /* XXX: What if dr_clusters is too large? */ 4548 /* XXX: What if dr_clusters is too large? */
4538 while (le32_to_cpu(dx_root->dr_clusters)) { 4549 while (le32_to_cpu(dx_root->dr_clusters)) {
@@ -4565,7 +4576,7 @@ remove_index:
4565 goto out; 4576 goto out;
4566 } 4577 }
4567 4578
4568 ocfs2_remove_from_cache(dir, dx_root_bh); 4579 ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh);
4569out: 4580out:
4570 ocfs2_schedule_truncate_log_flush(osb, 1); 4581 ocfs2_schedule_truncate_log_flush(osb, 1);
4571 ocfs2_run_deallocs(osb, &dealloc); 4582 ocfs2_run_deallocs(osb, &dealloc);
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index df52f706f669..c5c88124096d 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -683,7 +683,7 @@ static int lockres_seq_show(struct seq_file *s, void *v)
683 return 0; 683 return 0;
684} 684}
685 685
686static struct seq_operations debug_lockres_ops = { 686static const struct seq_operations debug_lockres_ops = {
687 .start = lockres_seq_start, 687 .start = lockres_seq_start,
688 .stop = lockres_seq_stop, 688 .stop = lockres_seq_stop,
689 .next = lockres_seq_next, 689 .next = lockres_seq_next,
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d490b66ad9d7..98569e86c613 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -212,14 +212,18 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
212 spin_lock(&dlm->spinlock); 212 spin_lock(&dlm->spinlock);
213 } 213 }
214 214
215 spin_lock(&res->spinlock);
215 if (!list_empty(&res->purge)) { 216 if (!list_empty(&res->purge)) {
216 mlog(0, "removing lockres %.*s:%p from purgelist, " 217 mlog(0, "removing lockres %.*s:%p from purgelist, "
217 "master = %d\n", res->lockname.len, res->lockname.name, 218 "master = %d\n", res->lockname.len, res->lockname.name,
218 res, master); 219 res, master);
219 list_del_init(&res->purge); 220 list_del_init(&res->purge);
221 spin_unlock(&res->spinlock);
220 dlm_lockres_put(res); 222 dlm_lockres_put(res);
221 dlm->purge_count--; 223 dlm->purge_count--;
222 } 224 } else
225 spin_unlock(&res->spinlock);
226
223 __dlm_unhash_lockres(res); 227 __dlm_unhash_lockres(res);
224 228
225 /* lockres is not in the hash now. drop the flag and wake up 229 /* lockres is not in the hash now. drop the flag and wake up
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 110bb57c46ab..0d38d67194cb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -53,6 +53,7 @@
53#include "super.h" 53#include "super.h"
54#include "uptodate.h" 54#include "uptodate.h"
55#include "quota.h" 55#include "quota.h"
56#include "refcounttree.h"
56 57
57#include "buffer_head_io.h" 58#include "buffer_head_io.h"
58 59
@@ -110,6 +111,11 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
110 111
111static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 112static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
112 113
114static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
115 int new_level);
116static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
117 int blocking);
118
113#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 119#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
114 120
115/* This aids in debugging situations where a bad LVB might be involved. */ 121/* This aids in debugging situations where a bad LVB might be involved. */
@@ -278,6 +284,12 @@ static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
278 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 284 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
279}; 285};
280 286
287static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
288 .check_downconvert = ocfs2_check_refcount_downconvert,
289 .downconvert_worker = ocfs2_refcount_convert_worker,
290 .flags = 0,
291};
292
281static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 293static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
282{ 294{
283 return lockres->l_type == OCFS2_LOCK_TYPE_META || 295 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -306,6 +318,12 @@ static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_re
306 return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 318 return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
307} 319}
308 320
321static inline struct ocfs2_refcount_tree *
322ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
323{
324 return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
325}
326
309static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 327static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
310{ 328{
311 if (lockres->l_ops->get_osb) 329 if (lockres->l_ops->get_osb)
@@ -693,6 +711,17 @@ void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
693 info); 711 info);
694} 712}
695 713
714void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
715 struct ocfs2_super *osb, u64 ref_blkno,
716 unsigned int generation)
717{
718 ocfs2_lock_res_init_once(lockres);
719 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
720 generation, lockres->l_name);
721 ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
722 &ocfs2_refcount_block_lops, osb);
723}
724
696void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 725void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
697{ 726{
698 mlog_entry_void(); 727 mlog_entry_void();
@@ -1548,8 +1577,10 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1548 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1577 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1549 write ? "EXMODE" : "PRMODE"); 1578 write ? "EXMODE" : "PRMODE");
1550 1579
1551 if (ocfs2_mount_local(osb)) 1580 if (ocfs2_mount_local(osb)) {
1581 mlog_exit(0);
1552 return 0; 1582 return 0;
1583 }
1553 1584
1554 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1585 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1555 1586
@@ -2127,7 +2158,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2127 2158
2128 /* This will discard any caching information we might have had 2159 /* This will discard any caching information we might have had
2129 * for the inode metadata. */ 2160 * for the inode metadata. */
2130 ocfs2_metadata_cache_purge(inode); 2161 ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2131 2162
2132 ocfs2_extent_map_trunc(inode, 0); 2163 ocfs2_extent_map_trunc(inode, 0);
2133 2164
@@ -3009,6 +3040,7 @@ static void ocfs2_unlock_ast(void *opaque, int error)
3009 "unlock_action %d\n", error, lockres->l_name, 3040 "unlock_action %d\n", error, lockres->l_name,
3010 lockres->l_unlock_action); 3041 lockres->l_unlock_action);
3011 spin_unlock_irqrestore(&lockres->l_lock, flags); 3042 spin_unlock_irqrestore(&lockres->l_lock, flags);
3043 mlog_exit_void();
3012 return; 3044 return;
3013 } 3045 }
3014 3046
@@ -3495,11 +3527,11 @@ out:
3495 return UNBLOCK_CONTINUE; 3527 return UNBLOCK_CONTINUE;
3496} 3528}
3497 3529
3498static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3530static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3499 int new_level) 3531 struct ocfs2_lock_res *lockres,
3532 int new_level)
3500{ 3533{
3501 struct inode *inode = ocfs2_lock_res_inode(lockres); 3534 int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3502 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
3503 3535
3504 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3536 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3505 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3537 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
@@ -3507,10 +3539,18 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
3507 if (checkpointed) 3539 if (checkpointed)
3508 return 1; 3540 return 1;
3509 3541
3510 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 3542 ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3511 return 0; 3543 return 0;
3512} 3544}
3513 3545
3546static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
3547 int new_level)
3548{
3549 struct inode *inode = ocfs2_lock_res_inode(lockres);
3550
3551 return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level);
3552}
3553
3514static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3554static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
3515{ 3555{
3516 struct inode *inode = ocfs2_lock_res_inode(lockres); 3556 struct inode *inode = ocfs2_lock_res_inode(lockres);
@@ -3640,6 +3680,26 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3640 return UNBLOCK_CONTINUE_POST; 3680 return UNBLOCK_CONTINUE_POST;
3641} 3681}
3642 3682
3683static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
3684 int new_level)
3685{
3686 struct ocfs2_refcount_tree *tree =
3687 ocfs2_lock_res_refcount_tree(lockres);
3688
3689 return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
3690}
3691
3692static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
3693 int blocking)
3694{
3695 struct ocfs2_refcount_tree *tree =
3696 ocfs2_lock_res_refcount_tree(lockres);
3697
3698 ocfs2_metadata_cache_purge(&tree->rf_ci);
3699
3700 return UNBLOCK_CONTINUE;
3701}
3702
3643static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 3703static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3644{ 3704{
3645 struct ocfs2_qinfo_lvb *lvb; 3705 struct ocfs2_qinfo_lvb *lvb;
@@ -3752,6 +3812,37 @@ bail:
3752 return status; 3812 return status;
3753} 3813}
3754 3814
3815int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
3816{
3817 int status;
3818 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3819 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3820 struct ocfs2_super *osb = lockres->l_priv;
3821
3822
3823 if (ocfs2_is_hard_readonly(osb))
3824 return -EROFS;
3825
3826 if (ocfs2_mount_local(osb))
3827 return 0;
3828
3829 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3830 if (status < 0)
3831 mlog_errno(status);
3832
3833 return status;
3834}
3835
3836void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
3837{
3838 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3839 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3840 struct ocfs2_super *osb = lockres->l_priv;
3841
3842 if (!ocfs2_mount_local(osb))
3843 ocfs2_cluster_unlock(osb, lockres, level);
3844}
3845
3755/* 3846/*
3756 * This is the filesystem locking protocol. It provides the lock handling 3847 * This is the filesystem locking protocol. It provides the lock handling
3757 * hooks for the underlying DLM. It has a maximum version number. 3848 * hooks for the underlying DLM. It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 7553836931de..d1ce48e1b3d6 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -101,6 +101,9 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
101struct ocfs2_mem_dqinfo; 101struct ocfs2_mem_dqinfo;
102void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 102void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
103 struct ocfs2_mem_dqinfo *info); 103 struct ocfs2_mem_dqinfo *info);
104void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
105 struct ocfs2_super *osb, u64 ref_blkno,
106 unsigned int generation);
104void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 107void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
105int ocfs2_create_new_inode_locks(struct inode *inode); 108int ocfs2_create_new_inode_locks(struct inode *inode);
106int ocfs2_drop_inode_locks(struct inode *inode); 109int ocfs2_drop_inode_locks(struct inode *inode);
@@ -148,6 +151,9 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock);
148void ocfs2_file_unlock(struct file *file); 151void ocfs2_file_unlock(struct file *file);
149int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex); 152int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
150void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex); 153void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
154struct ocfs2_refcount_tree;
155int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
156void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
151 157
152 158
153void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 159void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f2bb1a04d253..843db64e9d4a 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
293 struct ocfs2_extent_block *eb; 293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el; 294 struct ocfs2_extent_list *el;
295 295
296 ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); 296 ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
297 if (ret) { 297 if (ret) {
298 mlog_errno(ret); 298 mlog_errno(ret);
299 goto out; 299 goto out;
@@ -353,11 +353,11 @@ static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
353 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block 353 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
354 * containing el. 354 * containing el.
355 */ 355 */
356static int ocfs2_figure_hole_clusters(struct inode *inode, 356int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
357 struct ocfs2_extent_list *el, 357 struct ocfs2_extent_list *el,
358 struct buffer_head *eb_bh, 358 struct buffer_head *eb_bh,
359 u32 v_cluster, 359 u32 v_cluster,
360 u32 *num_clusters) 360 u32 *num_clusters)
361{ 361{
362 int ret, i; 362 int ret, i;
363 struct buffer_head *next_eb_bh = NULL; 363 struct buffer_head *next_eb_bh = NULL;
@@ -375,7 +375,7 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
375 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 375 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
376 goto no_more_extents; 376 goto no_more_extents;
377 377
378 ret = ocfs2_read_extent_block(inode, 378 ret = ocfs2_read_extent_block(ci,
379 le64_to_cpu(eb->h_next_leaf_blk), 379 le64_to_cpu(eb->h_next_leaf_blk),
380 &next_eb_bh); 380 &next_eb_bh);
381 if (ret) { 381 if (ret) {
@@ -428,7 +428,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
428 tree_height = le16_to_cpu(el->l_tree_depth); 428 tree_height = le16_to_cpu(el->l_tree_depth);
429 429
430 if (tree_height > 0) { 430 if (tree_height > 0) {
431 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); 431 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
432 &eb_bh);
432 if (ret) { 433 if (ret) {
433 mlog_errno(ret); 434 mlog_errno(ret);
434 goto out; 435 goto out;
@@ -455,7 +456,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
455 * field. 456 * field.
456 */ 457 */
457 if (hole_len) { 458 if (hole_len) {
458 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, 459 ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
460 el, eb_bh,
459 v_cluster, &len); 461 v_cluster, &len);
460 if (ret) { 462 if (ret) {
461 mlog_errno(ret); 463 mlog_errno(ret);
@@ -539,7 +541,8 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
539 541
540int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 542int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
541 u32 *p_cluster, u32 *num_clusters, 543 u32 *p_cluster, u32 *num_clusters,
542 struct ocfs2_extent_list *el) 544 struct ocfs2_extent_list *el,
545 unsigned int *extent_flags)
543{ 546{
544 int ret = 0, i; 547 int ret = 0, i;
545 struct buffer_head *eb_bh = NULL; 548 struct buffer_head *eb_bh = NULL;
@@ -548,7 +551,8 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
548 u32 coff; 551 u32 coff;
549 552
550 if (el->l_tree_depth) { 553 if (el->l_tree_depth) {
551 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); 554 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
555 &eb_bh);
552 if (ret) { 556 if (ret) {
553 mlog_errno(ret); 557 mlog_errno(ret);
554 goto out; 558 goto out;
@@ -590,6 +594,9 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
590 *p_cluster = *p_cluster + coff; 594 *p_cluster = *p_cluster + coff;
591 if (num_clusters) 595 if (num_clusters)
592 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 596 *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
597
598 if (extent_flags)
599 *extent_flags = rec->e_flags;
593 } 600 }
594out: 601out:
595 if (eb_bh) 602 if (eb_bh)
@@ -862,8 +869,8 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
862 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); 869 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
863 } 870 }
864 871
865 rc = ocfs2_read_blocks(inode, p_block, count, bhs + done, 872 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
866 flags, validate); 873 bhs + done, flags, validate);
867 if (rc) { 874 if (rc) {
868 mlog_errno(rc); 875 mlog_errno(rc);
869 break; 876 break;
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index b7dd9731b462..e79d41c2c909 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -55,12 +55,18 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
55 55
56int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 56int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
57 u32 *p_cluster, u32 *num_clusters, 57 u32 *p_cluster, u32 *num_clusters,
58 struct ocfs2_extent_list *el); 58 struct ocfs2_extent_list *el,
59 unsigned int *extent_flags);
59 60
60int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, 61int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
61 struct buffer_head *bhs[], int flags, 62 struct buffer_head *bhs[], int flags,
62 int (*validate)(struct super_block *sb, 63 int (*validate)(struct super_block *sb,
63 struct buffer_head *bh)); 64 struct buffer_head *bh));
65int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
66 struct ocfs2_extent_list *el,
67 struct buffer_head *eb_bh,
68 u32 v_cluster,
69 u32 *num_clusters);
64static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block, 70static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
65 struct buffer_head **bh, 71 struct buffer_head **bh,
66 int (*validate)(struct super_block *sb, 72 int (*validate)(struct super_block *sb,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 221c5e98957b..89fc8ee1f5a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -59,6 +59,7 @@
59#include "xattr.h" 59#include "xattr.h"
60#include "acl.h" 60#include "acl.h"
61#include "quota.h" 61#include "quota.h"
62#include "refcounttree.h"
62 63
63#include "buffer_head_io.h" 64#include "buffer_head_io.h"
64 65
@@ -259,7 +260,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
259 goto out; 260 goto out;
260 } 261 }
261 262
262 ret = ocfs2_journal_access_di(handle, inode, bh, 263 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
263 OCFS2_JOURNAL_ACCESS_WRITE); 264 OCFS2_JOURNAL_ACCESS_WRITE);
264 if (ret) { 265 if (ret) {
265 mlog_errno(ret); 266 mlog_errno(ret);
@@ -334,6 +335,39 @@ out:
334 return ret; 335 return ret;
335} 336}
336 337
338static int ocfs2_cow_file_pos(struct inode *inode,
339 struct buffer_head *fe_bh,
340 u64 offset)
341{
342 int status;
343 u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
344 unsigned int num_clusters = 0;
345 unsigned int ext_flags = 0;
346
347 /*
348 * If the new offset is aligned to the range of the cluster, there is
349 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
350 * CoW either.
351 */
352 if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
353 return 0;
354
355 status = ocfs2_get_clusters(inode, cpos, &phys,
356 &num_clusters, &ext_flags);
357 if (status) {
358 mlog_errno(status);
359 goto out;
360 }
361
362 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
363 goto out;
364
365 return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
366
367out:
368 return status;
369}
370
337static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, 371static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
338 struct inode *inode, 372 struct inode *inode,
339 struct buffer_head *fe_bh, 373 struct buffer_head *fe_bh,
@@ -346,6 +380,17 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
346 380
347 mlog_entry_void(); 381 mlog_entry_void();
348 382
383 /*
384 * We need to CoW the cluster contains the offset if it is reflinked
385 * since we will call ocfs2_zero_range_for_truncate later which will
386 * write "0" from offset to the end of the cluster.
387 */
388 status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
389 if (status) {
390 mlog_errno(status);
391 return status;
392 }
393
349 /* TODO: This needs to actually orphan the inode in this 394 /* TODO: This needs to actually orphan the inode in this
350 * transaction. */ 395 * transaction. */
351 396
@@ -356,7 +401,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
356 goto out; 401 goto out;
357 } 402 }
358 403
359 status = ocfs2_journal_access_di(handle, inode, fe_bh, 404 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
360 OCFS2_JOURNAL_ACCESS_WRITE); 405 OCFS2_JOURNAL_ACCESS_WRITE);
361 if (status < 0) { 406 if (status < 0) {
362 mlog_errno(status); 407 mlog_errno(status);
@@ -486,6 +531,8 @@ bail_unlock_sem:
486 up_write(&OCFS2_I(inode)->ip_alloc_sem); 531 up_write(&OCFS2_I(inode)->ip_alloc_sem);
487 532
488bail: 533bail:
534 if (!status && OCFS2_I(inode)->ip_clusters == 0)
535 status = ocfs2_try_remove_refcount_tree(inode, di_bh);
489 536
490 mlog_exit(status); 537 mlog_exit(status);
491 return status; 538 return status;
@@ -515,11 +562,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
515 int ret; 562 int ret;
516 struct ocfs2_extent_tree et; 563 struct ocfs2_extent_tree et;
517 564
518 ocfs2_init_dinode_extent_tree(&et, inode, fe_bh); 565 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
519 ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset, 566 ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
520 clusters_to_add, mark_unwritten, 567 clusters_to_add, mark_unwritten,
521 &et, handle, 568 data_ac, meta_ac, reason_ret);
522 data_ac, meta_ac, reason_ret);
523 569
524 return ret; 570 return ret;
525} 571}
@@ -564,7 +610,7 @@ restart_all:
564 (unsigned long long)OCFS2_I(inode)->ip_blkno, 610 (unsigned long long)OCFS2_I(inode)->ip_blkno,
565 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), 611 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
566 clusters_to_add); 612 clusters_to_add);
567 ocfs2_init_dinode_extent_tree(&et, inode, bh); 613 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
568 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, 614 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
569 &data_ac, &meta_ac); 615 &data_ac, &meta_ac);
570 if (status) { 616 if (status) {
@@ -593,7 +639,7 @@ restarted_transaction:
593 /* reserve a write to the file entry early on - that we if we 639 /* reserve a write to the file entry early on - that we if we
594 * run out of credits in the allocation path, we can still 640 * run out of credits in the allocation path, we can still
595 * update i_size. */ 641 * update i_size. */
596 status = ocfs2_journal_access_di(handle, inode, bh, 642 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
597 OCFS2_JOURNAL_ACCESS_WRITE); 643 OCFS2_JOURNAL_ACCESS_WRITE);
598 if (status < 0) { 644 if (status < 0) {
599 mlog_errno(status); 645 mlog_errno(status);
@@ -1131,7 +1177,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1131 goto out; 1177 goto out;
1132 } 1178 }
1133 1179
1134 ret = ocfs2_journal_access_di(handle, inode, bh, 1180 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1135 OCFS2_JOURNAL_ACCESS_WRITE); 1181 OCFS2_JOURNAL_ACCESS_WRITE);
1136 if (ret < 0) { 1182 if (ret < 0) {
1137 mlog_errno(ret); 1183 mlog_errno(ret);
@@ -1395,7 +1441,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1395 struct address_space *mapping = inode->i_mapping; 1441 struct address_space *mapping = inode->i_mapping;
1396 struct ocfs2_extent_tree et; 1442 struct ocfs2_extent_tree et;
1397 1443
1398 ocfs2_init_dinode_extent_tree(&et, inode, di_bh); 1444 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1399 ocfs2_init_dealloc_ctxt(&dealloc); 1445 ocfs2_init_dealloc_ctxt(&dealloc);
1400 1446
1401 if (byte_len == 0) 1447 if (byte_len == 0)
@@ -1657,6 +1703,70 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
1657 OCFS2_IOC_RESVSP64, &sr, change_size); 1703 OCFS2_IOC_RESVSP64, &sr, change_size);
1658} 1704}
1659 1705
1706int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
1707 size_t count)
1708{
1709 int ret = 0;
1710 unsigned int extent_flags;
1711 u32 cpos, clusters, extent_len, phys_cpos;
1712 struct super_block *sb = inode->i_sb;
1713
1714 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
1715 !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
1716 return 0;
1717
1718 cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
1719 clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
1720
1721 while (clusters) {
1722 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
1723 &extent_flags);
1724 if (ret < 0) {
1725 mlog_errno(ret);
1726 goto out;
1727 }
1728
1729 if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
1730 ret = 1;
1731 break;
1732 }
1733
1734 if (extent_len > clusters)
1735 extent_len = clusters;
1736
1737 clusters -= extent_len;
1738 cpos += extent_len;
1739 }
1740out:
1741 return ret;
1742}
1743
1744static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
1745 loff_t pos, size_t count,
1746 int *meta_level)
1747{
1748 int ret;
1749 struct buffer_head *di_bh = NULL;
1750 u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
1751 u32 clusters =
1752 ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
1753
1754 ret = ocfs2_inode_lock(inode, &di_bh, 1);
1755 if (ret) {
1756 mlog_errno(ret);
1757 goto out;
1758 }
1759
1760 *meta_level = 1;
1761
1762 ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
1763 if (ret)
1764 mlog_errno(ret);
1765out:
1766 brelse(di_bh);
1767 return ret;
1768}
1769
1660static int ocfs2_prepare_inode_for_write(struct dentry *dentry, 1770static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1661 loff_t *ppos, 1771 loff_t *ppos,
1662 size_t count, 1772 size_t count,
@@ -1713,6 +1823,22 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1713 1823
1714 end = saved_pos + count; 1824 end = saved_pos + count;
1715 1825
1826 ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
1827 if (ret == 1) {
1828 ocfs2_inode_unlock(inode, meta_level);
1829 meta_level = -1;
1830
1831 ret = ocfs2_prepare_inode_for_refcount(inode,
1832 saved_pos,
1833 count,
1834 &meta_level);
1835 }
1836
1837 if (ret < 0) {
1838 mlog_errno(ret);
1839 goto out_unlock;
1840 }
1841
1716 /* 1842 /*
1717 * Skip the O_DIRECT checks if we don't need 1843 * Skip the O_DIRECT checks if we don't need
1718 * them. 1844 * them.
@@ -1759,7 +1885,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1759 *ppos = saved_pos; 1885 *ppos = saved_pos;
1760 1886
1761out_unlock: 1887out_unlock:
1762 ocfs2_inode_unlock(inode, meta_level); 1888 if (meta_level >= 0)
1889 ocfs2_inode_unlock(inode, meta_level);
1763 1890
1764out: 1891out:
1765 return ret; 1892 return ret;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 172f9fbc9fc7..d66cf4f7c70e 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -69,4 +69,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
69int ocfs2_change_file_space(struct file *file, unsigned int cmd, 69int ocfs2_change_file_space(struct file *file, unsigned int cmd,
70 struct ocfs2_space_resv *sr); 70 struct ocfs2_space_resv *sr);
71 71
72int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
73 size_t count);
72#endif /* OCFS2_FILE_H */ 74#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4dc8890ba316..0297fb8982b8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -53,6 +53,7 @@
53#include "sysfile.h" 53#include "sysfile.h"
54#include "uptodate.h" 54#include "uptodate.h"
55#include "xattr.h" 55#include "xattr.h"
56#include "refcounttree.h"
56 57
57#include "buffer_head_io.h" 58#include "buffer_head_io.h"
58 59
@@ -562,7 +563,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
562 goto out; 563 goto out;
563 } 564 }
564 565
565 status = ocfs2_journal_access_di(handle, inode, fe_bh, 566 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
567 fe_bh,
566 OCFS2_JOURNAL_ACCESS_WRITE); 568 OCFS2_JOURNAL_ACCESS_WRITE);
567 if (status < 0) { 569 if (status < 0) {
568 mlog_errno(status); 570 mlog_errno(status);
@@ -646,7 +648,7 @@ static int ocfs2_remove_inode(struct inode *inode,
646 } 648 }
647 649
648 /* set the inodes dtime */ 650 /* set the inodes dtime */
649 status = ocfs2_journal_access_di(handle, inode, di_bh, 651 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
650 OCFS2_JOURNAL_ACCESS_WRITE); 652 OCFS2_JOURNAL_ACCESS_WRITE);
651 if (status < 0) { 653 if (status < 0) {
652 mlog_errno(status); 654 mlog_errno(status);
@@ -662,7 +664,7 @@ static int ocfs2_remove_inode(struct inode *inode,
662 goto bail_commit; 664 goto bail_commit;
663 } 665 }
664 666
665 ocfs2_remove_from_cache(inode, di_bh); 667 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
666 vfs_dq_free_inode(inode); 668 vfs_dq_free_inode(inode);
667 669
668 status = ocfs2_free_dinode(handle, inode_alloc_inode, 670 status = ocfs2_free_dinode(handle, inode_alloc_inode,
@@ -781,6 +783,12 @@ static int ocfs2_wipe_inode(struct inode *inode,
781 goto bail_unlock_dir; 783 goto bail_unlock_dir;
782 } 784 }
783 785
786 status = ocfs2_remove_refcount_tree(inode, di_bh);
787 if (status < 0) {
788 mlog_errno(status);
789 goto bail_unlock_dir;
790 }
791
784 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, 792 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
785 orphan_dir_bh); 793 orphan_dir_bh);
786 if (status < 0) 794 if (status < 0)
@@ -1112,13 +1120,14 @@ void ocfs2_clear_inode(struct inode *inode)
1112 ocfs2_lock_res_free(&oi->ip_inode_lockres); 1120 ocfs2_lock_res_free(&oi->ip_inode_lockres);
1113 ocfs2_lock_res_free(&oi->ip_open_lockres); 1121 ocfs2_lock_res_free(&oi->ip_open_lockres);
1114 1122
1115 ocfs2_metadata_cache_purge(inode); 1123 ocfs2_metadata_cache_exit(INODE_CACHE(inode));
1116 1124
1117 mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached, 1125 mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
1118 "Clear inode of %llu, inode has %u cache items\n", 1126 "Clear inode of %llu, inode has %u cache items\n",
1119 (unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached); 1127 (unsigned long long)oi->ip_blkno,
1128 INODE_CACHE(inode)->ci_num_cached);
1120 1129
1121 mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), 1130 mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE),
1122 "Clear inode of %llu, inode has a bad flag\n", 1131 "Clear inode of %llu, inode has a bad flag\n",
1123 (unsigned long long)oi->ip_blkno); 1132 (unsigned long long)oi->ip_blkno);
1124 1133
@@ -1145,9 +1154,7 @@ void ocfs2_clear_inode(struct inode *inode)
1145 (unsigned long long)oi->ip_blkno, oi->ip_open_count); 1154 (unsigned long long)oi->ip_blkno, oi->ip_open_count);
1146 1155
1147 /* Clear all other flags. */ 1156 /* Clear all other flags. */
1148 oi->ip_flags = OCFS2_INODE_CACHE_INLINE; 1157 oi->ip_flags = 0;
1149 oi->ip_created_trans = 0;
1150 oi->ip_last_trans = 0;
1151 oi->ip_dir_start_lookup = 0; 1158 oi->ip_dir_start_lookup = 0;
1152 oi->ip_blkno = 0ULL; 1159 oi->ip_blkno = 0ULL;
1153 1160
@@ -1239,7 +1246,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1239 mlog_entry("(inode %llu)\n", 1246 mlog_entry("(inode %llu)\n",
1240 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1247 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1241 1248
1242 status = ocfs2_journal_access_di(handle, inode, bh, 1249 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1243 OCFS2_JOURNAL_ACCESS_WRITE); 1250 OCFS2_JOURNAL_ACCESS_WRITE);
1244 if (status < 0) { 1251 if (status < 0) {
1245 mlog_errno(status); 1252 mlog_errno(status);
@@ -1380,8 +1387,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
1380 int rc; 1387 int rc;
1381 struct buffer_head *tmp = *bh; 1388 struct buffer_head *tmp = *bh;
1382 1389
1383 rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, 1390 rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
1384 flags, ocfs2_validate_inode_block); 1391 1, &tmp, flags, ocfs2_validate_inode_block);
1385 1392
1386 /* If ocfs2_read_blocks() got us a new bh, pass it up. */ 1393 /* If ocfs2_read_blocks() got us a new bh, pass it up. */
1387 if (!rc && !*bh) 1394 if (!rc && !*bh)
@@ -1394,3 +1401,56 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
1394{ 1401{
1395 return ocfs2_read_inode_block_full(inode, bh, 0); 1402 return ocfs2_read_inode_block_full(inode, bh, 0);
1396} 1403}
1404
1405
1406static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
1407{
1408 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1409
1410 return oi->ip_blkno;
1411}
1412
1413static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci)
1414{
1415 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1416
1417 return oi->vfs_inode.i_sb;
1418}
1419
1420static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
1421{
1422 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1423
1424 spin_lock(&oi->ip_lock);
1425}
1426
1427static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
1428{
1429 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1430
1431 spin_unlock(&oi->ip_lock);
1432}
1433
1434static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci)
1435{
1436 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1437
1438 mutex_lock(&oi->ip_io_mutex);
1439}
1440
1441static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
1442{
1443 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1444
1445 mutex_unlock(&oi->ip_io_mutex);
1446}
1447
1448const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
1449 .co_owner = ocfs2_inode_cache_owner,
1450 .co_get_super = ocfs2_inode_cache_get_super,
1451 .co_cache_lock = ocfs2_inode_cache_lock,
1452 .co_cache_unlock = ocfs2_inode_cache_unlock,
1453 .co_io_lock = ocfs2_inode_cache_io_lock,
1454 .co_io_unlock = ocfs2_inode_cache_io_unlock,
1455};
1456
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ea71525aad41..ba4fe07b293c 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -60,12 +60,6 @@ struct ocfs2_inode_info
60 60
61 u32 ip_dir_start_lookup; 61 u32 ip_dir_start_lookup;
62 62
63 /* next two are protected by trans_inc_lock */
64 /* which transaction were we created on? Zero if none. */
65 unsigned long ip_created_trans;
66 /* last transaction we were a part of. */
67 unsigned long ip_last_trans;
68
69 struct ocfs2_caching_info ip_metadata_cache; 63 struct ocfs2_caching_info ip_metadata_cache;
70 64
71 struct ocfs2_extent_map ip_extent_map; 65 struct ocfs2_extent_map ip_extent_map;
@@ -106,8 +100,6 @@ struct ocfs2_inode_info
106#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 100#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
107/* Does someone have the file open O_DIRECT */ 101/* Does someone have the file open O_DIRECT */
108#define OCFS2_INODE_OPEN_DIRECT 0x00000040 102#define OCFS2_INODE_OPEN_DIRECT 0x00000040
109/* Indicates that the metadata cache should be used as an array. */
110#define OCFS2_INODE_CACHE_INLINE 0x00000080
111 103
112static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) 104static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
113{ 105{
@@ -120,6 +112,12 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
120extern struct kmem_cache *ocfs2_inode_cache; 112extern struct kmem_cache *ocfs2_inode_cache;
121 113
122extern const struct address_space_operations ocfs2_aops; 114extern const struct address_space_operations ocfs2_aops;
115extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops;
116
117static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
118{
119 return &OCFS2_I(inode)->ip_metadata_cache;
120}
123 121
124void ocfs2_clear_inode(struct inode *inode); 122void ocfs2_clear_inode(struct inode *inode);
125void ocfs2_delete_inode(struct inode *inode); 123void ocfs2_delete_inode(struct inode *inode);
@@ -172,4 +170,10 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
172/* The same, but can be passed OCFS2_BH_* flags */ 170/* The same, but can be passed OCFS2_BH_* flags */
173int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, 171int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
174 int flags); 172 int flags);
173
174static inline struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
175{
176 return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
177}
178
175#endif /* OCFS2_INODE_H */ 179#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 467b413bec21..31fbb0619510 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -21,6 +21,7 @@
21#include "ocfs2_fs.h" 21#include "ocfs2_fs.h"
22#include "ioctl.h" 22#include "ioctl.h"
23#include "resize.h" 23#include "resize.h"
24#include "refcounttree.h"
24 25
25#include <linux/ext2_fs.h> 26#include <linux/ext2_fs.h>
26 27
@@ -115,6 +116,9 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
115 int status; 116 int status;
116 struct ocfs2_space_resv sr; 117 struct ocfs2_space_resv sr;
117 struct ocfs2_new_group_input input; 118 struct ocfs2_new_group_input input;
119 struct reflink_arguments args;
120 const char *old_path, *new_path;
121 bool preserve;
118 122
119 switch (cmd) { 123 switch (cmd) {
120 case OCFS2_IOC_GETFLAGS: 124 case OCFS2_IOC_GETFLAGS:
@@ -160,6 +164,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
160 return -EFAULT; 164 return -EFAULT;
161 165
162 return ocfs2_group_add(inode, &input); 166 return ocfs2_group_add(inode, &input);
167 case OCFS2_IOC_REFLINK:
168 if (copy_from_user(&args, (struct reflink_arguments *)arg,
169 sizeof(args)))
170 return -EFAULT;
171 old_path = (const char *)(unsigned long)args.old_path;
172 new_path = (const char *)(unsigned long)args.new_path;
173 preserve = (args.preserve != 0);
174
175 return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
163 default: 176 default:
164 return -ENOTTY; 177 return -ENOTTY;
165 } 178 }
@@ -182,6 +195,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
182 case OCFS2_IOC_GROUP_EXTEND: 195 case OCFS2_IOC_GROUP_EXTEND:
183 case OCFS2_IOC_GROUP_ADD: 196 case OCFS2_IOC_GROUP_ADD:
184 case OCFS2_IOC_GROUP_ADD64: 197 case OCFS2_IOC_GROUP_ADD64:
198 case OCFS2_IOC_REFLINK:
185 break; 199 break;
186 default: 200 default:
187 return -ENOIOCTLCMD; 201 return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c48b93ac6b65..54c16b66327e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -48,6 +48,7 @@
48#include "slot_map.h" 48#include "slot_map.h"
49#include "super.h" 49#include "super.h"
50#include "sysfile.h" 50#include "sysfile.h"
51#include "uptodate.h"
51#include "quota.h" 52#include "quota.h"
52 53
53#include "buffer_head_io.h" 54#include "buffer_head_io.h"
@@ -554,6 +555,14 @@ static struct ocfs2_triggers eb_triggers = {
554 .ot_offset = offsetof(struct ocfs2_extent_block, h_check), 555 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
555}; 556};
556 557
558static struct ocfs2_triggers rb_triggers = {
559 .ot_triggers = {
560 .t_commit = ocfs2_commit_trigger,
561 .t_abort = ocfs2_abort_trigger,
562 },
563 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
564};
565
557static struct ocfs2_triggers gd_triggers = { 566static struct ocfs2_triggers gd_triggers = {
558 .ot_triggers = { 567 .ot_triggers = {
559 .t_commit = ocfs2_commit_trigger, 568 .t_commit = ocfs2_commit_trigger,
@@ -601,14 +610,16 @@ static struct ocfs2_triggers dl_triggers = {
601}; 610};
602 611
603static int __ocfs2_journal_access(handle_t *handle, 612static int __ocfs2_journal_access(handle_t *handle,
604 struct inode *inode, 613 struct ocfs2_caching_info *ci,
605 struct buffer_head *bh, 614 struct buffer_head *bh,
606 struct ocfs2_triggers *triggers, 615 struct ocfs2_triggers *triggers,
607 int type) 616 int type)
608{ 617{
609 int status; 618 int status;
619 struct ocfs2_super *osb =
620 OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
610 621
611 BUG_ON(!inode); 622 BUG_ON(!ci || !ci->ci_ops);
612 BUG_ON(!handle); 623 BUG_ON(!handle);
613 BUG_ON(!bh); 624 BUG_ON(!bh);
614 625
@@ -627,15 +638,15 @@ static int __ocfs2_journal_access(handle_t *handle,
627 BUG(); 638 BUG();
628 } 639 }
629 640
630 /* Set the current transaction information on the inode so 641 /* Set the current transaction information on the ci so
631 * that the locking code knows whether it can drop it's locks 642 * that the locking code knows whether it can drop it's locks
632 * on this inode or not. We're protected from the commit 643 * on this ci or not. We're protected from the commit
633 * thread updating the current transaction id until 644 * thread updating the current transaction id until
634 * ocfs2_commit_trans() because ocfs2_start_trans() took 645 * ocfs2_commit_trans() because ocfs2_start_trans() took
635 * j_trans_barrier for us. */ 646 * j_trans_barrier for us. */
636 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); 647 ocfs2_set_ci_lock_trans(osb->journal, ci);
637 648
638 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 649 ocfs2_metadata_cache_io_lock(ci);
639 switch (type) { 650 switch (type) {
640 case OCFS2_JOURNAL_ACCESS_CREATE: 651 case OCFS2_JOURNAL_ACCESS_CREATE:
641 case OCFS2_JOURNAL_ACCESS_WRITE: 652 case OCFS2_JOURNAL_ACCESS_WRITE:
@@ -650,9 +661,9 @@ static int __ocfs2_journal_access(handle_t *handle,
650 status = -EINVAL; 661 status = -EINVAL;
651 mlog(ML_ERROR, "Uknown access type!\n"); 662 mlog(ML_ERROR, "Uknown access type!\n");
652 } 663 }
653 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers) 664 if (!status && ocfs2_meta_ecc(osb) && triggers)
654 jbd2_journal_set_triggers(bh, &triggers->ot_triggers); 665 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
655 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 666 ocfs2_metadata_cache_io_unlock(ci);
656 667
657 if (status < 0) 668 if (status < 0)
658 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 669 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
@@ -662,66 +673,65 @@ static int __ocfs2_journal_access(handle_t *handle,
662 return status; 673 return status;
663} 674}
664 675
665int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, 676int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
666 struct buffer_head *bh, int type) 677 struct buffer_head *bh, int type)
667{ 678{
668 return __ocfs2_journal_access(handle, inode, bh, &di_triggers, 679 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
669 type);
670} 680}
671 681
672int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, 682int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
673 struct buffer_head *bh, int type) 683 struct buffer_head *bh, int type)
674{ 684{
675 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers, 685 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
676 type);
677} 686}
678 687
679int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, 688int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
680 struct buffer_head *bh, int type) 689 struct buffer_head *bh, int type)
681{ 690{
682 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers, 691 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
683 type); 692 type);
684} 693}
685 694
686int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, 695int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
687 struct buffer_head *bh, int type) 696 struct buffer_head *bh, int type)
688{ 697{
689 return __ocfs2_journal_access(handle, inode, bh, &db_triggers, 698 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
690 type);
691} 699}
692 700
693int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, 701int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
694 struct buffer_head *bh, int type) 702 struct buffer_head *bh, int type)
695{ 703{
696 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers, 704 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
697 type);
698} 705}
699 706
700int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, 707int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
701 struct buffer_head *bh, int type) 708 struct buffer_head *bh, int type)
702{ 709{
703 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers, 710 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
704 type);
705} 711}
706 712
707int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, 713int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
708 struct buffer_head *bh, int type) 714 struct buffer_head *bh, int type)
709{ 715{
710 return __ocfs2_journal_access(handle, inode, bh, &dr_triggers, 716 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
711 type);
712} 717}
713 718
714int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, 719int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
715 struct buffer_head *bh, int type) 720 struct buffer_head *bh, int type)
716{ 721{
717 return __ocfs2_journal_access(handle, inode, bh, &dl_triggers, 722 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
718 type); 723}
724
725int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
726 struct buffer_head *bh, int type)
727{
728 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
719} 729}
720 730
721int ocfs2_journal_access(handle_t *handle, struct inode *inode, 731int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
722 struct buffer_head *bh, int type) 732 struct buffer_head *bh, int type)
723{ 733{
724 return __ocfs2_journal_access(handle, inode, bh, NULL, type); 734 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
725} 735}
726 736
727int ocfs2_journal_dirty(handle_t *handle, 737int ocfs2_journal_dirty(handle_t *handle,
@@ -898,7 +908,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
898 ocfs2_bump_recovery_generation(fe); 908 ocfs2_bump_recovery_generation(fe);
899 909
900 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); 910 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
901 status = ocfs2_write_block(osb, bh, journal->j_inode); 911 status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
902 if (status < 0) 912 if (status < 0)
903 mlog_errno(status); 913 mlog_errno(status);
904 914
@@ -1642,7 +1652,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1642 ocfs2_get_recovery_generation(fe); 1652 ocfs2_get_recovery_generation(fe);
1643 1653
1644 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); 1654 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
1645 status = ocfs2_write_block(osb, bh, inode); 1655 status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
1646 if (status < 0) 1656 if (status < 0)
1647 mlog_errno(status); 1657 mlog_errno(status);
1648 1658
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2c3222aec622..3f74e09b0d80 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -90,56 +90,66 @@ static inline unsigned long ocfs2_inc_trans_id(struct ocfs2_journal *j)
90 return old_id; 90 return old_id;
91} 91}
92 92
93static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal, 93static inline void ocfs2_set_ci_lock_trans(struct ocfs2_journal *journal,
94 struct inode *inode) 94 struct ocfs2_caching_info *ci)
95{ 95{
96 spin_lock(&trans_inc_lock); 96 spin_lock(&trans_inc_lock);
97 OCFS2_I(inode)->ip_last_trans = journal->j_trans_id; 97 ci->ci_last_trans = journal->j_trans_id;
98 spin_unlock(&trans_inc_lock); 98 spin_unlock(&trans_inc_lock);
99} 99}
100 100
101/* Used to figure out whether it's safe to drop a metadata lock on an 101/* Used to figure out whether it's safe to drop a metadata lock on an
102 * inode. Returns true if all the inodes changes have been 102 * cached object. Returns true if all the object's changes have been
103 * checkpointed to disk. You should be holding the spinlock on the 103 * checkpointed to disk. You should be holding the spinlock on the
104 * metadata lock while calling this to be sure that nobody can take 104 * metadata lock while calling this to be sure that nobody can take
105 * the lock and put it on another transaction. */ 105 * the lock and put it on another transaction. */
106static inline int ocfs2_inode_fully_checkpointed(struct inode *inode) 106static inline int ocfs2_ci_fully_checkpointed(struct ocfs2_caching_info *ci)
107{ 107{
108 int ret; 108 int ret;
109 struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal; 109 struct ocfs2_journal *journal =
110 OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
110 111
111 spin_lock(&trans_inc_lock); 112 spin_lock(&trans_inc_lock);
112 ret = time_after(journal->j_trans_id, OCFS2_I(inode)->ip_last_trans); 113 ret = time_after(journal->j_trans_id, ci->ci_last_trans);
113 spin_unlock(&trans_inc_lock); 114 spin_unlock(&trans_inc_lock);
114 return ret; 115 return ret;
115} 116}
116 117
117/* convenience function to check if an inode is still new (has never 118/* convenience function to check if an object backed by struct
118 * hit disk) Will do you a favor and set created_trans = 0 when you've 119 * ocfs2_caching_info is still new (has never hit disk) Will do you a
119 * been checkpointed. returns '1' if the inode is still new. */ 120 * favor and set created_trans = 0 when you've
120static inline int ocfs2_inode_is_new(struct inode *inode) 121 * been checkpointed. returns '1' if the ci is still new. */
122static inline int ocfs2_ci_is_new(struct ocfs2_caching_info *ci)
121{ 123{
122 int ret; 124 int ret;
125 struct ocfs2_journal *journal =
126 OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
123 127
128 spin_lock(&trans_inc_lock);
129 ret = !(time_after(journal->j_trans_id, ci->ci_created_trans));
130 if (!ret)
131 ci->ci_created_trans = 0;
132 spin_unlock(&trans_inc_lock);
133 return ret;
134}
135
136/* Wrapper for inodes so we can check system files */
137static inline int ocfs2_inode_is_new(struct inode *inode)
138{
124 /* System files are never "new" as they're written out by 139 /* System files are never "new" as they're written out by
125 * mkfs. This helps us early during mount, before we have the 140 * mkfs. This helps us early during mount, before we have the
126 * journal open and j_trans_id could be junk. */ 141 * journal open and j_trans_id could be junk. */
127 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) 142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
128 return 0; 143 return 0;
129 spin_lock(&trans_inc_lock); 144
130 ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id, 145 return ocfs2_ci_is_new(INODE_CACHE(inode));
131 OCFS2_I(inode)->ip_created_trans));
132 if (!ret)
133 OCFS2_I(inode)->ip_created_trans = 0;
134 spin_unlock(&trans_inc_lock);
135 return ret;
136} 146}
137 147
138static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, 148static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
139 struct inode *inode) 149 struct ocfs2_caching_info *ci)
140{ 150{
141 spin_lock(&trans_inc_lock); 151 spin_lock(&trans_inc_lock);
142 OCFS2_I(inode)->ip_created_trans = osb->journal->j_trans_id; 152 ci->ci_created_trans = osb->journal->j_trans_id;
143 spin_unlock(&trans_inc_lock); 153 spin_unlock(&trans_inc_lock);
144} 154}
145 155
@@ -200,7 +210,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
200 if (ocfs2_mount_local(osb)) 210 if (ocfs2_mount_local(osb))
201 return; 211 return;
202 212
203 if (!ocfs2_inode_fully_checkpointed(inode)) { 213 if (!ocfs2_ci_fully_checkpointed(INODE_CACHE(inode))) {
204 /* WARNING: This only kicks off a single 214 /* WARNING: This only kicks off a single
205 * checkpoint. If someone races you and adds more 215 * checkpoint. If someone races you and adds more
206 * metadata to the journal, you won't know, and will 216 * metadata to the journal, you won't know, and will
@@ -210,7 +220,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
210 ocfs2_start_checkpoint(osb); 220 ocfs2_start_checkpoint(osb);
211 221
212 wait_event(osb->journal->j_checkpointed, 222 wait_event(osb->journal->j_checkpointed,
213 ocfs2_inode_fully_checkpointed(inode)); 223 ocfs2_ci_fully_checkpointed(INODE_CACHE(inode)));
214 } 224 }
215} 225}
216 226
@@ -266,31 +276,34 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks);
266 276
267 277
268/* ocfs2_inode */ 278/* ocfs2_inode */
269int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, 279int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
270 struct buffer_head *bh, int type); 280 struct buffer_head *bh, int type);
271/* ocfs2_extent_block */ 281/* ocfs2_extent_block */
272int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, 282int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
283 struct buffer_head *bh, int type);
284/* ocfs2_refcount_block */
285int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
273 struct buffer_head *bh, int type); 286 struct buffer_head *bh, int type);
274/* ocfs2_group_desc */ 287/* ocfs2_group_desc */
275int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, 288int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
276 struct buffer_head *bh, int type); 289 struct buffer_head *bh, int type);
277/* ocfs2_xattr_block */ 290/* ocfs2_xattr_block */
278int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, 291int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
279 struct buffer_head *bh, int type); 292 struct buffer_head *bh, int type);
280/* quota blocks */ 293/* quota blocks */
281int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, 294int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
282 struct buffer_head *bh, int type); 295 struct buffer_head *bh, int type);
283/* dirblock */ 296/* dirblock */
284int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, 297int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
285 struct buffer_head *bh, int type); 298 struct buffer_head *bh, int type);
286/* ocfs2_dx_root_block */ 299/* ocfs2_dx_root_block */
287int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, 300int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
288 struct buffer_head *bh, int type); 301 struct buffer_head *bh, int type);
289/* ocfs2_dx_leaf */ 302/* ocfs2_dx_leaf */
290int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, 303int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
291 struct buffer_head *bh, int type); 304 struct buffer_head *bh, int type);
292/* Anything that has no ecc */ 305/* Anything that has no ecc */
293int ocfs2_journal_access(handle_t *handle, struct inode *inode, 306int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
294 struct buffer_head *bh, int type); 307 struct buffer_head *bh, int type);
295 308
296/* 309/*
@@ -477,6 +490,23 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
477 return credits; 490 return credits;
478} 491}
479 492
493/* inode update, new refcount block and its allocation credits. */
494#define OCFS2_REFCOUNT_TREE_CREATE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1 \
495 + OCFS2_SUBALLOC_ALLOC)
496
497/* inode and the refcount block update. */
498#define OCFS2_REFCOUNT_TREE_SET_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
499
500/*
501 * inode and the refcount block update.
502 * It doesn't include the credits for sub alloc change.
503 * So if we need to free the bit, OCFS2_SUBALLOC_FREE needs to be added.
504 */
505#define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
506
507/* 2 metadata alloc, 2 new blocks and root refcount block */
508#define OCFS2_EXPAND_REFCOUNT_TREE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + 3)
509
480/* 510/*
481 * Please note that the caller must make sure that root_el is the root 511 * Please note that the caller must make sure that root_el is the root
482 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise 512 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index bac7e6abaf47..ac10f83edb95 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -297,8 +297,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
297 } 297 }
298 memcpy(alloc_copy, alloc, bh->b_size); 298 memcpy(alloc_copy, alloc, bh->b_size);
299 299
300 status = ocfs2_journal_access_di(handle, local_alloc_inode, bh, 300 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
301 OCFS2_JOURNAL_ACCESS_WRITE); 301 bh, OCFS2_JOURNAL_ACCESS_WRITE);
302 if (status < 0) { 302 if (status < 0) {
303 mlog_errno(status); 303 mlog_errno(status);
304 goto out_commit; 304 goto out_commit;
@@ -392,7 +392,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
392 ocfs2_clear_local_alloc(alloc); 392 ocfs2_clear_local_alloc(alloc);
393 393
394 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 394 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
395 status = ocfs2_write_block(osb, alloc_bh, inode); 395 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
396 if (status < 0) 396 if (status < 0)
397 mlog_errno(status); 397 mlog_errno(status);
398 398
@@ -678,7 +678,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
678 * delete bits from it! */ 678 * delete bits from it! */
679 *num_bits = bits_wanted; 679 *num_bits = bits_wanted;
680 680
681 status = ocfs2_journal_access_di(handle, local_alloc_inode, 681 status = ocfs2_journal_access_di(handle,
682 INODE_CACHE(local_alloc_inode),
682 osb->local_alloc_bh, 683 osb->local_alloc_bh,
683 OCFS2_JOURNAL_ACCESS_WRITE); 684 OCFS2_JOURNAL_ACCESS_WRITE);
684 if (status < 0) { 685 if (status < 0) {
@@ -1156,7 +1157,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1156 } 1157 }
1157 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); 1158 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
1158 1159
1159 status = ocfs2_journal_access_di(handle, local_alloc_inode, 1160 status = ocfs2_journal_access_di(handle,
1161 INODE_CACHE(local_alloc_inode),
1160 osb->local_alloc_bh, 1162 osb->local_alloc_bh,
1161 OCFS2_JOURNAL_ACCESS_WRITE); 1163 OCFS2_JOURNAL_ACCESS_WRITE);
1162 if (status < 0) { 1164 if (status < 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8601f934010b..f010b22b1c44 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -69,7 +69,6 @@
69static int ocfs2_mknod_locked(struct ocfs2_super *osb, 69static int ocfs2_mknod_locked(struct ocfs2_super *osb,
70 struct inode *dir, 70 struct inode *dir,
71 struct inode *inode, 71 struct inode *inode,
72 struct dentry *dentry,
73 dev_t dev, 72 dev_t dev,
74 struct buffer_head **new_fe_bh, 73 struct buffer_head **new_fe_bh,
75 struct buffer_head *parent_fe_bh, 74 struct buffer_head *parent_fe_bh,
@@ -78,7 +77,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
78 77
79static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 78static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
80 struct inode **ret_orphan_dir, 79 struct inode **ret_orphan_dir,
81 struct inode *inode, 80 u64 blkno,
82 char *name, 81 char *name,
83 struct ocfs2_dir_lookup_result *lookup); 82 struct ocfs2_dir_lookup_result *lookup);
84 83
@@ -358,8 +357,12 @@ static int ocfs2_mknod(struct inode *dir,
358 } 357 }
359 did_quota_inode = 1; 358 did_quota_inode = 1;
360 359
360 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
361 inode->i_mode, (unsigned long)dev, dentry->d_name.len,
362 dentry->d_name.name);
363
361 /* do the real work now. */ 364 /* do the real work now. */
362 status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev, 365 status = ocfs2_mknod_locked(osb, dir, inode, dev,
363 &new_fe_bh, parent_fe_bh, handle, 366 &new_fe_bh, parent_fe_bh, handle,
364 inode_ac); 367 inode_ac);
365 if (status < 0) { 368 if (status < 0) {
@@ -375,7 +378,8 @@ static int ocfs2_mknod(struct inode *dir,
375 goto leave; 378 goto leave;
376 } 379 }
377 380
378 status = ocfs2_journal_access_di(handle, dir, parent_fe_bh, 381 status = ocfs2_journal_access_di(handle, INODE_CACHE(dir),
382 parent_fe_bh,
379 OCFS2_JOURNAL_ACCESS_WRITE); 383 OCFS2_JOURNAL_ACCESS_WRITE);
380 if (status < 0) { 384 if (status < 0) {
381 mlog_errno(status); 385 mlog_errno(status);
@@ -465,7 +469,6 @@ leave:
465static int ocfs2_mknod_locked(struct ocfs2_super *osb, 469static int ocfs2_mknod_locked(struct ocfs2_super *osb,
466 struct inode *dir, 470 struct inode *dir,
467 struct inode *inode, 471 struct inode *inode,
468 struct dentry *dentry,
469 dev_t dev, 472 dev_t dev,
470 struct buffer_head **new_fe_bh, 473 struct buffer_head **new_fe_bh,
471 struct buffer_head *parent_fe_bh, 474 struct buffer_head *parent_fe_bh,
@@ -479,10 +482,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
479 u16 suballoc_bit; 482 u16 suballoc_bit;
480 u16 feat; 483 u16 feat;
481 484
482 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
483 inode->i_mode, (unsigned long)dev, dentry->d_name.len,
484 dentry->d_name.name);
485
486 *new_fe_bh = NULL; 485 *new_fe_bh = NULL;
487 486
488 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, 487 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
@@ -507,9 +506,10 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
507 mlog_errno(status); 506 mlog_errno(status);
508 goto leave; 507 goto leave;
509 } 508 }
510 ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); 509 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh);
511 510
512 status = ocfs2_journal_access_di(handle, inode, *new_fe_bh, 511 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
512 *new_fe_bh,
513 OCFS2_JOURNAL_ACCESS_CREATE); 513 OCFS2_JOURNAL_ACCESS_CREATE);
514 if (status < 0) { 514 if (status < 0) {
515 mlog_errno(status); 515 mlog_errno(status);
@@ -565,7 +565,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
565 } 565 }
566 566
567 ocfs2_populate_inode(inode, fe, 1); 567 ocfs2_populate_inode(inode, fe, 1);
568 ocfs2_inode_set_new(osb, inode); 568 ocfs2_ci_set_new(osb, INODE_CACHE(inode));
569 if (!ocfs2_mount_local(osb)) { 569 if (!ocfs2_mount_local(osb)) {
570 status = ocfs2_create_new_inode_locks(inode); 570 status = ocfs2_create_new_inode_locks(inode);
571 if (status < 0) 571 if (status < 0)
@@ -682,7 +682,7 @@ static int ocfs2_link(struct dentry *old_dentry,
682 goto out_unlock_inode; 682 goto out_unlock_inode;
683 } 683 }
684 684
685 err = ocfs2_journal_access_di(handle, inode, fe_bh, 685 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
686 OCFS2_JOURNAL_ACCESS_WRITE); 686 OCFS2_JOURNAL_ACCESS_WRITE);
687 if (err < 0) { 687 if (err < 0) {
688 mlog_errno(err); 688 mlog_errno(err);
@@ -850,7 +850,8 @@ static int ocfs2_unlink(struct inode *dir,
850 } 850 }
851 851
852 if (inode_is_unlinkable(inode)) { 852 if (inode_is_unlinkable(inode)) {
853 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode, 853 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
854 OCFS2_I(inode)->ip_blkno,
854 orphan_name, &orphan_insert); 855 orphan_name, &orphan_insert);
855 if (status < 0) { 856 if (status < 0) {
856 mlog_errno(status); 857 mlog_errno(status);
@@ -866,7 +867,7 @@ static int ocfs2_unlink(struct inode *dir,
866 goto leave; 867 goto leave;
867 } 868 }
868 869
869 status = ocfs2_journal_access_di(handle, inode, fe_bh, 870 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
870 OCFS2_JOURNAL_ACCESS_WRITE); 871 OCFS2_JOURNAL_ACCESS_WRITE);
871 if (status < 0) { 872 if (status < 0) {
872 mlog_errno(status); 873 mlog_errno(status);
@@ -1241,9 +1242,8 @@ static int ocfs2_rename(struct inode *old_dir,
1241 1242
1242 if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { 1243 if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1243 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, 1244 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
1244 new_inode, 1245 OCFS2_I(new_inode)->ip_blkno,
1245 orphan_name, 1246 orphan_name, &orphan_insert);
1246 &orphan_insert);
1247 if (status < 0) { 1247 if (status < 0) {
1248 mlog_errno(status); 1248 mlog_errno(status);
1249 goto bail; 1249 goto bail;
@@ -1284,7 +1284,8 @@ static int ocfs2_rename(struct inode *old_dir,
1284 goto bail; 1284 goto bail;
1285 } 1285 }
1286 } 1286 }
1287 status = ocfs2_journal_access_di(handle, new_inode, newfe_bh, 1287 status = ocfs2_journal_access_di(handle, INODE_CACHE(new_inode),
1288 newfe_bh,
1288 OCFS2_JOURNAL_ACCESS_WRITE); 1289 OCFS2_JOURNAL_ACCESS_WRITE);
1289 if (status < 0) { 1290 if (status < 0) {
1290 mlog_errno(status); 1291 mlog_errno(status);
@@ -1331,7 +1332,8 @@ static int ocfs2_rename(struct inode *old_dir,
1331 old_inode->i_ctime = CURRENT_TIME; 1332 old_inode->i_ctime = CURRENT_TIME;
1332 mark_inode_dirty(old_inode); 1333 mark_inode_dirty(old_inode);
1333 1334
1334 status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh, 1335 status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
1336 old_inode_bh,
1335 OCFS2_JOURNAL_ACCESS_WRITE); 1337 OCFS2_JOURNAL_ACCESS_WRITE);
1336 if (status >= 0) { 1338 if (status >= 0) {
1337 old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; 1339 old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
@@ -1407,9 +1409,10 @@ static int ocfs2_rename(struct inode *old_dir,
1407 (int)old_dir_nlink, old_dir->i_nlink); 1409 (int)old_dir_nlink, old_dir->i_nlink);
1408 } else { 1410 } else {
1409 struct ocfs2_dinode *fe; 1411 struct ocfs2_dinode *fe;
1410 status = ocfs2_journal_access_di(handle, old_dir, 1412 status = ocfs2_journal_access_di(handle,
1411 old_dir_bh, 1413 INODE_CACHE(old_dir),
1412 OCFS2_JOURNAL_ACCESS_WRITE); 1414 old_dir_bh,
1415 OCFS2_JOURNAL_ACCESS_WRITE);
1413 fe = (struct ocfs2_dinode *) old_dir_bh->b_data; 1416 fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1414 ocfs2_set_links_count(fe, old_dir->i_nlink); 1417 ocfs2_set_links_count(fe, old_dir->i_nlink);
1415 status = ocfs2_journal_dirty(handle, old_dir_bh); 1418 status = ocfs2_journal_dirty(handle, old_dir_bh);
@@ -1527,9 +1530,11 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1527 mlog_errno(status); 1530 mlog_errno(status);
1528 goto bail; 1531 goto bail;
1529 } 1532 }
1530 ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]); 1533 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
1534 bhs[virtual]);
1531 1535
1532 status = ocfs2_journal_access(handle, inode, bhs[virtual], 1536 status = ocfs2_journal_access(handle, INODE_CACHE(inode),
1537 bhs[virtual],
1533 OCFS2_JOURNAL_ACCESS_CREATE); 1538 OCFS2_JOURNAL_ACCESS_CREATE);
1534 if (status < 0) { 1539 if (status < 0) {
1535 mlog_errno(status); 1540 mlog_errno(status);
@@ -1692,7 +1697,11 @@ static int ocfs2_symlink(struct inode *dir,
1692 } 1697 }
1693 did_quota_inode = 1; 1698 did_quota_inode = 1;
1694 1699
1695 status = ocfs2_mknod_locked(osb, dir, inode, dentry, 1700 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry,
1701 inode->i_mode, dentry->d_name.len,
1702 dentry->d_name.name);
1703
1704 status = ocfs2_mknod_locked(osb, dir, inode,
1696 0, &new_fe_bh, parent_fe_bh, handle, 1705 0, &new_fe_bh, parent_fe_bh, handle,
1697 inode_ac); 1706 inode_ac);
1698 if (status < 0) { 1707 if (status < 0) {
@@ -1842,7 +1851,7 @@ bail:
1842 1851
1843static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 1852static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1844 struct inode **ret_orphan_dir, 1853 struct inode **ret_orphan_dir,
1845 struct inode *inode, 1854 u64 blkno,
1846 char *name, 1855 char *name,
1847 struct ocfs2_dir_lookup_result *lookup) 1856 struct ocfs2_dir_lookup_result *lookup)
1848{ 1857{
@@ -1850,7 +1859,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1850 struct buffer_head *orphan_dir_bh = NULL; 1859 struct buffer_head *orphan_dir_bh = NULL;
1851 int status = 0; 1860 int status = 0;
1852 1861
1853 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); 1862 status = ocfs2_blkno_stringify(blkno, name);
1854 if (status < 0) { 1863 if (status < 0) {
1855 mlog_errno(status); 1864 mlog_errno(status);
1856 return status; 1865 return status;
@@ -1917,7 +1926,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1917 goto leave; 1926 goto leave;
1918 } 1927 }
1919 1928
1920 status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh, 1929 status = ocfs2_journal_access_di(handle,
1930 INODE_CACHE(orphan_dir_inode),
1931 orphan_dir_bh,
1921 OCFS2_JOURNAL_ACCESS_WRITE); 1932 OCFS2_JOURNAL_ACCESS_WRITE);
1922 if (status < 0) { 1933 if (status < 0) {
1923 mlog_errno(status); 1934 mlog_errno(status);
@@ -2002,7 +2013,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2002 goto leave; 2013 goto leave;
2003 } 2014 }
2004 2015
2005 status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh, 2016 status = ocfs2_journal_access_di(handle,
2017 INODE_CACHE(orphan_dir_inode),
2018 orphan_dir_bh,
2006 OCFS2_JOURNAL_ACCESS_WRITE); 2019 OCFS2_JOURNAL_ACCESS_WRITE);
2007 if (status < 0) { 2020 if (status < 0) {
2008 mlog_errno(status); 2021 mlog_errno(status);
@@ -2028,6 +2041,274 @@ leave:
2028 return status; 2041 return status;
2029} 2042}
2030 2043
2044int ocfs2_create_inode_in_orphan(struct inode *dir,
2045 int mode,
2046 struct inode **new_inode)
2047{
2048 int status, did_quota_inode = 0;
2049 struct inode *inode = NULL;
2050 struct inode *orphan_dir = NULL;
2051 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2052 struct ocfs2_dinode *di = NULL;
2053 handle_t *handle = NULL;
2054 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
2055 struct buffer_head *parent_di_bh = NULL;
2056 struct buffer_head *new_di_bh = NULL;
2057 struct ocfs2_alloc_context *inode_ac = NULL;
2058 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
2059
2060 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2061 if (status < 0) {
2062 if (status != -ENOENT)
2063 mlog_errno(status);
2064 return status;
2065 }
2066
2067 /*
2068 * We give the orphan dir the root blkno to fake an orphan name,
2069 * and allocate enough space for our insertion.
2070 */
2071 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
2072 osb->root_blkno,
2073 orphan_name, &orphan_insert);
2074 if (status < 0) {
2075 mlog_errno(status);
2076 goto leave;
2077 }
2078
2079 /* reserve an inode spot */
2080 status = ocfs2_reserve_new_inode(osb, &inode_ac);
2081 if (status < 0) {
2082 if (status != -ENOSPC)
2083 mlog_errno(status);
2084 goto leave;
2085 }
2086
2087 inode = ocfs2_get_init_inode(dir, mode);
2088 if (!inode) {
2089 status = -ENOMEM;
2090 mlog_errno(status);
2091 goto leave;
2092 }
2093
2094 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 0, 0));
2095 if (IS_ERR(handle)) {
2096 status = PTR_ERR(handle);
2097 handle = NULL;
2098 mlog_errno(status);
2099 goto leave;
2100 }
2101
2102 /* We don't use standard VFS wrapper because we don't want vfs_dq_init
2103 * to be called. */
2104 if (sb_any_quota_active(osb->sb) &&
2105 osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
2106 status = -EDQUOT;
2107 goto leave;
2108 }
2109 did_quota_inode = 1;
2110
2111 /* do the real work now. */
2112 status = ocfs2_mknod_locked(osb, dir, inode,
2113 0, &new_di_bh, parent_di_bh, handle,
2114 inode_ac);
2115 if (status < 0) {
2116 mlog_errno(status);
2117 goto leave;
2118 }
2119
2120 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name);
2121 if (status < 0) {
2122 mlog_errno(status);
2123 goto leave;
2124 }
2125
2126 di = (struct ocfs2_dinode *)new_di_bh->b_data;
2127 status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name,
2128 &orphan_insert, orphan_dir);
2129 if (status < 0) {
2130 mlog_errno(status);
2131 goto leave;
2132 }
2133
2134 /* get open lock so that only nodes can't remove it from orphan dir. */
2135 status = ocfs2_open_lock(inode);
2136 if (status < 0)
2137 mlog_errno(status);
2138
2139leave:
2140 if (status < 0 && did_quota_inode)
2141 vfs_dq_free_inode(inode);
2142 if (handle)
2143 ocfs2_commit_trans(osb, handle);
2144
2145 if (orphan_dir) {
2146 /* This was locked for us in ocfs2_prepare_orphan_dir() */
2147 ocfs2_inode_unlock(orphan_dir, 1);
2148 mutex_unlock(&orphan_dir->i_mutex);
2149 iput(orphan_dir);
2150 }
2151
2152 if (status == -ENOSPC)
2153 mlog(0, "Disk is full\n");
2154
2155 if ((status < 0) && inode) {
2156 clear_nlink(inode);
2157 iput(inode);
2158 }
2159
2160 if (inode_ac)
2161 ocfs2_free_alloc_context(inode_ac);
2162
2163 brelse(new_di_bh);
2164
2165 if (!status)
2166 *new_inode = inode;
2167
2168 ocfs2_free_dir_lookup_result(&orphan_insert);
2169
2170 ocfs2_inode_unlock(dir, 1);
2171 brelse(parent_di_bh);
2172 return status;
2173}
2174
2175int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2176 struct inode *inode,
2177 struct dentry *dentry)
2178{
2179 int status = 0;
2180 struct buffer_head *parent_di_bh = NULL;
2181 handle_t *handle = NULL;
2182 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2183 struct ocfs2_dinode *dir_di, *di;
2184 struct inode *orphan_dir_inode = NULL;
2185 struct buffer_head *orphan_dir_bh = NULL;
2186 struct buffer_head *di_bh = NULL;
2187 struct ocfs2_dir_lookup_result lookup = { NULL, };
2188
2189 mlog_entry("(0x%p, 0x%p, %.*s')\n", dir, dentry,
2190 dentry->d_name.len, dentry->d_name.name);
2191
2192 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2193 if (status < 0) {
2194 if (status != -ENOENT)
2195 mlog_errno(status);
2196 return status;
2197 }
2198
2199 dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data;
2200 if (!dir_di->i_links_count) {
2201 /* can't make a file in a deleted directory. */
2202 status = -ENOENT;
2203 goto leave;
2204 }
2205
2206 status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
2207 dentry->d_name.len);
2208 if (status)
2209 goto leave;
2210
2211 /* get a spot inside the dir. */
2212 status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh,
2213 dentry->d_name.name,
2214 dentry->d_name.len, &lookup);
2215 if (status < 0) {
2216 mlog_errno(status);
2217 goto leave;
2218 }
2219
2220 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2221 ORPHAN_DIR_SYSTEM_INODE,
2222 osb->slot_num);
2223 if (!orphan_dir_inode) {
2224 status = -EEXIST;
2225 mlog_errno(status);
2226 goto leave;
2227 }
2228
2229 mutex_lock(&orphan_dir_inode->i_mutex);
2230
2231 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
2232 if (status < 0) {
2233 mlog_errno(status);
2234 mutex_unlock(&orphan_dir_inode->i_mutex);
2235 iput(orphan_dir_inode);
2236 goto leave;
2237 }
2238
2239 status = ocfs2_read_inode_block(inode, &di_bh);
2240 if (status < 0) {
2241 mlog_errno(status);
2242 goto orphan_unlock;
2243 }
2244
2245 handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
2246 if (IS_ERR(handle)) {
2247 status = PTR_ERR(handle);
2248 handle = NULL;
2249 mlog_errno(status);
2250 goto orphan_unlock;
2251 }
2252
2253 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
2254 di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
2255 if (status < 0) {
2256 mlog_errno(status);
2257 goto out_commit;
2258 }
2259
2260 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
2261 orphan_dir_bh);
2262 if (status < 0) {
2263 mlog_errno(status);
2264 goto out_commit;
2265 }
2266
2267 di = (struct ocfs2_dinode *)di_bh->b_data;
2268 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2269 di->i_orphaned_slot = 0;
2270 ocfs2_journal_dirty(handle, di_bh);
2271
2272 status = ocfs2_add_entry(handle, dentry, inode,
2273 OCFS2_I(inode)->ip_blkno, parent_di_bh,
2274 &lookup);
2275 if (status < 0) {
2276 mlog_errno(status);
2277 goto out_commit;
2278 }
2279
2280 status = ocfs2_dentry_attach_lock(dentry, inode,
2281 OCFS2_I(dir)->ip_blkno);
2282 if (status) {
2283 mlog_errno(status);
2284 goto out_commit;
2285 }
2286
2287 insert_inode_hash(inode);
2288 dentry->d_op = &ocfs2_dentry_ops;
2289 d_instantiate(dentry, inode);
2290 status = 0;
2291out_commit:
2292 ocfs2_commit_trans(osb, handle);
2293orphan_unlock:
2294 ocfs2_inode_unlock(orphan_dir_inode, 1);
2295 mutex_unlock(&orphan_dir_inode->i_mutex);
2296 iput(orphan_dir_inode);
2297leave:
2298
2299 ocfs2_inode_unlock(dir, 1);
2300
2301 brelse(di_bh);
2302 brelse(parent_di_bh);
2303 brelse(orphan_dir_bh);
2304
2305 ocfs2_free_dir_lookup_result(&lookup);
2306
2307 mlog_exit(status);
2308
2309 return status;
2310}
2311
2031const struct inode_operations ocfs2_dir_iops = { 2312const struct inode_operations ocfs2_dir_iops = {
2032 .create = ocfs2_create, 2313 .create = ocfs2_create,
2033 .lookup = ocfs2_lookup, 2314 .lookup = ocfs2_lookup,
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 688aef64c879..e5d059d4f115 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -35,5 +35,11 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
35 struct inode *orphan_dir_inode, 35 struct inode *orphan_dir_inode,
36 struct inode *inode, 36 struct inode *inode,
37 struct buffer_head *orphan_dir_bh); 37 struct buffer_head *orphan_dir_bh);
38int ocfs2_create_inode_in_orphan(struct inode *dir,
39 int mode,
40 struct inode **new_inode);
41int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
42 struct inode *new_inode,
43 struct dentry *new_dentry);
38 44
39#endif /* OCFS2_NAMEI_H */ 45#endif /* OCFS2_NAMEI_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 39e1d5a39505..eae404602424 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -51,20 +51,51 @@
51/* For struct ocfs2_blockcheck_stats */ 51/* For struct ocfs2_blockcheck_stats */
52#include "blockcheck.h" 52#include "blockcheck.h"
53 53
54
55/* Caching of metadata buffers */
56
54/* Most user visible OCFS2 inodes will have very few pieces of 57/* Most user visible OCFS2 inodes will have very few pieces of
55 * metadata, but larger files (including bitmaps, etc) must be taken 58 * metadata, but larger files (including bitmaps, etc) must be taken
56 * into account when designing an access scheme. We allow a small 59 * into account when designing an access scheme. We allow a small
57 * amount of inlined blocks to be stored on an array and grow the 60 * amount of inlined blocks to be stored on an array and grow the
58 * structure into a rb tree when necessary. */ 61 * structure into a rb tree when necessary. */
59#define OCFS2_INODE_MAX_CACHE_ARRAY 2 62#define OCFS2_CACHE_INFO_MAX_ARRAY 2
63
64/* Flags for ocfs2_caching_info */
65
66enum ocfs2_caching_info_flags {
67 /* Indicates that the metadata cache is using the inline array */
68 OCFS2_CACHE_FL_INLINE = 1<<1,
69};
60 70
71struct ocfs2_caching_operations;
61struct ocfs2_caching_info { 72struct ocfs2_caching_info {
73 /*
74 * The parent structure provides the locks, but because the
75 * parent structure can differ, it provides locking operations
76 * to struct ocfs2_caching_info.
77 */
78 const struct ocfs2_caching_operations *ci_ops;
79
80 /* next two are protected by trans_inc_lock */
81 /* which transaction were we created on? Zero if none. */
82 unsigned long ci_created_trans;
83 /* last transaction we were a part of. */
84 unsigned long ci_last_trans;
85
86 /* Cache structures */
87 unsigned int ci_flags;
62 unsigned int ci_num_cached; 88 unsigned int ci_num_cached;
63 union { 89 union {
64 sector_t ci_array[OCFS2_INODE_MAX_CACHE_ARRAY]; 90 sector_t ci_array[OCFS2_CACHE_INFO_MAX_ARRAY];
65 struct rb_root ci_tree; 91 struct rb_root ci_tree;
66 } ci_cache; 92 } ci_cache;
67}; 93};
94/*
95 * Need this prototype here instead of in uptodate.h because journal.h
96 * uses it.
97 */
98struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
68 99
69/* this limits us to 256 nodes 100/* this limits us to 256 nodes
70 * if we need more, we can do a kmalloc for the map */ 101 * if we need more, we can do a kmalloc for the map */
@@ -377,12 +408,17 @@ struct ocfs2_super
377 408
378 /* the group we used to allocate inodes. */ 409 /* the group we used to allocate inodes. */
379 u64 osb_inode_alloc_group; 410 u64 osb_inode_alloc_group;
411
412 /* rb tree root for refcount lock. */
413 struct rb_root osb_rf_lock_tree;
414 struct ocfs2_refcount_tree *osb_ref_tree_lru;
380}; 415};
381 416
382#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 417#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
383 418
384/* Useful typedef for passing around journal access functions */ 419/* Useful typedef for passing around journal access functions */
385typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode, 420typedef int (*ocfs2_journal_access_func)(handle_t *handle,
421 struct ocfs2_caching_info *ci,
386 struct buffer_head *bh, int type); 422 struct buffer_head *bh, int type);
387 423
388static inline int ocfs2_should_order_data(struct inode *inode) 424static inline int ocfs2_should_order_data(struct inode *inode)
@@ -480,6 +516,13 @@ static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
480 ocfs2_set_links_count(di, links); 516 ocfs2_set_links_count(di, links);
481} 517}
482 518
519static inline int ocfs2_refcount_tree(struct ocfs2_super *osb)
520{
521 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
522 return 1;
523 return 0;
524}
525
483/* set / clear functions because cluster events can make these happen 526/* set / clear functions because cluster events can make these happen
484 * in parallel so we want the transitions to be atomic. this also 527 * in parallel so we want the transitions to be atomic. this also
485 * means that any future flags osb_flags must be protected by spinlock 528 * means that any future flags osb_flags must be protected by spinlock
@@ -578,6 +621,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
578#define OCFS2_IS_VALID_DX_LEAF(ptr) \ 621#define OCFS2_IS_VALID_DX_LEAF(ptr) \
579 (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) 622 (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))
580 623
624#define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr) \
625 (!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE))
626
581static inline unsigned long ino_from_blkno(struct super_block *sb, 627static inline unsigned long ino_from_blkno(struct super_block *sb,
582 u64 blkno) 628 u64 blkno)
583{ 629{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7ab6e9e5e77c..e9431e4a5e7c 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -68,6 +68,7 @@
68#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" 68#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
69#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01" 69#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01"
70#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1" 70#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1"
71#define OCFS2_REFCOUNT_BLOCK_SIGNATURE "REFCNT1"
71 72
72/* Compatibility flags */ 73/* Compatibility flags */
73#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ 74#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@@ -98,7 +99,8 @@
98 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ 99 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
99 | OCFS2_FEATURE_INCOMPAT_XATTR \ 100 | OCFS2_FEATURE_INCOMPAT_XATTR \
100 | OCFS2_FEATURE_INCOMPAT_META_ECC \ 101 | OCFS2_FEATURE_INCOMPAT_META_ECC \
101 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
102#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 104#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
103 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 105 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
104 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 106 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -160,6 +162,9 @@
160/* Metadata checksum and error correction */ 162/* Metadata checksum and error correction */
161#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 163#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800
162 164
165/* Refcount tree support */
166#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000
167
163/* 168/*
164 * backup superblock flag is used to indicate that this volume 169 * backup superblock flag is used to indicate that this volume
165 * has backup superblocks. 170 * has backup superblocks.
@@ -223,6 +228,7 @@
223#define OCFS2_HAS_XATTR_FL (0x0002) 228#define OCFS2_HAS_XATTR_FL (0x0002)
224#define OCFS2_INLINE_XATTR_FL (0x0004) 229#define OCFS2_INLINE_XATTR_FL (0x0004)
225#define OCFS2_INDEXED_DIR_FL (0x0008) 230#define OCFS2_INDEXED_DIR_FL (0x0008)
231#define OCFS2_HAS_REFCOUNT_FL (0x0010)
226 232
227/* Inode attributes, keep in sync with EXT2 */ 233/* Inode attributes, keep in sync with EXT2 */
228#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ 234#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
@@ -241,8 +247,11 @@
241/* 247/*
242 * Extent record flags (e_node.leaf.flags) 248 * Extent record flags (e_node.leaf.flags)
243 */ 249 */
244#define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but 250#define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but
245 * unwritten */ 251 * unwritten */
252#define OCFS2_EXT_REFCOUNTED (0x02) /* Extent is reference
253 * counted in an associated
254 * refcount tree */
246 255
247/* 256/*
248 * ioctl commands 257 * ioctl commands
@@ -292,6 +301,15 @@ struct ocfs2_new_group_input {
292#define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) 301#define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input)
293#define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) 302#define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input)
294 303
304/* Used to pass 2 file names to reflink. */
305struct reflink_arguments {
306 __u64 old_path;
307 __u64 new_path;
308 __u64 preserve;
309};
310#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments)
311
312
295/* 313/*
296 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) 314 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
297 */ 315 */
@@ -717,7 +735,8 @@ struct ocfs2_dinode {
717 __le64 i_xattr_loc; 735 __le64 i_xattr_loc;
718/*80*/ struct ocfs2_block_check i_check; /* Error checking */ 736/*80*/ struct ocfs2_block_check i_check; /* Error checking */
719/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ 737/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
720 __le64 i_reserved2[5]; 738/*90*/ __le64 i_refcount_loc;
739 __le64 i_reserved2[4];
721/*B8*/ union { 740/*B8*/ union {
722 __le64 i_pad1; /* Generic way to refer to this 741 __le64 i_pad1; /* Generic way to refer to this
723 64bit union */ 742 64bit union */
@@ -901,6 +920,60 @@ struct ocfs2_group_desc
901/*40*/ __u8 bg_bitmap[0]; 920/*40*/ __u8 bg_bitmap[0];
902}; 921};
903 922
923struct ocfs2_refcount_rec {
924/*00*/ __le64 r_cpos; /* Physical offset, in clusters */
925 __le32 r_clusters; /* Clusters covered by this extent */
926 __le32 r_refcount; /* Reference count of this extent */
927/*10*/
928};
929#define OCFS2_32BIT_POS_MASK (0xffffffffULL)
930
931#define OCFS2_REFCOUNT_LEAF_FL (0x00000001)
932#define OCFS2_REFCOUNT_TREE_FL (0x00000002)
933
934struct ocfs2_refcount_list {
935/*00*/ __le16 rl_count; /* Maximum number of entries possible
936 in rl_records */
937 __le16 rl_used; /* Current number of used records */
938 __le32 rl_reserved2;
939 __le64 rl_reserved1; /* Pad to sizeof(ocfs2_refcount_record) */
940/*10*/ struct ocfs2_refcount_rec rl_recs[0]; /* Refcount records */
941};
942
943
944struct ocfs2_refcount_block {
945/*00*/ __u8 rf_signature[8]; /* Signature for verification */
946 __le16 rf_suballoc_slot; /* Slot suballocator this block
947 belongs to */
948 __le16 rf_suballoc_bit; /* Bit offset in suballocator
949 block group */
950 __le32 rf_fs_generation; /* Must match superblock */
951/*10*/ __le64 rf_blkno; /* Offset on disk, in blocks */
952 __le64 rf_parent; /* Parent block, only valid if
953 OCFS2_REFCOUNT_LEAF_FL is set in
954 rf_flags */
955/*20*/ struct ocfs2_block_check rf_check; /* Error checking */
956 __le64 rf_last_eb_blk; /* Pointer to last extent block */
957/*30*/ __le32 rf_count; /* Number of inodes sharing this
958 refcount tree */
959 __le32 rf_flags; /* See the flags above */
960 __le32 rf_clusters; /* clusters covered by refcount tree. */
961 __le32 rf_cpos; /* cluster offset in refcount tree.*/
962/*40*/ __le32 rf_generation; /* generation number. all be the same
963 * for the same refcount tree. */
964 __le32 rf_reserved0;
965 __le64 rf_reserved1[7];
966/*80*/ union {
967 struct ocfs2_refcount_list rf_records; /* List of refcount
968 records */
969 struct ocfs2_extent_list rf_list; /* Extent record list,
970 only valid if
971 OCFS2_REFCOUNT_TREE_FL
972 is set in rf_flags */
973 };
974/* Actual on-disk size is one block */
975};
976
904/* 977/*
905 * On disk extended attribute structure for OCFS2. 978 * On disk extended attribute structure for OCFS2.
906 */ 979 */
@@ -1312,6 +1385,32 @@ static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
1312 1385
1313 return size / sizeof(struct ocfs2_extent_rec); 1386 return size / sizeof(struct ocfs2_extent_rec);
1314} 1387}
1388
1389static inline u16 ocfs2_extent_recs_per_rb(struct super_block *sb)
1390{
1391 int size;
1392
1393 size = sb->s_blocksize -
1394 offsetof(struct ocfs2_refcount_block, rf_list.l_recs);
1395
1396 return size / sizeof(struct ocfs2_extent_rec);
1397}
1398
1399static inline u16 ocfs2_refcount_recs_per_rb(struct super_block *sb)
1400{
1401 int size;
1402
1403 size = sb->s_blocksize -
1404 offsetof(struct ocfs2_refcount_block, rf_records.rl_recs);
1405
1406 return size / sizeof(struct ocfs2_refcount_rec);
1407}
1408
1409static inline u32
1410ocfs2_get_ref_rec_low_cpos(const struct ocfs2_refcount_rec *rec)
1411{
1412 return le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
1413}
1315#else 1414#else
1316static inline int ocfs2_fast_symlink_chars(int blocksize) 1415static inline int ocfs2_fast_symlink_chars(int blocksize)
1317{ 1416{
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index c212cf5a2bdf..d277aabf5dfb 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -49,6 +49,7 @@ enum ocfs2_lock_type {
49 OCFS2_LOCK_TYPE_QINFO, 49 OCFS2_LOCK_TYPE_QINFO,
50 OCFS2_LOCK_TYPE_NFS_SYNC, 50 OCFS2_LOCK_TYPE_NFS_SYNC,
51 OCFS2_LOCK_TYPE_ORPHAN_SCAN, 51 OCFS2_LOCK_TYPE_ORPHAN_SCAN,
52 OCFS2_LOCK_TYPE_REFCOUNT,
52 OCFS2_NUM_LOCK_TYPES 53 OCFS2_NUM_LOCK_TYPES
53}; 54};
54 55
@@ -89,6 +90,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
89 case OCFS2_LOCK_TYPE_ORPHAN_SCAN: 90 case OCFS2_LOCK_TYPE_ORPHAN_SCAN:
90 c = 'P'; 91 c = 'P';
91 break; 92 break;
93 case OCFS2_LOCK_TYPE_REFCOUNT:
94 c = 'T';
95 break;
92 default: 96 default:
93 c = '\0'; 97 c = '\0';
94 } 98 }
@@ -110,6 +114,7 @@ static char *ocfs2_lock_type_strings[] = {
110 [OCFS2_LOCK_TYPE_QINFO] = "Quota", 114 [OCFS2_LOCK_TYPE_QINFO] = "Quota",
111 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", 115 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
112 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", 116 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
117 [OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount",
113}; 118};
114 119
115static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 120static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 3fb96fcd4c81..e5df9d170b0c 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -109,7 +109,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
109int ocfs2_read_quota_block(struct inode *inode, u64 v_block, 109int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
110 struct buffer_head **bh); 110 struct buffer_head **bh);
111 111
112extern struct dquot_operations ocfs2_quota_operations; 112extern const struct dquot_operations ocfs2_quota_operations;
113extern struct quota_format_type ocfs2_quota_format; 113extern struct quota_format_type ocfs2_quota_format;
114 114
115int ocfs2_quota_setup(void); 115int ocfs2_quota_setup(void);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 44f2a5e1d042..b437dc0c4cad 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -154,7 +154,7 @@ static int ocfs2_get_quota_block(struct inode *inode, int block,
154 err = -EIO; 154 err = -EIO;
155 mlog_errno(err); 155 mlog_errno(err);
156 } 156 }
157 return err;; 157 return err;
158} 158}
159 159
160/* Read data from global quotafile - avoid pagecache and such because we cannot 160/* Read data from global quotafile - avoid pagecache and such because we cannot
@@ -253,8 +253,9 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
253 flush_dcache_page(bh->b_page); 253 flush_dcache_page(bh->b_page);
254 set_buffer_uptodate(bh); 254 set_buffer_uptodate(bh);
255 unlock_buffer(bh); 255 unlock_buffer(bh);
256 ocfs2_set_buffer_uptodate(gqinode, bh); 256 ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
257 err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type); 257 err = ocfs2_journal_access_dq(handle, INODE_CACHE(gqinode), bh,
258 ja_type);
258 if (err < 0) { 259 if (err < 0) {
259 brelse(bh); 260 brelse(bh);
260 goto out; 261 goto out;
@@ -849,7 +850,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
849 kmem_cache_free(ocfs2_dquot_cachep, dquot); 850 kmem_cache_free(ocfs2_dquot_cachep, dquot);
850} 851}
851 852
852struct dquot_operations ocfs2_quota_operations = { 853const struct dquot_operations ocfs2_quota_operations = {
853 .initialize = dquot_initialize, 854 .initialize = dquot_initialize,
854 .drop = dquot_drop, 855 .drop = dquot_drop,
855 .alloc_space = dquot_alloc_space, 856 .alloc_space = dquot_alloc_space,
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index bdb09cb6e1fe..1a2c50a759fa 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -108,7 +108,7 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
108 mlog_errno(status); 108 mlog_errno(status);
109 return status; 109 return status;
110 } 110 }
111 status = ocfs2_journal_access_dq(handle, inode, bh, 111 status = ocfs2_journal_access_dq(handle, INODE_CACHE(inode), bh,
112 OCFS2_JOURNAL_ACCESS_WRITE); 112 OCFS2_JOURNAL_ACCESS_WRITE);
113 if (status < 0) { 113 if (status < 0) {
114 mlog_errno(status); 114 mlog_errno(status);
@@ -510,7 +510,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
510 goto out_commit; 510 goto out_commit;
511 } 511 }
512 /* Release local quota file entry */ 512 /* Release local quota file entry */
513 status = ocfs2_journal_access_dq(handle, lqinode, 513 status = ocfs2_journal_access_dq(handle,
514 INODE_CACHE(lqinode),
514 qbh, OCFS2_JOURNAL_ACCESS_WRITE); 515 qbh, OCFS2_JOURNAL_ACCESS_WRITE);
515 if (status < 0) { 516 if (status < 0) {
516 mlog_errno(status); 517 mlog_errno(status);
@@ -619,7 +620,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
619 mlog_errno(status); 620 mlog_errno(status);
620 goto out_bh; 621 goto out_bh;
621 } 622 }
622 status = ocfs2_journal_access_dq(handle, lqinode, bh, 623 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
624 bh,
623 OCFS2_JOURNAL_ACCESS_WRITE); 625 OCFS2_JOURNAL_ACCESS_WRITE);
624 if (status < 0) { 626 if (status < 0) {
625 mlog_errno(status); 627 mlog_errno(status);
@@ -993,8 +995,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
993 goto out_trans; 995 goto out_trans;
994 } 996 }
995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 997 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
996 ocfs2_set_new_buffer_uptodate(lqinode, bh); 998 ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
997 status = ocfs2_journal_access_dq(handle, lqinode, bh, 999 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
998 OCFS2_JOURNAL_ACCESS_CREATE); 1000 OCFS2_JOURNAL_ACCESS_CREATE);
999 if (status < 0) { 1001 if (status < 0) {
1000 mlog_errno(status); 1002 mlog_errno(status);
@@ -1027,8 +1029,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1027 mlog_errno(status); 1029 mlog_errno(status);
1028 goto out_trans; 1030 goto out_trans;
1029 } 1031 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh); 1032 ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh, 1033 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE); 1034 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) { 1035 if (status < 0) {
1034 mlog_errno(status); 1036 mlog_errno(status);
@@ -1131,7 +1133,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1131 mlog_errno(status); 1133 mlog_errno(status);
1132 goto out; 1134 goto out;
1133 } 1135 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh); 1136 ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
1135 1137
1136 /* Local quota info, chunk header and the new block we initialize */ 1138 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb), 1139 handle = ocfs2_start_trans(OCFS2_SB(sb),
@@ -1143,7 +1145,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1143 goto out; 1145 goto out;
1144 } 1146 }
1145 /* Zero created block */ 1147 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh, 1148 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE); 1149 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) { 1150 if (status < 0) {
1149 mlog_errno(status); 1151 mlog_errno(status);
@@ -1158,7 +1160,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1158 goto out_trans; 1160 goto out_trans;
1159 } 1161 }
1160 /* Update chunk header */ 1162 /* Update chunk header */
1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1163 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
1164 chunk->qc_headerbh,
1162 OCFS2_JOURNAL_ACCESS_WRITE); 1165 OCFS2_JOURNAL_ACCESS_WRITE);
1163 if (status < 0) { 1166 if (status < 0) {
1164 mlog_errno(status); 1167 mlog_errno(status);
@@ -1292,7 +1295,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
1292 goto out; 1295 goto out;
1293 } 1296 }
1294 1297
1295 status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type], 1298 status = ocfs2_journal_access_dq(handle,
1299 INODE_CACHE(sb_dqopt(sb)->files[type]),
1296 od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); 1300 od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
1297 if (status < 0) { 1301 if (status < 0) {
1298 mlog_errno(status); 1302 mlog_errno(status);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
new file mode 100644
index 000000000000..60287fc56bcb
--- /dev/null
+++ b/fs/ocfs2/refcounttree.c
@@ -0,0 +1,4313 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * refcounttree.c
5 *
6 * Copyright (C) 2009 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17
18#include <linux/sort.h>
19#define MLOG_MASK_PREFIX ML_REFCOUNT
20#include <cluster/masklog.h>
21#include "ocfs2.h"
22#include "inode.h"
23#include "alloc.h"
24#include "suballoc.h"
25#include "journal.h"
26#include "uptodate.h"
27#include "super.h"
28#include "buffer_head_io.h"
29#include "blockcheck.h"
30#include "refcounttree.h"
31#include "sysfile.h"
32#include "dlmglue.h"
33#include "extent_map.h"
34#include "aops.h"
35#include "xattr.h"
36#include "namei.h"
37
38#include <linux/bio.h>
39#include <linux/blkdev.h>
40#include <linux/gfp.h>
41#include <linux/slab.h>
42#include <linux/writeback.h>
43#include <linux/pagevec.h>
44#include <linux/swap.h>
45#include <linux/security.h>
46#include <linux/fsnotify.h>
47#include <linux/quotaops.h>
48#include <linux/namei.h>
49#include <linux/mount.h>
50
51struct ocfs2_cow_context {
52 struct inode *inode;
53 u32 cow_start;
54 u32 cow_len;
55 struct ocfs2_extent_tree data_et;
56 struct ocfs2_refcount_tree *ref_tree;
57 struct buffer_head *ref_root_bh;
58 struct ocfs2_alloc_context *meta_ac;
59 struct ocfs2_alloc_context *data_ac;
60 struct ocfs2_cached_dealloc_ctxt dealloc;
61 void *cow_object;
62 struct ocfs2_post_refcount *post_refcount;
63 int extra_credits;
64 int (*get_clusters)(struct ocfs2_cow_context *context,
65 u32 v_cluster, u32 *p_cluster,
66 u32 *num_clusters,
67 unsigned int *extent_flags);
68 int (*cow_duplicate_clusters)(handle_t *handle,
69 struct ocfs2_cow_context *context,
70 u32 cpos, u32 old_cluster,
71 u32 new_cluster, u32 new_len);
72};
73
74static inline struct ocfs2_refcount_tree *
75cache_info_to_refcount(struct ocfs2_caching_info *ci)
76{
77 return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
78}
79
80static int ocfs2_validate_refcount_block(struct super_block *sb,
81 struct buffer_head *bh)
82{
83 int rc;
84 struct ocfs2_refcount_block *rb =
85 (struct ocfs2_refcount_block *)bh->b_data;
86
87 mlog(0, "Validating refcount block %llu\n",
88 (unsigned long long)bh->b_blocknr);
89
90 BUG_ON(!buffer_uptodate(bh));
91
92 /*
93 * If the ecc fails, we return the error but otherwise
94 * leave the filesystem running. We know any error is
95 * local to this block.
96 */
97 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
98 if (rc) {
99 mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
100 (unsigned long long)bh->b_blocknr);
101 return rc;
102 }
103
104
105 if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
106 ocfs2_error(sb,
107 "Refcount block #%llu has bad signature %.*s",
108 (unsigned long long)bh->b_blocknr, 7,
109 rb->rf_signature);
110 return -EINVAL;
111 }
112
113 if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
114 ocfs2_error(sb,
115 "Refcount block #%llu has an invalid rf_blkno "
116 "of %llu",
117 (unsigned long long)bh->b_blocknr,
118 (unsigned long long)le64_to_cpu(rb->rf_blkno));
119 return -EINVAL;
120 }
121
122 if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
123 ocfs2_error(sb,
124 "Refcount block #%llu has an invalid "
125 "rf_fs_generation of #%u",
126 (unsigned long long)bh->b_blocknr,
127 le32_to_cpu(rb->rf_fs_generation));
128 return -EINVAL;
129 }
130
131 return 0;
132}
133
134static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
135 u64 rb_blkno,
136 struct buffer_head **bh)
137{
138 int rc;
139 struct buffer_head *tmp = *bh;
140
141 rc = ocfs2_read_block(ci, rb_blkno, &tmp,
142 ocfs2_validate_refcount_block);
143
144 /* If ocfs2_read_block() got us a new bh, pass it up. */
145 if (!rc && !*bh)
146 *bh = tmp;
147
148 return rc;
149}
150
151static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
152{
153 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
154
155 return rf->rf_blkno;
156}
157
158static struct super_block *
159ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
160{
161 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
162
163 return rf->rf_sb;
164}
165
166static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
167{
168 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
169
170 spin_lock(&rf->rf_lock);
171}
172
173static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
174{
175 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
176
177 spin_unlock(&rf->rf_lock);
178}
179
180static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
181{
182 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
183
184 mutex_lock(&rf->rf_io_mutex);
185}
186
187static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
188{
189 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
190
191 mutex_unlock(&rf->rf_io_mutex);
192}
193
194static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
195 .co_owner = ocfs2_refcount_cache_owner,
196 .co_get_super = ocfs2_refcount_cache_get_super,
197 .co_cache_lock = ocfs2_refcount_cache_lock,
198 .co_cache_unlock = ocfs2_refcount_cache_unlock,
199 .co_io_lock = ocfs2_refcount_cache_io_lock,
200 .co_io_unlock = ocfs2_refcount_cache_io_unlock,
201};
202
203static struct ocfs2_refcount_tree *
204ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
205{
206 struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
207 struct ocfs2_refcount_tree *tree = NULL;
208
209 while (n) {
210 tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
211
212 if (blkno < tree->rf_blkno)
213 n = n->rb_left;
214 else if (blkno > tree->rf_blkno)
215 n = n->rb_right;
216 else
217 return tree;
218 }
219
220 return NULL;
221}
222
223/* osb_lock is already locked. */
224static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
225 struct ocfs2_refcount_tree *new)
226{
227 u64 rf_blkno = new->rf_blkno;
228 struct rb_node *parent = NULL;
229 struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
230 struct ocfs2_refcount_tree *tmp;
231
232 while (*p) {
233 parent = *p;
234
235 tmp = rb_entry(parent, struct ocfs2_refcount_tree,
236 rf_node);
237
238 if (rf_blkno < tmp->rf_blkno)
239 p = &(*p)->rb_left;
240 else if (rf_blkno > tmp->rf_blkno)
241 p = &(*p)->rb_right;
242 else {
243 /* This should never happen! */
244 mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
245 (unsigned long long)rf_blkno);
246 BUG();
247 }
248 }
249
250 rb_link_node(&new->rf_node, parent, p);
251 rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
252}
253
254static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
255{
256 ocfs2_metadata_cache_exit(&tree->rf_ci);
257 ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
258 ocfs2_lock_res_free(&tree->rf_lockres);
259 kfree(tree);
260}
261
262static inline void
263ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
264 struct ocfs2_refcount_tree *tree)
265{
266 rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
267 if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
268 osb->osb_ref_tree_lru = NULL;
269}
270
271static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
272 struct ocfs2_refcount_tree *tree)
273{
274 spin_lock(&osb->osb_lock);
275 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
276 spin_unlock(&osb->osb_lock);
277}
278
279void ocfs2_kref_remove_refcount_tree(struct kref *kref)
280{
281 struct ocfs2_refcount_tree *tree =
282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
283
284 ocfs2_free_refcount_tree(tree);
285}
286
287static inline void
288ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
289{
290 kref_get(&tree->rf_getcnt);
291}
292
293static inline void
294ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
295{
296 kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
297}
298
299static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
300 struct super_block *sb)
301{
302 ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
303 mutex_init(&new->rf_io_mutex);
304 new->rf_sb = sb;
305 spin_lock_init(&new->rf_lock);
306}
307
308static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
309 struct ocfs2_refcount_tree *new,
310 u64 rf_blkno, u32 generation)
311{
312 init_rwsem(&new->rf_sem);
313 ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
314 rf_blkno, generation);
315}
316
317static struct ocfs2_refcount_tree*
318ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
319{
320 struct ocfs2_refcount_tree *new;
321
322 new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
323 if (!new)
324 return NULL;
325
326 new->rf_blkno = rf_blkno;
327 kref_init(&new->rf_getcnt);
328 ocfs2_init_refcount_tree_ci(new, osb->sb);
329
330 return new;
331}
332
333static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
334 struct ocfs2_refcount_tree **ret_tree)
335{
336 int ret = 0;
337 struct ocfs2_refcount_tree *tree, *new = NULL;
338 struct buffer_head *ref_root_bh = NULL;
339 struct ocfs2_refcount_block *ref_rb;
340
341 spin_lock(&osb->osb_lock);
342 if (osb->osb_ref_tree_lru &&
343 osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
344 tree = osb->osb_ref_tree_lru;
345 else
346 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
347 if (tree)
348 goto out;
349
350 spin_unlock(&osb->osb_lock);
351
352 new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
353 if (!new) {
354 ret = -ENOMEM;
355 mlog_errno(ret);
356 return ret;
357 }
358 /*
359 * We need the generation to create the refcount tree lock and since
360 * it isn't changed during the tree modification, we are safe here to
361 * read without protection.
362 * We also have to purge the cache after we create the lock since the
363 * refcount block may have the stale data. It can only be trusted when
364 * we hold the refcount lock.
365 */
366 ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
367 if (ret) {
368 mlog_errno(ret);
369 ocfs2_metadata_cache_exit(&new->rf_ci);
370 kfree(new);
371 return ret;
372 }
373
374 ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
375 new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
376 ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
377 new->rf_generation);
378 ocfs2_metadata_cache_purge(&new->rf_ci);
379
380 spin_lock(&osb->osb_lock);
381 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
382 if (tree)
383 goto out;
384
385 ocfs2_insert_refcount_tree(osb, new);
386
387 tree = new;
388 new = NULL;
389
390out:
391 *ret_tree = tree;
392
393 osb->osb_ref_tree_lru = tree;
394
395 spin_unlock(&osb->osb_lock);
396
397 if (new)
398 ocfs2_free_refcount_tree(new);
399
400 brelse(ref_root_bh);
401 return ret;
402}
403
404static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
405{
406 int ret;
407 struct buffer_head *di_bh = NULL;
408 struct ocfs2_dinode *di;
409
410 ret = ocfs2_read_inode_block(inode, &di_bh);
411 if (ret) {
412 mlog_errno(ret);
413 goto out;
414 }
415
416 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
417
418 di = (struct ocfs2_dinode *)di_bh->b_data;
419 *ref_blkno = le64_to_cpu(di->i_refcount_loc);
420 brelse(di_bh);
421out:
422 return ret;
423}
424
425static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
426 struct ocfs2_refcount_tree *tree, int rw)
427{
428 int ret;
429
430 ret = ocfs2_refcount_lock(tree, rw);
431 if (ret) {
432 mlog_errno(ret);
433 goto out;
434 }
435
436 if (rw)
437 down_write(&tree->rf_sem);
438 else
439 down_read(&tree->rf_sem);
440
441out:
442 return ret;
443}
444
445/*
446 * Lock the refcount tree pointed by ref_blkno and return the tree.
447 * In most case, we lock the tree and read the refcount block.
448 * So read it here if the caller really needs it.
449 *
450 * If the tree has been re-created by other node, it will free the
451 * old one and re-create it.
452 */
453int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
454 u64 ref_blkno, int rw,
455 struct ocfs2_refcount_tree **ret_tree,
456 struct buffer_head **ref_bh)
457{
458 int ret, delete_tree = 0;
459 struct ocfs2_refcount_tree *tree = NULL;
460 struct buffer_head *ref_root_bh = NULL;
461 struct ocfs2_refcount_block *rb;
462
463again:
464 ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
465 if (ret) {
466 mlog_errno(ret);
467 return ret;
468 }
469
470 ocfs2_refcount_tree_get(tree);
471
472 ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
473 if (ret) {
474 mlog_errno(ret);
475 ocfs2_refcount_tree_put(tree);
476 goto out;
477 }
478
479 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
480 &ref_root_bh);
481 if (ret) {
482 mlog_errno(ret);
483 ocfs2_unlock_refcount_tree(osb, tree, rw);
484 ocfs2_refcount_tree_put(tree);
485 goto out;
486 }
487
488 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
489 /*
490 * If the refcount block has been freed and re-created, we may need
491 * to recreate the refcount tree also.
492 *
493 * Here we just remove the tree from the rb-tree, and the last
494 * kref holder will unlock and delete this refcount_tree.
495 * Then we goto "again" and ocfs2_get_refcount_tree will create
496 * the new refcount tree for us.
497 */
498 if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
499 if (!tree->rf_removed) {
500 ocfs2_erase_refcount_tree_from_list(osb, tree);
501 tree->rf_removed = 1;
502 delete_tree = 1;
503 }
504
505 ocfs2_unlock_refcount_tree(osb, tree, rw);
506 /*
507 * We get an extra reference when we create the refcount
508 * tree, so another put will destroy it.
509 */
510 if (delete_tree)
511 ocfs2_refcount_tree_put(tree);
512 brelse(ref_root_bh);
513 ref_root_bh = NULL;
514 goto again;
515 }
516
517 *ret_tree = tree;
518 if (ref_bh) {
519 *ref_bh = ref_root_bh;
520 ref_root_bh = NULL;
521 }
522out:
523 brelse(ref_root_bh);
524 return ret;
525}
526
527int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
528 struct ocfs2_refcount_tree **ret_tree,
529 struct buffer_head **ref_bh)
530{
531 int ret;
532 u64 ref_blkno;
533
534 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
535 if (ret) {
536 mlog_errno(ret);
537 return ret;
538 }
539
540 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
541 rw, ret_tree, ref_bh);
542}
543
544void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
545 struct ocfs2_refcount_tree *tree, int rw)
546{
547 if (rw)
548 up_write(&tree->rf_sem);
549 else
550 up_read(&tree->rf_sem);
551
552 ocfs2_refcount_unlock(tree, rw);
553 ocfs2_refcount_tree_put(tree);
554}
555
556void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
557{
558 struct rb_node *node;
559 struct ocfs2_refcount_tree *tree;
560 struct rb_root *root = &osb->osb_rf_lock_tree;
561
562 while ((node = rb_last(root)) != NULL) {
563 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
564
565 mlog(0, "Purge tree %llu\n",
566 (unsigned long long) tree->rf_blkno);
567
568 rb_erase(&tree->rf_node, root);
569 ocfs2_free_refcount_tree(tree);
570 }
571}
572
573/*
574 * Create a refcount tree for an inode.
575 * We take for granted that the inode is already locked.
576 */
577static int ocfs2_create_refcount_tree(struct inode *inode,
578 struct buffer_head *di_bh)
579{
580 int ret;
581 handle_t *handle = NULL;
582 struct ocfs2_alloc_context *meta_ac = NULL;
583 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
584 struct ocfs2_inode_info *oi = OCFS2_I(inode);
585 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
586 struct buffer_head *new_bh = NULL;
587 struct ocfs2_refcount_block *rb;
588 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
589 u16 suballoc_bit_start;
590 u32 num_got;
591 u64 first_blkno;
592
593 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
594
595 mlog(0, "create tree for inode %lu\n", inode->i_ino);
596
597 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
598 if (ret) {
599 mlog_errno(ret);
600 goto out;
601 }
602
603 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
604 if (IS_ERR(handle)) {
605 ret = PTR_ERR(handle);
606 mlog_errno(ret);
607 goto out;
608 }
609
610 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
611 OCFS2_JOURNAL_ACCESS_WRITE);
612 if (ret) {
613 mlog_errno(ret);
614 goto out_commit;
615 }
616
617 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
618 &suballoc_bit_start, &num_got,
619 &first_blkno);
620 if (ret) {
621 mlog_errno(ret);
622 goto out_commit;
623 }
624
625 new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
626 if (!new_tree) {
627 ret = -ENOMEM;
628 mlog_errno(ret);
629 goto out_commit;
630 }
631
632 new_bh = sb_getblk(inode->i_sb, first_blkno);
633 ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
634
635 ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
636 OCFS2_JOURNAL_ACCESS_CREATE);
637 if (ret) {
638 mlog_errno(ret);
639 goto out_commit;
640 }
641
642 /* Initialize ocfs2_refcount_block. */
643 rb = (struct ocfs2_refcount_block *)new_bh->b_data;
644 memset(rb, 0, inode->i_sb->s_blocksize);
645 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
646 rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
647 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
648 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
649 rb->rf_blkno = cpu_to_le64(first_blkno);
650 rb->rf_count = cpu_to_le32(1);
651 rb->rf_records.rl_count =
652 cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
653 spin_lock(&osb->osb_lock);
654 rb->rf_generation = osb->s_next_generation++;
655 spin_unlock(&osb->osb_lock);
656
657 ocfs2_journal_dirty(handle, new_bh);
658
659 spin_lock(&oi->ip_lock);
660 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
661 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
662 di->i_refcount_loc = cpu_to_le64(first_blkno);
663 spin_unlock(&oi->ip_lock);
664
665 mlog(0, "created tree for inode %lu, refblock %llu\n",
666 inode->i_ino, (unsigned long long)first_blkno);
667
668 ocfs2_journal_dirty(handle, di_bh);
669
670 /*
671 * We have to init the tree lock here since it will use
672 * the generation number to create it.
673 */
674 new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
675 ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
676 new_tree->rf_generation);
677
678 spin_lock(&osb->osb_lock);
679 tree = ocfs2_find_refcount_tree(osb, first_blkno);
680
681 /*
682 * We've just created a new refcount tree in this block. If
683 * we found a refcount tree on the ocfs2_super, it must be
684 * one we just deleted. We free the old tree before
685 * inserting the new tree.
686 */
687 BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
688 if (tree)
689 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
690 ocfs2_insert_refcount_tree(osb, new_tree);
691 spin_unlock(&osb->osb_lock);
692 new_tree = NULL;
693 if (tree)
694 ocfs2_refcount_tree_put(tree);
695
696out_commit:
697 ocfs2_commit_trans(osb, handle);
698
699out:
700 if (new_tree) {
701 ocfs2_metadata_cache_exit(&new_tree->rf_ci);
702 kfree(new_tree);
703 }
704
705 brelse(new_bh);
706 if (meta_ac)
707 ocfs2_free_alloc_context(meta_ac);
708
709 return ret;
710}
711
712static int ocfs2_set_refcount_tree(struct inode *inode,
713 struct buffer_head *di_bh,
714 u64 refcount_loc)
715{
716 int ret;
717 handle_t *handle = NULL;
718 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
719 struct ocfs2_inode_info *oi = OCFS2_I(inode);
720 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
721 struct buffer_head *ref_root_bh = NULL;
722 struct ocfs2_refcount_block *rb;
723 struct ocfs2_refcount_tree *ref_tree;
724
725 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
726
727 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
728 &ref_tree, &ref_root_bh);
729 if (ret) {
730 mlog_errno(ret);
731 return ret;
732 }
733
734 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
735 if (IS_ERR(handle)) {
736 ret = PTR_ERR(handle);
737 mlog_errno(ret);
738 goto out;
739 }
740
741 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
742 OCFS2_JOURNAL_ACCESS_WRITE);
743 if (ret) {
744 mlog_errno(ret);
745 goto out_commit;
746 }
747
748 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
749 OCFS2_JOURNAL_ACCESS_WRITE);
750 if (ret) {
751 mlog_errno(ret);
752 goto out_commit;
753 }
754
755 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
756 le32_add_cpu(&rb->rf_count, 1);
757
758 ocfs2_journal_dirty(handle, ref_root_bh);
759
760 spin_lock(&oi->ip_lock);
761 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
762 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
763 di->i_refcount_loc = cpu_to_le64(refcount_loc);
764 spin_unlock(&oi->ip_lock);
765 ocfs2_journal_dirty(handle, di_bh);
766
767out_commit:
768 ocfs2_commit_trans(osb, handle);
769out:
770 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
771 brelse(ref_root_bh);
772
773 return ret;
774}
775
776int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
777{
778 int ret, delete_tree = 0;
779 handle_t *handle = NULL;
780 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
781 struct ocfs2_inode_info *oi = OCFS2_I(inode);
782 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
783 struct ocfs2_refcount_block *rb;
784 struct inode *alloc_inode = NULL;
785 struct buffer_head *alloc_bh = NULL;
786 struct buffer_head *blk_bh = NULL;
787 struct ocfs2_refcount_tree *ref_tree;
788 int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
789 u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
790 u16 bit = 0;
791
792 if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
793 return 0;
794
795 BUG_ON(!ref_blkno);
796 ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
797 if (ret) {
798 mlog_errno(ret);
799 return ret;
800 }
801
802 rb = (struct ocfs2_refcount_block *)blk_bh->b_data;
803
804 /*
805 * If we are the last user, we need to free the block.
806 * So lock the allocator ahead.
807 */
808 if (le32_to_cpu(rb->rf_count) == 1) {
809 blk = le64_to_cpu(rb->rf_blkno);
810 bit = le16_to_cpu(rb->rf_suballoc_bit);
811 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
812
813 alloc_inode = ocfs2_get_system_file_inode(osb,
814 EXTENT_ALLOC_SYSTEM_INODE,
815 le16_to_cpu(rb->rf_suballoc_slot));
816 if (!alloc_inode) {
817 ret = -ENOMEM;
818 mlog_errno(ret);
819 goto out;
820 }
821 mutex_lock(&alloc_inode->i_mutex);
822
823 ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
824 if (ret) {
825 mlog_errno(ret);
826 goto out_mutex;
827 }
828
829 credits += OCFS2_SUBALLOC_FREE;
830 }
831
832 handle = ocfs2_start_trans(osb, credits);
833 if (IS_ERR(handle)) {
834 ret = PTR_ERR(handle);
835 mlog_errno(ret);
836 goto out_unlock;
837 }
838
839 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
840 OCFS2_JOURNAL_ACCESS_WRITE);
841 if (ret) {
842 mlog_errno(ret);
843 goto out_commit;
844 }
845
846 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
847 OCFS2_JOURNAL_ACCESS_WRITE);
848 if (ret) {
849 mlog_errno(ret);
850 goto out_commit;
851 }
852
853 spin_lock(&oi->ip_lock);
854 oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
855 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
856 di->i_refcount_loc = 0;
857 spin_unlock(&oi->ip_lock);
858 ocfs2_journal_dirty(handle, di_bh);
859
860 le32_add_cpu(&rb->rf_count , -1);
861 ocfs2_journal_dirty(handle, blk_bh);
862
863 if (!rb->rf_count) {
864 delete_tree = 1;
865 ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
866 ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
867 alloc_bh, bit, bg_blkno, 1);
868 if (ret)
869 mlog_errno(ret);
870 }
871
872out_commit:
873 ocfs2_commit_trans(osb, handle);
874out_unlock:
875 if (alloc_inode) {
876 ocfs2_inode_unlock(alloc_inode, 1);
877 brelse(alloc_bh);
878 }
879out_mutex:
880 if (alloc_inode) {
881 mutex_unlock(&alloc_inode->i_mutex);
882 iput(alloc_inode);
883 }
884out:
885 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
886 if (delete_tree)
887 ocfs2_refcount_tree_put(ref_tree);
888 brelse(blk_bh);
889
890 return ret;
891}
892
893static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
894 struct buffer_head *ref_leaf_bh,
895 u64 cpos, unsigned int len,
896 struct ocfs2_refcount_rec *ret_rec,
897 int *index)
898{
899 int i = 0;
900 struct ocfs2_refcount_block *rb =
901 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
902 struct ocfs2_refcount_rec *rec = NULL;
903
904 for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) {
905 rec = &rb->rf_records.rl_recs[i];
906
907 if (le64_to_cpu(rec->r_cpos) +
908 le32_to_cpu(rec->r_clusters) <= cpos)
909 continue;
910 else if (le64_to_cpu(rec->r_cpos) > cpos)
911 break;
912
913 /* ok, cpos fail in this rec. Just return. */
914 if (ret_rec)
915 *ret_rec = *rec;
916 goto out;
917 }
918
919 if (ret_rec) {
920 /* We meet with a hole here, so fake the rec. */
921 ret_rec->r_cpos = cpu_to_le64(cpos);
922 ret_rec->r_refcount = 0;
923 if (i < le16_to_cpu(rb->rf_records.rl_used) &&
924 le64_to_cpu(rec->r_cpos) < cpos + len)
925 ret_rec->r_clusters =
926 cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos);
927 else
928 ret_rec->r_clusters = cpu_to_le32(len);
929 }
930
931out:
932 *index = i;
933}
934
935/*
936 * Try to remove refcount tree. The mechanism is:
937 * 1) Check whether i_clusters == 0, if no, exit.
938 * 2) check whether we have i_xattr_loc in dinode. if yes, exit.
939 * 3) Check whether we have inline xattr stored outside, if yes, exit.
940 * 4) Remove the tree.
941 */
942int ocfs2_try_remove_refcount_tree(struct inode *inode,
943 struct buffer_head *di_bh)
944{
945 int ret;
946 struct ocfs2_inode_info *oi = OCFS2_I(inode);
947 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
948
949 down_write(&oi->ip_xattr_sem);
950 down_write(&oi->ip_alloc_sem);
951
952 if (oi->ip_clusters)
953 goto out;
954
955 if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc)
956 goto out;
957
958 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL &&
959 ocfs2_has_inline_xattr_value_outside(inode, di))
960 goto out;
961
962 ret = ocfs2_remove_refcount_tree(inode, di_bh);
963 if (ret)
964 mlog_errno(ret);
965out:
966 up_write(&oi->ip_alloc_sem);
967 up_write(&oi->ip_xattr_sem);
968 return 0;
969}
970
971/*
972 * Given a cpos and len, try to find the refcount record which contains cpos.
973 * 1. If cpos can be found in one refcount record, return the record.
974 * 2. If cpos can't be found, return a fake record which start from cpos
975 * and end at a small value between cpos+len and start of the next record.
976 * This fake record has r_refcount = 0.
977 */
978static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
979 struct buffer_head *ref_root_bh,
980 u64 cpos, unsigned int len,
981 struct ocfs2_refcount_rec *ret_rec,
982 int *index,
983 struct buffer_head **ret_bh)
984{
985 int ret = 0, i, found;
986 u32 low_cpos;
987 struct ocfs2_extent_list *el;
988 struct ocfs2_extent_rec *tmp, *rec = NULL;
989 struct ocfs2_extent_block *eb;
990 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
991 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
992 struct ocfs2_refcount_block *rb =
993 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
994
995 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
996 ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
997 ret_rec, index);
998 *ret_bh = ref_root_bh;
999 get_bh(ref_root_bh);
1000 return 0;
1001 }
1002
1003 el = &rb->rf_list;
1004 low_cpos = cpos & OCFS2_32BIT_POS_MASK;
1005
1006 if (el->l_tree_depth) {
1007 ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
1008 if (ret) {
1009 mlog_errno(ret);
1010 goto out;
1011 }
1012
1013 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
1014 el = &eb->h_list;
1015
1016 if (el->l_tree_depth) {
1017 ocfs2_error(sb,
1018 "refcount tree %llu has non zero tree "
1019 "depth in leaf btree tree block %llu\n",
1020 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1021 (unsigned long long)eb_bh->b_blocknr);
1022 ret = -EROFS;
1023 goto out;
1024 }
1025 }
1026
1027 found = 0;
1028 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1029 rec = &el->l_recs[i];
1030
1031 if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
1032 found = 1;
1033 break;
1034 }
1035 }
1036
1037 /* adjust len when we have ocfs2_extent_rec after it. */
1038 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
1039 tmp = &el->l_recs[i+1];
1040
1041 if (le32_to_cpu(tmp->e_cpos) < cpos + len)
1042 len = le32_to_cpu(tmp->e_cpos) - cpos;
1043 }
1044
1045 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
1046 &ref_leaf_bh);
1047 if (ret) {
1048 mlog_errno(ret);
1049 goto out;
1050 }
1051
1052 ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
1053 ret_rec, index);
1054 *ret_bh = ref_leaf_bh;
1055out:
1056 brelse(eb_bh);
1057 return ret;
1058}
1059
1060enum ocfs2_ref_rec_contig {
1061 REF_CONTIG_NONE = 0,
1062 REF_CONTIG_LEFT,
1063 REF_CONTIG_RIGHT,
1064 REF_CONTIG_LEFTRIGHT,
1065};
1066
1067static enum ocfs2_ref_rec_contig
1068 ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb,
1069 int index)
1070{
1071 if ((rb->rf_records.rl_recs[index].r_refcount ==
1072 rb->rf_records.rl_recs[index + 1].r_refcount) &&
1073 (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) +
1074 le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) ==
1075 le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos)))
1076 return REF_CONTIG_RIGHT;
1077
1078 return REF_CONTIG_NONE;
1079}
1080
1081static enum ocfs2_ref_rec_contig
1082 ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb,
1083 int index)
1084{
1085 enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE;
1086
1087 if (index < le16_to_cpu(rb->rf_records.rl_used) - 1)
1088 ret = ocfs2_refcount_rec_adjacent(rb, index);
1089
1090 if (index > 0) {
1091 enum ocfs2_ref_rec_contig tmp;
1092
1093 tmp = ocfs2_refcount_rec_adjacent(rb, index - 1);
1094
1095 if (tmp == REF_CONTIG_RIGHT) {
1096 if (ret == REF_CONTIG_RIGHT)
1097 ret = REF_CONTIG_LEFTRIGHT;
1098 else
1099 ret = REF_CONTIG_LEFT;
1100 }
1101 }
1102
1103 return ret;
1104}
1105
1106static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb,
1107 int index)
1108{
1109 BUG_ON(rb->rf_records.rl_recs[index].r_refcount !=
1110 rb->rf_records.rl_recs[index+1].r_refcount);
1111
1112 le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters,
1113 le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters));
1114
1115 if (index < le16_to_cpu(rb->rf_records.rl_used) - 2)
1116 memmove(&rb->rf_records.rl_recs[index + 1],
1117 &rb->rf_records.rl_recs[index + 2],
1118 sizeof(struct ocfs2_refcount_rec) *
1119 (le16_to_cpu(rb->rf_records.rl_used) - index - 2));
1120
1121 memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1],
1122 0, sizeof(struct ocfs2_refcount_rec));
1123 le16_add_cpu(&rb->rf_records.rl_used, -1);
1124}
1125
1126/*
1127 * Merge the refcount rec if we are contiguous with the adjacent recs.
1128 */
1129static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
1130 int index)
1131{
1132 enum ocfs2_ref_rec_contig contig =
1133 ocfs2_refcount_rec_contig(rb, index);
1134
1135 if (contig == REF_CONTIG_NONE)
1136 return;
1137
1138 if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) {
1139 BUG_ON(index == 0);
1140 index--;
1141 }
1142
1143 ocfs2_rotate_refcount_rec_left(rb, index);
1144
1145 if (contig == REF_CONTIG_LEFTRIGHT)
1146 ocfs2_rotate_refcount_rec_left(rb, index);
1147}
1148
1149/*
1150 * Change the refcount indexed by "index" in ref_bh.
1151 * If refcount reaches 0, remove it.
1152 */
1153static int ocfs2_change_refcount_rec(handle_t *handle,
1154 struct ocfs2_caching_info *ci,
1155 struct buffer_head *ref_leaf_bh,
1156 int index, int merge, int change)
1157{
1158 int ret;
1159 struct ocfs2_refcount_block *rb =
1160 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1161 struct ocfs2_refcount_list *rl = &rb->rf_records;
1162 struct ocfs2_refcount_rec *rec = &rl->rl_recs[index];
1163
1164 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1165 OCFS2_JOURNAL_ACCESS_WRITE);
1166 if (ret) {
1167 mlog_errno(ret);
1168 goto out;
1169 }
1170
1171 mlog(0, "change index %d, old count %u, change %d\n", index,
1172 le32_to_cpu(rec->r_refcount), change);
1173 le32_add_cpu(&rec->r_refcount, change);
1174
1175 if (!rec->r_refcount) {
1176 if (index != le16_to_cpu(rl->rl_used) - 1) {
1177 memmove(rec, rec + 1,
1178 (le16_to_cpu(rl->rl_used) - index - 1) *
1179 sizeof(struct ocfs2_refcount_rec));
1180 memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1],
1181 0, sizeof(struct ocfs2_refcount_rec));
1182 }
1183
1184 le16_add_cpu(&rl->rl_used, -1);
1185 } else if (merge)
1186 ocfs2_refcount_rec_merge(rb, index);
1187
1188 ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
1189 if (ret)
1190 mlog_errno(ret);
1191out:
1192 return ret;
1193}
1194
1195static int ocfs2_expand_inline_ref_root(handle_t *handle,
1196 struct ocfs2_caching_info *ci,
1197 struct buffer_head *ref_root_bh,
1198 struct buffer_head **ref_leaf_bh,
1199 struct ocfs2_alloc_context *meta_ac)
1200{
1201 int ret;
1202 u16 suballoc_bit_start;
1203 u32 num_got;
1204 u64 blkno;
1205 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1206 struct buffer_head *new_bh = NULL;
1207 struct ocfs2_refcount_block *new_rb;
1208 struct ocfs2_refcount_block *root_rb =
1209 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
1210
1211 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
1212 OCFS2_JOURNAL_ACCESS_WRITE);
1213 if (ret) {
1214 mlog_errno(ret);
1215 goto out;
1216 }
1217
1218 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
1219 &suballoc_bit_start, &num_got,
1220 &blkno);
1221 if (ret) {
1222 mlog_errno(ret);
1223 goto out;
1224 }
1225
1226 new_bh = sb_getblk(sb, blkno);
1227 if (new_bh == NULL) {
1228 ret = -EIO;
1229 mlog_errno(ret);
1230 goto out;
1231 }
1232 ocfs2_set_new_buffer_uptodate(ci, new_bh);
1233
1234 ret = ocfs2_journal_access_rb(handle, ci, new_bh,
1235 OCFS2_JOURNAL_ACCESS_CREATE);
1236 if (ret) {
1237 mlog_errno(ret);
1238 goto out;
1239 }
1240
1241 /*
1242 * Initialize ocfs2_refcount_block.
1243 * It should contain the same information as the old root.
1244 * so just memcpy it and change the corresponding field.
1245 */
1246 memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);
1247
1248 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
1249 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
1250 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1251 new_rb->rf_blkno = cpu_to_le64(blkno);
1252 new_rb->rf_cpos = cpu_to_le32(0);
1253 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
1254 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
1255 ocfs2_journal_dirty(handle, new_bh);
1256
1257 /* Now change the root. */
1258 memset(&root_rb->rf_list, 0, sb->s_blocksize -
1259 offsetof(struct ocfs2_refcount_block, rf_list));
1260 root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
1261 root_rb->rf_clusters = cpu_to_le32(1);
1262 root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
1263 root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
1264 root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
1265 root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);
1266
1267 ocfs2_journal_dirty(handle, ref_root_bh);
1268
1269 mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno,
1270 le16_to_cpu(new_rb->rf_records.rl_used));
1271
1272 *ref_leaf_bh = new_bh;
1273 new_bh = NULL;
1274out:
1275 brelse(new_bh);
1276 return ret;
1277}
1278
1279static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev,
1280 struct ocfs2_refcount_rec *next)
1281{
1282 if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <=
1283 ocfs2_get_ref_rec_low_cpos(next))
1284 return 1;
1285
1286 return 0;
1287}
1288
1289static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b)
1290{
1291 const struct ocfs2_refcount_rec *l = a, *r = b;
1292 u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l);
1293 u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r);
1294
1295 if (l_cpos > r_cpos)
1296 return 1;
1297 if (l_cpos < r_cpos)
1298 return -1;
1299 return 0;
1300}
1301
1302static int cmp_refcount_rec_by_cpos(const void *a, const void *b)
1303{
1304 const struct ocfs2_refcount_rec *l = a, *r = b;
1305 u64 l_cpos = le64_to_cpu(l->r_cpos);
1306 u64 r_cpos = le64_to_cpu(r->r_cpos);
1307
1308 if (l_cpos > r_cpos)
1309 return 1;
1310 if (l_cpos < r_cpos)
1311 return -1;
1312 return 0;
1313}
1314
1315static void swap_refcount_rec(void *a, void *b, int size)
1316{
1317 struct ocfs2_refcount_rec *l = a, *r = b, tmp;
1318
1319 tmp = *(struct ocfs2_refcount_rec *)l;
1320 *(struct ocfs2_refcount_rec *)l =
1321 *(struct ocfs2_refcount_rec *)r;
1322 *(struct ocfs2_refcount_rec *)r = tmp;
1323}
1324
1325/*
1326 * The refcount cpos are ordered by their 64bit cpos,
1327 * But we will use the low 32 bit to be the e_cpos in the b-tree.
1328 * So we need to make sure that this pos isn't intersected with others.
1329 *
1330 * Note: The refcount block is already sorted by their low 32 bit cpos,
1331 * So just try the middle pos first, and we will exit when we find
1332 * the good position.
1333 */
1334static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
1335 u32 *split_pos, int *split_index)
1336{
1337 int num_used = le16_to_cpu(rl->rl_used);
1338 int delta, middle = num_used / 2;
1339
1340 for (delta = 0; delta < middle; delta++) {
1341 /* Let's check delta earlier than middle */
1342 if (ocfs2_refcount_rec_no_intersect(
1343 &rl->rl_recs[middle - delta - 1],
1344 &rl->rl_recs[middle - delta])) {
1345 *split_index = middle - delta;
1346 break;
1347 }
1348
1349 /* For even counts, don't walk off the end */
1350 if ((middle + delta + 1) == num_used)
1351 continue;
1352
1353 /* Now try delta past middle */
1354 if (ocfs2_refcount_rec_no_intersect(
1355 &rl->rl_recs[middle + delta],
1356 &rl->rl_recs[middle + delta + 1])) {
1357 *split_index = middle + delta + 1;
1358 break;
1359 }
1360 }
1361
1362 if (delta >= middle)
1363 return -ENOSPC;
1364
1365 *split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
1366 return 0;
1367}
1368
1369static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
1370 struct buffer_head *new_bh,
1371 u32 *split_cpos)
1372{
1373 int split_index = 0, num_moved, ret;
1374 u32 cpos = 0;
1375 struct ocfs2_refcount_block *rb =
1376 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1377 struct ocfs2_refcount_list *rl = &rb->rf_records;
1378 struct ocfs2_refcount_block *new_rb =
1379 (struct ocfs2_refcount_block *)new_bh->b_data;
1380 struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;
1381
1382 mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n",
1383 (unsigned long long)ref_leaf_bh->b_blocknr,
1384 le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
1385
1386 /*
1387 * XXX: Improvement later.
1388 * If we know all the high 32 bit cpos is the same, no need to sort.
1389 *
1390 * In order to make the whole process safe, we do:
1391 * 1. sort the entries by their low 32 bit cpos first so that we can
1392 * find the split cpos easily.
1393 * 2. call ocfs2_insert_extent to insert the new refcount block.
1394 * 3. move the refcount rec to the new block.
1395 * 4. sort the entries by their 64 bit cpos.
1396 * 5. dirty the new_rb and rb.
1397 */
1398 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
1399 sizeof(struct ocfs2_refcount_rec),
1400 cmp_refcount_rec_by_low_cpos, swap_refcount_rec);
1401
1402 ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
1403 if (ret) {
1404 mlog_errno(ret);
1405 return ret;
1406 }
1407
1408 new_rb->rf_cpos = cpu_to_le32(cpos);
1409
1410 /* move refcount records starting from split_index to the new block. */
1411 num_moved = le16_to_cpu(rl->rl_used) - split_index;
1412 memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
1413 num_moved * sizeof(struct ocfs2_refcount_rec));
1414
1415 /*ok, remove the entries we just moved over to the other block. */
1416 memset(&rl->rl_recs[split_index], 0,
1417 num_moved * sizeof(struct ocfs2_refcount_rec));
1418
1419 /* change old and new rl_used accordingly. */
1420 le16_add_cpu(&rl->rl_used, -num_moved);
1421 new_rl->rl_used = cpu_to_le32(num_moved);
1422
1423 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
1424 sizeof(struct ocfs2_refcount_rec),
1425 cmp_refcount_rec_by_cpos, swap_refcount_rec);
1426
1427 sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
1428 sizeof(struct ocfs2_refcount_rec),
1429 cmp_refcount_rec_by_cpos, swap_refcount_rec);
1430
1431 *split_cpos = cpos;
1432 return 0;
1433}
1434
1435static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1436 struct ocfs2_caching_info *ci,
1437 struct buffer_head *ref_root_bh,
1438 struct buffer_head *ref_leaf_bh,
1439 struct ocfs2_alloc_context *meta_ac)
1440{
1441 int ret;
1442 u16 suballoc_bit_start;
1443 u32 num_got, new_cpos;
1444 u64 blkno;
1445 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1446 struct ocfs2_refcount_block *root_rb =
1447 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
1448 struct buffer_head *new_bh = NULL;
1449 struct ocfs2_refcount_block *new_rb;
1450 struct ocfs2_extent_tree ref_et;
1451
1452 BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));
1453
1454 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
1455 OCFS2_JOURNAL_ACCESS_WRITE);
1456 if (ret) {
1457 mlog_errno(ret);
1458 goto out;
1459 }
1460
1461 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1462 OCFS2_JOURNAL_ACCESS_WRITE);
1463 if (ret) {
1464 mlog_errno(ret);
1465 goto out;
1466 }
1467
1468 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
1469 &suballoc_bit_start, &num_got,
1470 &blkno);
1471 if (ret) {
1472 mlog_errno(ret);
1473 goto out;
1474 }
1475
1476 new_bh = sb_getblk(sb, blkno);
1477 if (new_bh == NULL) {
1478 ret = -EIO;
1479 mlog_errno(ret);
1480 goto out;
1481 }
1482 ocfs2_set_new_buffer_uptodate(ci, new_bh);
1483
1484 ret = ocfs2_journal_access_rb(handle, ci, new_bh,
1485 OCFS2_JOURNAL_ACCESS_CREATE);
1486 if (ret) {
1487 mlog_errno(ret);
1488 goto out;
1489 }
1490
1491 /* Initialize ocfs2_refcount_block. */
1492 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
1493 memset(new_rb, 0, sb->s_blocksize);
1494 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
1495 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
1496 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1497 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
1498 new_rb->rf_blkno = cpu_to_le64(blkno);
1499 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
1500 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
1501 new_rb->rf_records.rl_count =
1502 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
1503 new_rb->rf_generation = root_rb->rf_generation;
1504
1505 ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos);
1506 if (ret) {
1507 mlog_errno(ret);
1508 goto out;
1509 }
1510
1511 ocfs2_journal_dirty(handle, ref_leaf_bh);
1512 ocfs2_journal_dirty(handle, new_bh);
1513
1514 ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);
1515
1516 mlog(0, "insert new leaf block %llu at %u\n",
1517 (unsigned long long)new_bh->b_blocknr, new_cpos);
1518
1519 /* Insert the new leaf block with the specific offset cpos. */
1520 ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
1521 1, 0, meta_ac);
1522 if (ret)
1523 mlog_errno(ret);
1524
1525out:
1526 brelse(new_bh);
1527 return ret;
1528}
1529
1530static int ocfs2_expand_refcount_tree(handle_t *handle,
1531 struct ocfs2_caching_info *ci,
1532 struct buffer_head *ref_root_bh,
1533 struct buffer_head *ref_leaf_bh,
1534 struct ocfs2_alloc_context *meta_ac)
1535{
1536 int ret;
1537 struct buffer_head *expand_bh = NULL;
1538
1539 if (ref_root_bh == ref_leaf_bh) {
1540 /*
1541 * the old root bh hasn't been expanded to a b-tree,
1542 * so expand it first.
1543 */
1544 ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
1545 &expand_bh, meta_ac);
1546 if (ret) {
1547 mlog_errno(ret);
1548 goto out;
1549 }
1550 } else {
1551 expand_bh = ref_leaf_bh;
1552 get_bh(expand_bh);
1553 }
1554
1555
1556 /* Now add a new refcount block into the tree.*/
1557 ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
1558 expand_bh, meta_ac);
1559 if (ret)
1560 mlog_errno(ret);
1561out:
1562 brelse(expand_bh);
1563 return ret;
1564}
1565
1566/*
1567 * Adjust the extent rec in b-tree representing ref_leaf_bh.
1568 *
1569 * Only called when we have inserted a new refcount rec at index 0
1570 * which means ocfs2_extent_rec.e_cpos may need some change.
1571 */
1572static int ocfs2_adjust_refcount_rec(handle_t *handle,
1573 struct ocfs2_caching_info *ci,
1574 struct buffer_head *ref_root_bh,
1575 struct buffer_head *ref_leaf_bh,
1576 struct ocfs2_refcount_rec *rec)
1577{
1578 int ret = 0, i;
1579 u32 new_cpos, old_cpos;
1580 struct ocfs2_path *path = NULL;
1581 struct ocfs2_extent_tree et;
1582 struct ocfs2_refcount_block *rb =
1583 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
1584 struct ocfs2_extent_list *el;
1585
1586 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
1587 goto out;
1588
1589 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1590 old_cpos = le32_to_cpu(rb->rf_cpos);
1591 new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
1592 if (old_cpos <= new_cpos)
1593 goto out;
1594
1595 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
1596
1597 path = ocfs2_new_path_from_et(&et);
1598 if (!path) {
1599 ret = -ENOMEM;
1600 mlog_errno(ret);
1601 goto out;
1602 }
1603
1604 ret = ocfs2_find_path(ci, path, old_cpos);
1605 if (ret) {
1606 mlog_errno(ret);
1607 goto out;
1608 }
1609
1610 /*
1611 * 2 more credits, one for the leaf refcount block, one for
1612 * the extent block contains the extent rec.
1613 */
1614 ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
1615 if (ret < 0) {
1616 mlog_errno(ret);
1617 goto out;
1618 }
1619
1620 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1621 OCFS2_JOURNAL_ACCESS_WRITE);
1622 if (ret < 0) {
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
1628 OCFS2_JOURNAL_ACCESS_WRITE);
1629 if (ret < 0) {
1630 mlog_errno(ret);
1631 goto out;
1632 }
1633
1634 /* change the leaf extent block first. */
1635 el = path_leaf_el(path);
1636
1637 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++)
1638 if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos)
1639 break;
1640
1641 BUG_ON(i == le16_to_cpu(el->l_next_free_rec));
1642
1643 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
1644
1645 /* change the r_cpos in the leaf block. */
1646 rb->rf_cpos = cpu_to_le32(new_cpos);
1647
1648 ocfs2_journal_dirty(handle, path_leaf_bh(path));
1649 ocfs2_journal_dirty(handle, ref_leaf_bh);
1650
1651out:
1652 ocfs2_free_path(path);
1653 return ret;
1654}
1655
1656static int ocfs2_insert_refcount_rec(handle_t *handle,
1657 struct ocfs2_caching_info *ci,
1658 struct buffer_head *ref_root_bh,
1659 struct buffer_head *ref_leaf_bh,
1660 struct ocfs2_refcount_rec *rec,
1661 int index, int merge,
1662 struct ocfs2_alloc_context *meta_ac)
1663{
1664 int ret;
1665 struct ocfs2_refcount_block *rb =
1666 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1667 struct ocfs2_refcount_list *rf_list = &rb->rf_records;
1668 struct buffer_head *new_bh = NULL;
1669
1670 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
1671
1672 if (rf_list->rl_used == rf_list->rl_count) {
1673 u64 cpos = le64_to_cpu(rec->r_cpos);
1674 u32 len = le32_to_cpu(rec->r_clusters);
1675
1676 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
1677 ref_leaf_bh, meta_ac);
1678 if (ret) {
1679 mlog_errno(ret);
1680 goto out;
1681 }
1682
1683 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
1684 cpos, len, NULL, &index,
1685 &new_bh);
1686 if (ret) {
1687 mlog_errno(ret);
1688 goto out;
1689 }
1690
1691 ref_leaf_bh = new_bh;
1692 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1693 rf_list = &rb->rf_records;
1694 }
1695
1696 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1697 OCFS2_JOURNAL_ACCESS_WRITE);
1698 if (ret) {
1699 mlog_errno(ret);
1700 goto out;
1701 }
1702
1703 if (index < le16_to_cpu(rf_list->rl_used))
1704 memmove(&rf_list->rl_recs[index + 1],
1705 &rf_list->rl_recs[index],
1706 (le16_to_cpu(rf_list->rl_used) - index) *
1707 sizeof(struct ocfs2_refcount_rec));
1708
1709 mlog(0, "insert refcount record start %llu, len %u, count %u "
1710 "to leaf block %llu at index %d\n",
1711 (unsigned long long)le64_to_cpu(rec->r_cpos),
1712 le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
1713 (unsigned long long)ref_leaf_bh->b_blocknr, index);
1714
1715 rf_list->rl_recs[index] = *rec;
1716
1717 le16_add_cpu(&rf_list->rl_used, 1);
1718
1719 if (merge)
1720 ocfs2_refcount_rec_merge(rb, index);
1721
1722 ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
1723 if (ret) {
1724 mlog_errno(ret);
1725 goto out;
1726 }
1727
1728 if (index == 0) {
1729 ret = ocfs2_adjust_refcount_rec(handle, ci,
1730 ref_root_bh,
1731 ref_leaf_bh, rec);
1732 if (ret)
1733 mlog_errno(ret);
1734 }
1735out:
1736 brelse(new_bh);
1737 return ret;
1738}
1739
1740/*
1741 * Split the refcount_rec indexed by "index" in ref_leaf_bh.
1742 * This is much simple than our b-tree code.
1743 * split_rec is the new refcount rec we want to insert.
1744 * If split_rec->r_refcount > 0, we are changing the refcount(in case we
1745 * increase refcount or decrease a refcount to non-zero).
1746 * If split_rec->r_refcount == 0, we are punching a hole in current refcount
1747 * rec( in case we decrease a refcount to zero).
1748 */
1749static int ocfs2_split_refcount_rec(handle_t *handle,
1750 struct ocfs2_caching_info *ci,
1751 struct buffer_head *ref_root_bh,
1752 struct buffer_head *ref_leaf_bh,
1753 struct ocfs2_refcount_rec *split_rec,
1754 int index, int merge,
1755 struct ocfs2_alloc_context *meta_ac,
1756 struct ocfs2_cached_dealloc_ctxt *dealloc)
1757{
1758 int ret, recs_need;
1759 u32 len;
1760 struct ocfs2_refcount_block *rb =
1761 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1762 struct ocfs2_refcount_list *rf_list = &rb->rf_records;
1763 struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index];
1764 struct ocfs2_refcount_rec *tail_rec = NULL;
1765 struct buffer_head *new_bh = NULL;
1766
1767 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
1768
1769 mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n",
1770 le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters),
1771 le64_to_cpu(split_rec->r_cpos),
1772 le32_to_cpu(split_rec->r_clusters));
1773
1774 /*
1775 * If we just need to split the header or tail clusters,
1776 * no more recs are needed, just split is OK.
1777 * Otherwise we at least need one new recs.
1778 */
1779 if (!split_rec->r_refcount &&
1780 (split_rec->r_cpos == orig_rec->r_cpos ||
1781 le64_to_cpu(split_rec->r_cpos) +
1782 le32_to_cpu(split_rec->r_clusters) ==
1783 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
1784 recs_need = 0;
1785 else
1786 recs_need = 1;
1787
1788 /*
1789 * We need one more rec if we split in the middle and the new rec have
1790 * some refcount in it.
1791 */
1792 if (split_rec->r_refcount &&
1793 (split_rec->r_cpos != orig_rec->r_cpos &&
1794 le64_to_cpu(split_rec->r_cpos) +
1795 le32_to_cpu(split_rec->r_clusters) !=
1796 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
1797 recs_need++;
1798
1799 /* If the leaf block don't have enough record, expand it. */
1800 if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) {
1801 struct ocfs2_refcount_rec tmp_rec;
1802 u64 cpos = le64_to_cpu(orig_rec->r_cpos);
1803 len = le32_to_cpu(orig_rec->r_clusters);
1804 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
1805 ref_leaf_bh, meta_ac);
1806 if (ret) {
1807 mlog_errno(ret);
1808 goto out;
1809 }
1810
1811 /*
1812 * We have to re-get it since now cpos may be moved to
1813 * another leaf block.
1814 */
1815 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
1816 cpos, len, &tmp_rec, &index,
1817 &new_bh);
1818 if (ret) {
1819 mlog_errno(ret);
1820 goto out;
1821 }
1822
1823 ref_leaf_bh = new_bh;
1824 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1825 rf_list = &rb->rf_records;
1826 orig_rec = &rf_list->rl_recs[index];
1827 }
1828
1829 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1830 OCFS2_JOURNAL_ACCESS_WRITE);
1831 if (ret) {
1832 mlog_errno(ret);
1833 goto out;
1834 }
1835
1836 /*
1837 * We have calculated out how many new records we need and store
1838 * in recs_need, so spare enough space first by moving the records
1839 * after "index" to the end.
1840 */
1841 if (index != le16_to_cpu(rf_list->rl_used) - 1)
1842 memmove(&rf_list->rl_recs[index + 1 + recs_need],
1843 &rf_list->rl_recs[index + 1],
1844 (le16_to_cpu(rf_list->rl_used) - index - 1) *
1845 sizeof(struct ocfs2_refcount_rec));
1846
1847 len = (le64_to_cpu(orig_rec->r_cpos) +
1848 le32_to_cpu(orig_rec->r_clusters)) -
1849 (le64_to_cpu(split_rec->r_cpos) +
1850 le32_to_cpu(split_rec->r_clusters));
1851
1852 /*
1853 * If we have "len", the we will split in the tail and move it
1854 * to the end of the space we have just spared.
1855 */
1856 if (len) {
1857 tail_rec = &rf_list->rl_recs[index + recs_need];
1858
1859 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
1860 le64_add_cpu(&tail_rec->r_cpos,
1861 le32_to_cpu(tail_rec->r_clusters) - len);
1862 tail_rec->r_clusters = le32_to_cpu(len);
1863 }
1864
1865 /*
1866 * If the split pos isn't the same as the original one, we need to
1867 * split in the head.
1868 *
1869 * Note: We have the chance that split_rec.r_refcount = 0,
1870 * recs_need = 0 and len > 0, which means we just cut the head from
1871 * the orig_rec and in that case we have done some modification in
1872 * orig_rec above, so the check for r_cpos is faked.
1873 */
1874 if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
1875 len = le64_to_cpu(split_rec->r_cpos) -
1876 le64_to_cpu(orig_rec->r_cpos);
1877 orig_rec->r_clusters = cpu_to_le32(len);
1878 index++;
1879 }
1880
1881 le16_add_cpu(&rf_list->rl_used, recs_need);
1882
1883 if (split_rec->r_refcount) {
1884 rf_list->rl_recs[index] = *split_rec;
1885 mlog(0, "insert refcount record start %llu, len %u, count %u "
1886 "to leaf block %llu at index %d\n",
1887 (unsigned long long)le64_to_cpu(split_rec->r_cpos),
1888 le32_to_cpu(split_rec->r_clusters),
1889 le32_to_cpu(split_rec->r_refcount),
1890 (unsigned long long)ref_leaf_bh->b_blocknr, index);
1891
1892 if (merge)
1893 ocfs2_refcount_rec_merge(rb, index);
1894 }
1895
1896 ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
1897 if (ret)
1898 mlog_errno(ret);
1899
1900out:
1901 brelse(new_bh);
1902 return ret;
1903}
1904
1905static int __ocfs2_increase_refcount(handle_t *handle,
1906 struct ocfs2_caching_info *ci,
1907 struct buffer_head *ref_root_bh,
1908 u64 cpos, u32 len, int merge,
1909 struct ocfs2_alloc_context *meta_ac,
1910 struct ocfs2_cached_dealloc_ctxt *dealloc)
1911{
1912 int ret = 0, index;
1913 struct buffer_head *ref_leaf_bh = NULL;
1914 struct ocfs2_refcount_rec rec;
1915 unsigned int set_len = 0;
1916
1917 mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n",
1918 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1919 (unsigned long long)cpos, len);
1920
1921 while (len) {
1922 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
1923 cpos, len, &rec, &index,
1924 &ref_leaf_bh);
1925 if (ret) {
1926 mlog_errno(ret);
1927 goto out;
1928 }
1929
1930 set_len = le32_to_cpu(rec.r_clusters);
1931
1932 /*
1933 * Here we may meet with 3 situations:
1934 *
1935 * 1. If we find an already existing record, and the length
1936 * is the same, cool, we just need to increase the r_refcount
1937 * and it is OK.
1938 * 2. If we find a hole, just insert it with r_refcount = 1.
1939 * 3. If we are in the middle of one extent record, split
1940 * it.
1941 */
1942 if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
1943 set_len <= len) {
1944 mlog(0, "increase refcount rec, start %llu, len %u, "
1945 "count %u\n", (unsigned long long)cpos, set_len,
1946 le32_to_cpu(rec.r_refcount));
1947 ret = ocfs2_change_refcount_rec(handle, ci,
1948 ref_leaf_bh, index,
1949 merge, 1);
1950 if (ret) {
1951 mlog_errno(ret);
1952 goto out;
1953 }
1954 } else if (!rec.r_refcount) {
1955 rec.r_refcount = cpu_to_le32(1);
1956
1957 mlog(0, "insert refcount rec, start %llu, len %u\n",
1958 (unsigned long long)le64_to_cpu(rec.r_cpos),
1959 set_len);
1960 ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
1961 ref_leaf_bh,
1962 &rec, index,
1963 merge, meta_ac);
1964 if (ret) {
1965 mlog_errno(ret);
1966 goto out;
1967 }
1968 } else {
1969 set_len = min((u64)(cpos + len),
1970 le64_to_cpu(rec.r_cpos) + set_len) - cpos;
1971 rec.r_cpos = cpu_to_le64(cpos);
1972 rec.r_clusters = cpu_to_le32(set_len);
1973 le32_add_cpu(&rec.r_refcount, 1);
1974
1975 mlog(0, "split refcount rec, start %llu, "
1976 "len %u, count %u\n",
1977 (unsigned long long)le64_to_cpu(rec.r_cpos),
1978 set_len, le32_to_cpu(rec.r_refcount));
1979 ret = ocfs2_split_refcount_rec(handle, ci,
1980 ref_root_bh, ref_leaf_bh,
1981 &rec, index, merge,
1982 meta_ac, dealloc);
1983 if (ret) {
1984 mlog_errno(ret);
1985 goto out;
1986 }
1987 }
1988
1989 cpos += set_len;
1990 len -= set_len;
1991 brelse(ref_leaf_bh);
1992 ref_leaf_bh = NULL;
1993 }
1994
1995out:
1996 brelse(ref_leaf_bh);
1997 return ret;
1998}
1999
2000static int ocfs2_remove_refcount_extent(handle_t *handle,
2001 struct ocfs2_caching_info *ci,
2002 struct buffer_head *ref_root_bh,
2003 struct buffer_head *ref_leaf_bh,
2004 struct ocfs2_alloc_context *meta_ac,
2005 struct ocfs2_cached_dealloc_ctxt *dealloc)
2006{
2007 int ret;
2008 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2009 struct ocfs2_refcount_block *rb =
2010 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2011 struct ocfs2_extent_tree et;
2012
2013 BUG_ON(rb->rf_records.rl_used);
2014
2015 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
2016 ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
2017 1, meta_ac, dealloc);
2018 if (ret) {
2019 mlog_errno(ret);
2020 goto out;
2021 }
2022
2023 ocfs2_remove_from_cache(ci, ref_leaf_bh);
2024
2025 /*
2026 * add the freed block to the dealloc so that it will be freed
2027 * when we run dealloc.
2028 */
2029 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
2030 le16_to_cpu(rb->rf_suballoc_slot),
2031 le64_to_cpu(rb->rf_blkno),
2032 le16_to_cpu(rb->rf_suballoc_bit));
2033 if (ret) {
2034 mlog_errno(ret);
2035 goto out;
2036 }
2037
2038 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
2039 OCFS2_JOURNAL_ACCESS_WRITE);
2040 if (ret) {
2041 mlog_errno(ret);
2042 goto out;
2043 }
2044
2045 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
2046
2047 le32_add_cpu(&rb->rf_clusters, -1);
2048
2049 /*
2050 * check whether we need to restore the root refcount block if
2051 * there is no leaf extent block at atll.
2052 */
2053 if (!rb->rf_list.l_next_free_rec) {
2054 BUG_ON(rb->rf_clusters);
2055
2056 mlog(0, "reset refcount tree root %llu to be a record block.\n",
2057 (unsigned long long)ref_root_bh->b_blocknr);
2058
2059 rb->rf_flags = 0;
2060 rb->rf_parent = 0;
2061 rb->rf_cpos = 0;
2062 memset(&rb->rf_records, 0, sb->s_blocksize -
2063 offsetof(struct ocfs2_refcount_block, rf_records));
2064 rb->rf_records.rl_count =
2065 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
2066 }
2067
2068 ocfs2_journal_dirty(handle, ref_root_bh);
2069
2070out:
2071 return ret;
2072}
2073
2074int ocfs2_increase_refcount(handle_t *handle,
2075 struct ocfs2_caching_info *ci,
2076 struct buffer_head *ref_root_bh,
2077 u64 cpos, u32 len,
2078 struct ocfs2_alloc_context *meta_ac,
2079 struct ocfs2_cached_dealloc_ctxt *dealloc)
2080{
2081 return __ocfs2_increase_refcount(handle, ci, ref_root_bh,
2082 cpos, len, 1,
2083 meta_ac, dealloc);
2084}
2085
2086static int ocfs2_decrease_refcount_rec(handle_t *handle,
2087 struct ocfs2_caching_info *ci,
2088 struct buffer_head *ref_root_bh,
2089 struct buffer_head *ref_leaf_bh,
2090 int index, u64 cpos, unsigned int len,
2091 struct ocfs2_alloc_context *meta_ac,
2092 struct ocfs2_cached_dealloc_ctxt *dealloc)
2093{
2094 int ret;
2095 struct ocfs2_refcount_block *rb =
2096 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2097 struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
2098
2099 BUG_ON(cpos < le64_to_cpu(rec->r_cpos));
2100 BUG_ON(cpos + len >
2101 le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));
2102
2103 if (cpos == le64_to_cpu(rec->r_cpos) &&
2104 len == le32_to_cpu(rec->r_clusters))
2105 ret = ocfs2_change_refcount_rec(handle, ci,
2106 ref_leaf_bh, index, 1, -1);
2107 else {
2108 struct ocfs2_refcount_rec split = *rec;
2109 split.r_cpos = cpu_to_le64(cpos);
2110 split.r_clusters = cpu_to_le32(len);
2111
2112 le32_add_cpu(&split.r_refcount, -1);
2113
2114 mlog(0, "split refcount rec, start %llu, "
2115 "len %u, count %u, original start %llu, len %u\n",
2116 (unsigned long long)le64_to_cpu(split.r_cpos),
2117 len, le32_to_cpu(split.r_refcount),
2118 (unsigned long long)le64_to_cpu(rec->r_cpos),
2119 le32_to_cpu(rec->r_clusters));
2120 ret = ocfs2_split_refcount_rec(handle, ci,
2121 ref_root_bh, ref_leaf_bh,
2122 &split, index, 1,
2123 meta_ac, dealloc);
2124 }
2125
2126 if (ret) {
2127 mlog_errno(ret);
2128 goto out;
2129 }
2130
2131 /* Remove the leaf refcount block if it contains no refcount record. */
2132 if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
2133 ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
2134 ref_leaf_bh, meta_ac,
2135 dealloc);
2136 if (ret)
2137 mlog_errno(ret);
2138 }
2139
2140out:
2141 return ret;
2142}
2143
2144static int __ocfs2_decrease_refcount(handle_t *handle,
2145 struct ocfs2_caching_info *ci,
2146 struct buffer_head *ref_root_bh,
2147 u64 cpos, u32 len,
2148 struct ocfs2_alloc_context *meta_ac,
2149 struct ocfs2_cached_dealloc_ctxt *dealloc,
2150 int delete)
2151{
2152 int ret = 0, index = 0;
2153 struct ocfs2_refcount_rec rec;
2154 unsigned int r_count = 0, r_len;
2155 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2156 struct buffer_head *ref_leaf_bh = NULL;
2157
2158 mlog(0, "Tree owner %llu, decrease refcount start %llu, "
2159 "len %u, delete %u\n",
2160 (unsigned long long)ocfs2_metadata_cache_owner(ci),
2161 (unsigned long long)cpos, len, delete);
2162
2163 while (len) {
2164 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
2165 cpos, len, &rec, &index,
2166 &ref_leaf_bh);
2167 if (ret) {
2168 mlog_errno(ret);
2169 goto out;
2170 }
2171
2172 r_count = le32_to_cpu(rec.r_refcount);
2173 BUG_ON(r_count == 0);
2174 if (!delete)
2175 BUG_ON(r_count > 1);
2176
2177 r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
2178 le32_to_cpu(rec.r_clusters)) - cpos;
2179
2180 ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh,
2181 ref_leaf_bh, index,
2182 cpos, r_len,
2183 meta_ac, dealloc);
2184 if (ret) {
2185 mlog_errno(ret);
2186 goto out;
2187 }
2188
2189 if (le32_to_cpu(rec.r_refcount) == 1 && delete) {
2190 ret = ocfs2_cache_cluster_dealloc(dealloc,
2191 ocfs2_clusters_to_blocks(sb, cpos),
2192 r_len);
2193 if (ret) {
2194 mlog_errno(ret);
2195 goto out;
2196 }
2197 }
2198
2199 cpos += r_len;
2200 len -= r_len;
2201 brelse(ref_leaf_bh);
2202 ref_leaf_bh = NULL;
2203 }
2204
2205out:
2206 brelse(ref_leaf_bh);
2207 return ret;
2208}
2209
2210/* Caller must hold refcount tree lock. */
2211int ocfs2_decrease_refcount(struct inode *inode,
2212 handle_t *handle, u32 cpos, u32 len,
2213 struct ocfs2_alloc_context *meta_ac,
2214 struct ocfs2_cached_dealloc_ctxt *dealloc,
2215 int delete)
2216{
2217 int ret;
2218 u64 ref_blkno;
2219 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2220 struct buffer_head *ref_root_bh = NULL;
2221 struct ocfs2_refcount_tree *tree;
2222
2223 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2224
2225 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
2226 if (ret) {
2227 mlog_errno(ret);
2228 goto out;
2229 }
2230
2231 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree);
2232 if (ret) {
2233 mlog_errno(ret);
2234 goto out;
2235 }
2236
2237 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
2238 &ref_root_bh);
2239 if (ret) {
2240 mlog_errno(ret);
2241 goto out;
2242 }
2243
2244 ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
2245 cpos, len, meta_ac, dealloc, delete);
2246 if (ret)
2247 mlog_errno(ret);
2248out:
2249 brelse(ref_root_bh);
2250 return ret;
2251}
2252
2253/*
2254 * Mark the already-existing extent at cpos as refcounted for len clusters.
2255 * This adds the refcount extent flag.
2256 *
2257 * If the existing extent is larger than the request, initiate a
2258 * split. An attempt will be made at merging with adjacent extents.
2259 *
2260 * The caller is responsible for passing down meta_ac if we'll need it.
2261 */
2262static int ocfs2_mark_extent_refcounted(struct inode *inode,
2263 struct ocfs2_extent_tree *et,
2264 handle_t *handle, u32 cpos,
2265 u32 len, u32 phys,
2266 struct ocfs2_alloc_context *meta_ac,
2267 struct ocfs2_cached_dealloc_ctxt *dealloc)
2268{
2269 int ret;
2270
2271 mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n",
2272 inode->i_ino, cpos, len, phys);
2273
2274 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
2275 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
2276 "tree, but the feature bit is not set in the "
2277 "super block.", inode->i_ino);
2278 ret = -EROFS;
2279 goto out;
2280 }
2281
2282 ret = ocfs2_change_extent_flag(handle, et, cpos,
2283 len, phys, meta_ac, dealloc,
2284 OCFS2_EXT_REFCOUNTED, 0);
2285 if (ret)
2286 mlog_errno(ret);
2287
2288out:
2289 return ret;
2290}
2291
2292/*
2293 * Given some contiguous physical clusters, calculate what we need
2294 * for modifying their refcount.
2295 */
2296static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2297 struct ocfs2_caching_info *ci,
2298 struct buffer_head *ref_root_bh,
2299 u64 start_cpos,
2300 u32 clusters,
2301 int *meta_add,
2302 int *credits)
2303{
2304 int ret = 0, index, ref_blocks = 0, recs_add = 0;
2305 u64 cpos = start_cpos;
2306 struct ocfs2_refcount_block *rb;
2307 struct ocfs2_refcount_rec rec;
2308 struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
2309 u32 len;
2310
2311 mlog(0, "start_cpos %llu, clusters %u\n",
2312 (unsigned long long)start_cpos, clusters);
2313 while (clusters) {
2314 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
2315 cpos, clusters, &rec,
2316 &index, &ref_leaf_bh);
2317 if (ret) {
2318 mlog_errno(ret);
2319 goto out;
2320 }
2321
2322 if (ref_leaf_bh != prev_bh) {
2323 /*
2324 * Now we encounter a new leaf block, so calculate
2325 * whether we need to extend the old leaf.
2326 */
2327 if (prev_bh) {
2328 rb = (struct ocfs2_refcount_block *)
2329 prev_bh->b_data;
2330
2331 if (le64_to_cpu(rb->rf_records.rl_used) +
2332 recs_add >
2333 le16_to_cpu(rb->rf_records.rl_count))
2334 ref_blocks++;
2335 }
2336
2337 recs_add = 0;
2338 *credits += 1;
2339 brelse(prev_bh);
2340 prev_bh = ref_leaf_bh;
2341 get_bh(prev_bh);
2342 }
2343
2344 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2345
2346 mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu,"
2347 "rec->r_clusters %u, rec->r_refcount %u, index %d\n",
2348 recs_add, (unsigned long long)cpos, clusters,
2349 (unsigned long long)le64_to_cpu(rec.r_cpos),
2350 le32_to_cpu(rec.r_clusters),
2351 le32_to_cpu(rec.r_refcount), index);
2352
2353 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
2354 le32_to_cpu(rec.r_clusters)) - cpos;
2355 /*
2356 * If the refcount rec already exist, cool. We just need
2357 * to check whether there is a split. Otherwise we just need
2358 * to increase the refcount.
2359 * If we will insert one, increases recs_add.
2360 *
2361 * We record all the records which will be inserted to the
2362 * same refcount block, so that we can tell exactly whether
2363 * we need a new refcount block or not.
2364 */
2365 if (rec.r_refcount) {
2366 /* Check whether we need a split at the beginning. */
2367 if (cpos == start_cpos &&
2368 cpos != le64_to_cpu(rec.r_cpos))
2369 recs_add++;
2370
2371 /* Check whether we need a split in the end. */
2372 if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
2373 le32_to_cpu(rec.r_clusters))
2374 recs_add++;
2375 } else
2376 recs_add++;
2377
2378 brelse(ref_leaf_bh);
2379 ref_leaf_bh = NULL;
2380 clusters -= len;
2381 cpos += len;
2382 }
2383
2384 if (prev_bh) {
2385 rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
2386
2387 if (le64_to_cpu(rb->rf_records.rl_used) + recs_add >
2388 le16_to_cpu(rb->rf_records.rl_count))
2389 ref_blocks++;
2390
2391 *credits += 1;
2392 }
2393
2394 if (!ref_blocks)
2395 goto out;
2396
2397 mlog(0, "we need ref_blocks %d\n", ref_blocks);
2398 *meta_add += ref_blocks;
2399 *credits += ref_blocks;
2400
2401 /*
2402 * So we may need ref_blocks to insert into the tree.
2403 * That also means we need to change the b-tree and add that number
2404 * of records since we never merge them.
2405 * We need one more block for expansion since the new created leaf
2406 * block is also full and needs split.
2407 */
2408 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
2409 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) {
2410 struct ocfs2_extent_tree et;
2411
2412 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
2413 *meta_add += ocfs2_extend_meta_needed(et.et_root_el);
2414 *credits += ocfs2_calc_extend_credits(sb,
2415 et.et_root_el,
2416 ref_blocks);
2417 } else {
2418 *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
2419 *meta_add += 1;
2420 }
2421
2422out:
2423 brelse(ref_leaf_bh);
2424 brelse(prev_bh);
2425 return ret;
2426}
2427
2428/*
2429 * For refcount tree, we will decrease some contiguous clusters
2430 * refcount count, so just go through it to see how many blocks
2431 * we gonna touch and whether we need to create new blocks.
2432 *
2433 * Normally the refcount blocks store these refcount should be
2434 * continguous also, so that we can get the number easily.
2435 * As for meta_ac, we will at most add split 2 refcount record and
2436 * 2 more refcount block, so just check it in a rough way.
2437 *
2438 * Caller must hold refcount tree lock.
2439 */
2440int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2441 struct buffer_head *di_bh,
2442 u64 phys_blkno,
2443 u32 clusters,
2444 int *credits,
2445 struct ocfs2_alloc_context **meta_ac)
2446{
2447 int ret, ref_blocks = 0;
2448 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2449 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2450 struct buffer_head *ref_root_bh = NULL;
2451 struct ocfs2_refcount_tree *tree;
2452 u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
2453
2454 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
2455 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
2456 "tree, but the feature bit is not set in the "
2457 "super block.", inode->i_ino);
2458 ret = -EROFS;
2459 goto out;
2460 }
2461
2462 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2463
2464 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2465 le64_to_cpu(di->i_refcount_loc), &tree);
2466 if (ret) {
2467 mlog_errno(ret);
2468 goto out;
2469 }
2470
2471 ret = ocfs2_read_refcount_block(&tree->rf_ci,
2472 le64_to_cpu(di->i_refcount_loc),
2473 &ref_root_bh);
2474 if (ret) {
2475 mlog_errno(ret);
2476 goto out;
2477 }
2478
2479 ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
2480 &tree->rf_ci,
2481 ref_root_bh,
2482 start_cpos, clusters,
2483 &ref_blocks, credits);
2484 if (ret) {
2485 mlog_errno(ret);
2486 goto out;
2487 }
2488
2489 mlog(0, "reserve new metadata %d, credits = %d\n",
2490 ref_blocks, *credits);
2491
2492 if (ref_blocks) {
2493 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2494 ref_blocks, meta_ac);
2495 if (ret)
2496 mlog_errno(ret);
2497 }
2498
2499out:
2500 brelse(ref_root_bh);
2501 return ret;
2502}
2503
2504#define MAX_CONTIG_BYTES 1048576
2505
2506static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
2507{
2508 return ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES);
2509}
2510
2511static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
2512{
2513 return ~(ocfs2_cow_contig_clusters(sb) - 1);
2514}
2515
2516/*
2517 * Given an extent that starts at 'start' and an I/O that starts at 'cpos',
2518 * find an offset (start + (n * contig_clusters)) that is closest to cpos
2519 * while still being less than or equal to it.
2520 *
2521 * The goal is to break the extent at a multiple of contig_clusters.
2522 */
2523static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
2524 unsigned int start,
2525 unsigned int cpos)
2526{
2527 BUG_ON(start > cpos);
2528
2529 return start + ((cpos - start) & ocfs2_cow_contig_mask(sb));
2530}
2531
2532/*
2533 * Given a cluster count of len, pad it out so that it is a multiple
2534 * of contig_clusters.
2535 */
2536static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
2537 unsigned int len)
2538{
2539 unsigned int padded =
2540 (len + (ocfs2_cow_contig_clusters(sb) - 1)) &
2541 ocfs2_cow_contig_mask(sb);
2542
2543 /* Did we wrap? */
2544 if (padded < len)
2545 padded = UINT_MAX;
2546
2547 return padded;
2548}
2549
2550/*
2551 * Calculate out the start and number of virtual clusters we need to to CoW.
2552 *
2553 * cpos is vitual start cluster position we want to do CoW in a
2554 * file and write_len is the cluster length.
2555 * max_cpos is the place where we want to stop CoW intentionally.
2556 *
2557 * Normal we will start CoW from the beginning of extent record cotaining cpos.
2558 * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
2559 * get good I/O from the resulting extent tree.
2560 */
2561static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
2562 struct ocfs2_extent_list *el,
2563 u32 cpos,
2564 u32 write_len,
2565 u32 max_cpos,
2566 u32 *cow_start,
2567 u32 *cow_len)
2568{
2569 int ret = 0;
2570 int tree_height = le16_to_cpu(el->l_tree_depth), i;
2571 struct buffer_head *eb_bh = NULL;
2572 struct ocfs2_extent_block *eb = NULL;
2573 struct ocfs2_extent_rec *rec;
2574 unsigned int want_clusters, rec_end = 0;
2575 int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
2576 int leaf_clusters;
2577
2578 BUG_ON(cpos + write_len > max_cpos);
2579
2580 if (tree_height > 0) {
2581 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
2582 if (ret) {
2583 mlog_errno(ret);
2584 goto out;
2585 }
2586
2587 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2588 el = &eb->h_list;
2589
2590 if (el->l_tree_depth) {
2591 ocfs2_error(inode->i_sb,
2592 "Inode %lu has non zero tree depth in "
2593 "leaf block %llu\n", inode->i_ino,
2594 (unsigned long long)eb_bh->b_blocknr);
2595 ret = -EROFS;
2596 goto out;
2597 }
2598 }
2599
2600 *cow_len = 0;
2601 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
2602 rec = &el->l_recs[i];
2603
2604 if (ocfs2_is_empty_extent(rec)) {
2605 mlog_bug_on_msg(i != 0, "Inode %lu has empty record in "
2606 "index %d\n", inode->i_ino, i);
2607 continue;
2608 }
2609
2610 if (le32_to_cpu(rec->e_cpos) +
2611 le16_to_cpu(rec->e_leaf_clusters) <= cpos)
2612 continue;
2613
2614 if (*cow_len == 0) {
2615 /*
2616 * We should find a refcounted record in the
2617 * first pass.
2618 */
2619 BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
2620 *cow_start = le32_to_cpu(rec->e_cpos);
2621 }
2622
2623 /*
2624 * If we encounter a hole, a non-refcounted record or
2625 * pass the max_cpos, stop the search.
2626 */
2627 if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
2628 (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) ||
2629 (max_cpos <= le32_to_cpu(rec->e_cpos)))
2630 break;
2631
2632 leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
2633 rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
2634 if (rec_end > max_cpos) {
2635 rec_end = max_cpos;
2636 leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos);
2637 }
2638
2639 /*
2640 * How many clusters do we actually need from
2641 * this extent? First we see how many we actually
2642 * need to complete the write. If that's smaller
2643 * than contig_clusters, we try for contig_clusters.
2644 */
2645 if (!*cow_len)
2646 want_clusters = write_len;
2647 else
2648 want_clusters = (cpos + write_len) -
2649 (*cow_start + *cow_len);
2650 if (want_clusters < contig_clusters)
2651 want_clusters = contig_clusters;
2652
2653 /*
2654 * If the write does not cover the whole extent, we
2655 * need to calculate how we're going to split the extent.
2656 * We try to do it on contig_clusters boundaries.
2657 *
2658 * Any extent smaller than contig_clusters will be
2659 * CoWed in its entirety.
2660 */
2661 if (leaf_clusters <= contig_clusters)
2662 *cow_len += leaf_clusters;
2663 else if (*cow_len || (*cow_start == cpos)) {
2664 /*
2665 * This extent needs to be CoW'd from its
2666 * beginning, so all we have to do is compute
2667 * how many clusters to grab. We align
2668 * want_clusters to the edge of contig_clusters
2669 * to get better I/O.
2670 */
2671 want_clusters = ocfs2_cow_align_length(inode->i_sb,
2672 want_clusters);
2673
2674 if (leaf_clusters < want_clusters)
2675 *cow_len += leaf_clusters;
2676 else
2677 *cow_len += want_clusters;
2678 } else if ((*cow_start + contig_clusters) >=
2679 (cpos + write_len)) {
2680 /*
2681 * Breaking off contig_clusters at the front
2682 * of the extent will cover our write. That's
2683 * easy.
2684 */
2685 *cow_len = contig_clusters;
2686 } else if ((rec_end - cpos) <= contig_clusters) {
2687 /*
2688 * Breaking off contig_clusters at the tail of
2689 * this extent will cover cpos.
2690 */
2691 *cow_start = rec_end - contig_clusters;
2692 *cow_len = contig_clusters;
2693 } else if ((rec_end - cpos) <= want_clusters) {
2694 /*
2695 * While we can't fit the entire write in this
2696 * extent, we know that the write goes from cpos
2697 * to the end of the extent. Break that off.
2698 * We try to break it at some multiple of
2699 * contig_clusters from the front of the extent.
2700 * Failing that (ie, cpos is within
2701 * contig_clusters of the front), we'll CoW the
2702 * entire extent.
2703 */
2704 *cow_start = ocfs2_cow_align_start(inode->i_sb,
2705 *cow_start, cpos);
2706 *cow_len = rec_end - *cow_start;
2707 } else {
2708 /*
2709 * Ok, the entire write lives in the middle of
2710 * this extent. Let's try to slice the extent up
2711 * nicely. Optimally, our CoW region starts at
2712 * m*contig_clusters from the beginning of the
2713 * extent and goes for n*contig_clusters,
2714 * covering the entire write.
2715 */
2716 *cow_start = ocfs2_cow_align_start(inode->i_sb,
2717 *cow_start, cpos);
2718
2719 want_clusters = (cpos + write_len) - *cow_start;
2720 want_clusters = ocfs2_cow_align_length(inode->i_sb,
2721 want_clusters);
2722 if (*cow_start + want_clusters <= rec_end)
2723 *cow_len = want_clusters;
2724 else
2725 *cow_len = rec_end - *cow_start;
2726 }
2727
2728 /* Have we covered our entire write yet? */
2729 if ((*cow_start + *cow_len) >= (cpos + write_len))
2730 break;
2731
2732 /*
2733 * If we reach the end of the extent block and don't get enough
2734 * clusters, continue with the next extent block if possible.
2735 */
2736 if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
2737 eb && eb->h_next_leaf_blk) {
2738 brelse(eb_bh);
2739 eb_bh = NULL;
2740
2741 ret = ocfs2_read_extent_block(INODE_CACHE(inode),
2742 le64_to_cpu(eb->h_next_leaf_blk),
2743 &eb_bh);
2744 if (ret) {
2745 mlog_errno(ret);
2746 goto out;
2747 }
2748
2749 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2750 el = &eb->h_list;
2751 i = -1;
2752 }
2753 }
2754
2755out:
2756 brelse(eb_bh);
2757 return ret;
2758}
2759
2760/*
2761 * Prepare meta_ac, data_ac and calculate credits when we want to add some
2762 * num_clusters in data_tree "et" and change the refcount for the old
2763 * clusters(starting form p_cluster) in the refcount tree.
2764 *
2765 * Note:
2766 * 1. since we may split the old tree, so we at most will need num_clusters + 2
2767 * more new leaf records.
2768 * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so
2769 * just give data_ac = NULL.
2770 */
2771static int ocfs2_lock_refcount_allocators(struct super_block *sb,
2772 u32 p_cluster, u32 num_clusters,
2773 struct ocfs2_extent_tree *et,
2774 struct ocfs2_caching_info *ref_ci,
2775 struct buffer_head *ref_root_bh,
2776 struct ocfs2_alloc_context **meta_ac,
2777 struct ocfs2_alloc_context **data_ac,
2778 int *credits)
2779{
2780 int ret = 0, meta_add = 0;
2781 int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et);
2782
2783 if (num_free_extents < 0) {
2784 ret = num_free_extents;
2785 mlog_errno(ret);
2786 goto out;
2787 }
2788
2789 if (num_free_extents < num_clusters + 2)
2790 meta_add =
2791 ocfs2_extend_meta_needed(et->et_root_el);
2792
2793 *credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
2794 num_clusters + 2);
2795
2796 ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
2797 p_cluster, num_clusters,
2798 &meta_add, credits);
2799 if (ret) {
2800 mlog_errno(ret);
2801 goto out;
2802 }
2803
2804 mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n",
2805 meta_add, num_clusters, *credits);
2806 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
2807 meta_ac);
2808 if (ret) {
2809 mlog_errno(ret);
2810 goto out;
2811 }
2812
2813 if (data_ac) {
2814 ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
2815 data_ac);
2816 if (ret)
2817 mlog_errno(ret);
2818 }
2819
2820out:
2821 if (ret) {
2822 if (*meta_ac) {
2823 ocfs2_free_alloc_context(*meta_ac);
2824 *meta_ac = NULL;
2825 }
2826 }
2827
2828 return ret;
2829}
2830
2831static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
2832{
2833 BUG_ON(buffer_dirty(bh));
2834
2835 clear_buffer_mapped(bh);
2836
2837 return 0;
2838}
2839
2840static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2841 struct ocfs2_cow_context *context,
2842 u32 cpos, u32 old_cluster,
2843 u32 new_cluster, u32 new_len)
2844{
2845 int ret = 0, partial;
2846 struct ocfs2_caching_info *ci = context->data_et.et_ci;
2847 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2848 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2849 struct page *page;
2850 pgoff_t page_index;
2851 unsigned int from, to;
2852 loff_t offset, end, map_end;
2853 struct address_space *mapping = context->inode->i_mapping;
2854
2855 mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
2856 new_cluster, new_len, cpos);
2857
2858 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2859 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2860
2861 while (offset < end) {
2862 page_index = offset >> PAGE_CACHE_SHIFT;
2863 map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
2864 if (map_end > end)
2865 map_end = end;
2866
2867 /* from, to is the offset within the page. */
2868 from = offset & (PAGE_CACHE_SIZE - 1);
2869 to = PAGE_CACHE_SIZE;
2870 if (map_end & (PAGE_CACHE_SIZE - 1))
2871 to = map_end & (PAGE_CACHE_SIZE - 1);
2872
2873 page = grab_cache_page(mapping, page_index);
2874
2875 /* This page can't be dirtied before we CoW it out. */
2876 BUG_ON(PageDirty(page));
2877
2878 if (!PageUptodate(page)) {
2879 ret = block_read_full_page(page, ocfs2_get_block);
2880 if (ret) {
2881 mlog_errno(ret);
2882 goto unlock;
2883 }
2884 lock_page(page);
2885 }
2886
2887 if (page_has_buffers(page)) {
2888 ret = walk_page_buffers(handle, page_buffers(page),
2889 from, to, &partial,
2890 ocfs2_clear_cow_buffer);
2891 if (ret) {
2892 mlog_errno(ret);
2893 goto unlock;
2894 }
2895 }
2896
2897 ocfs2_map_and_dirty_page(context->inode,
2898 handle, from, to,
2899 page, 0, &new_block);
2900 mark_page_accessed(page);
2901unlock:
2902 unlock_page(page);
2903 page_cache_release(page);
2904 page = NULL;
2905 offset = map_end;
2906 if (ret)
2907 break;
2908 }
2909
2910 return ret;
2911}
2912
2913static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
2914 struct ocfs2_cow_context *context,
2915 u32 cpos, u32 old_cluster,
2916 u32 new_cluster, u32 new_len)
2917{
2918 int ret = 0;
2919 struct super_block *sb = context->inode->i_sb;
2920 struct ocfs2_caching_info *ci = context->data_et.et_ci;
2921 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
2922 u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
2923 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2924 struct ocfs2_super *osb = OCFS2_SB(sb);
2925 struct buffer_head *old_bh = NULL;
2926 struct buffer_head *new_bh = NULL;
2927
2928 mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster,
2929 new_cluster, new_len);
2930
2931 for (i = 0; i < blocks; i++, old_block++, new_block++) {
2932 new_bh = sb_getblk(osb->sb, new_block);
2933 if (new_bh == NULL) {
2934 ret = -EIO;
2935 mlog_errno(ret);
2936 break;
2937 }
2938
2939 ocfs2_set_new_buffer_uptodate(ci, new_bh);
2940
2941 ret = ocfs2_read_block(ci, old_block, &old_bh, NULL);
2942 if (ret) {
2943 mlog_errno(ret);
2944 break;
2945 }
2946
2947 ret = ocfs2_journal_access(handle, ci, new_bh,
2948 OCFS2_JOURNAL_ACCESS_CREATE);
2949 if (ret) {
2950 mlog_errno(ret);
2951 break;
2952 }
2953
2954 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
2955 ret = ocfs2_journal_dirty(handle, new_bh);
2956 if (ret) {
2957 mlog_errno(ret);
2958 break;
2959 }
2960
2961 brelse(new_bh);
2962 brelse(old_bh);
2963 new_bh = NULL;
2964 old_bh = NULL;
2965 }
2966
2967 brelse(new_bh);
2968 brelse(old_bh);
2969 return ret;
2970}
2971
2972static int ocfs2_clear_ext_refcount(handle_t *handle,
2973 struct ocfs2_extent_tree *et,
2974 u32 cpos, u32 p_cluster, u32 len,
2975 unsigned int ext_flags,
2976 struct ocfs2_alloc_context *meta_ac,
2977 struct ocfs2_cached_dealloc_ctxt *dealloc)
2978{
2979 int ret, index;
2980 struct ocfs2_extent_rec replace_rec;
2981 struct ocfs2_path *path = NULL;
2982 struct ocfs2_extent_list *el;
2983 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
2984 u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
2985
2986 mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n",
2987 (unsigned long long)ino, cpos, len, p_cluster, ext_flags);
2988
2989 memset(&replace_rec, 0, sizeof(replace_rec));
2990 replace_rec.e_cpos = cpu_to_le32(cpos);
2991 replace_rec.e_leaf_clusters = cpu_to_le16(len);
2992 replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
2993 p_cluster));
2994 replace_rec.e_flags = ext_flags;
2995 replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;
2996
2997 path = ocfs2_new_path_from_et(et);
2998 if (!path) {
2999 ret = -ENOMEM;
3000 mlog_errno(ret);
3001 goto out;
3002 }
3003
3004 ret = ocfs2_find_path(et->et_ci, path, cpos);
3005 if (ret) {
3006 mlog_errno(ret);
3007 goto out;
3008 }
3009
3010 el = path_leaf_el(path);
3011
3012 index = ocfs2_search_extent_list(el, cpos);
3013 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
3014 ocfs2_error(sb,
3015 "Inode %llu has an extent at cpos %u which can no "
3016 "longer be found.\n",
3017 (unsigned long long)ino, cpos);
3018 ret = -EROFS;
3019 goto out;
3020 }
3021
3022 ret = ocfs2_split_extent(handle, et, path, index,
3023 &replace_rec, meta_ac, dealloc);
3024 if (ret)
3025 mlog_errno(ret);
3026
3027out:
3028 ocfs2_free_path(path);
3029 return ret;
3030}
3031
3032static int ocfs2_replace_clusters(handle_t *handle,
3033 struct ocfs2_cow_context *context,
3034 u32 cpos, u32 old,
3035 u32 new, u32 len,
3036 unsigned int ext_flags)
3037{
3038 int ret;
3039 struct ocfs2_caching_info *ci = context->data_et.et_ci;
3040 u64 ino = ocfs2_metadata_cache_owner(ci);
3041
3042 mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
3043 (unsigned long long)ino, cpos, old, new, len, ext_flags);
3044
3045 /*If the old clusters is unwritten, no need to duplicate. */
3046 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
3047 ret = context->cow_duplicate_clusters(handle, context, cpos,
3048 old, new, len);
3049 if (ret) {
3050 mlog_errno(ret);
3051 goto out;
3052 }
3053 }
3054
3055 ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
3056 cpos, new, len, ext_flags,
3057 context->meta_ac, &context->dealloc);
3058 if (ret)
3059 mlog_errno(ret);
3060out:
3061 return ret;
3062}
3063
3064static int ocfs2_cow_sync_writeback(struct super_block *sb,
3065 struct ocfs2_cow_context *context,
3066 u32 cpos, u32 num_clusters)
3067{
3068 int ret = 0;
3069 loff_t offset, end, map_end;
3070 pgoff_t page_index;
3071 struct page *page;
3072
3073 if (ocfs2_should_order_data(context->inode))
3074 return 0;
3075
3076 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
3077 end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
3078
3079 ret = filemap_fdatawrite_range(context->inode->i_mapping,
3080 offset, end - 1);
3081 if (ret < 0) {
3082 mlog_errno(ret);
3083 return ret;
3084 }
3085
3086 while (offset < end) {
3087 page_index = offset >> PAGE_CACHE_SHIFT;
3088 map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
3089 if (map_end > end)
3090 map_end = end;
3091
3092 page = grab_cache_page(context->inode->i_mapping, page_index);
3093 BUG_ON(!page);
3094
3095 wait_on_page_writeback(page);
3096 if (PageError(page)) {
3097 ret = -EIO;
3098 mlog_errno(ret);
3099 } else
3100 mark_page_accessed(page);
3101
3102 unlock_page(page);
3103 page_cache_release(page);
3104 page = NULL;
3105 offset = map_end;
3106 if (ret)
3107 break;
3108 }
3109
3110 return ret;
3111}
3112
3113static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context,
3114 u32 v_cluster, u32 *p_cluster,
3115 u32 *num_clusters,
3116 unsigned int *extent_flags)
3117{
3118 return ocfs2_get_clusters(context->inode, v_cluster, p_cluster,
3119 num_clusters, extent_flags);
3120}
3121
3122static int ocfs2_make_clusters_writable(struct super_block *sb,
3123 struct ocfs2_cow_context *context,
3124 u32 cpos, u32 p_cluster,
3125 u32 num_clusters, unsigned int e_flags)
3126{
3127 int ret, delete, index, credits = 0;
3128 u32 new_bit, new_len;
3129 unsigned int set_len;
3130 struct ocfs2_super *osb = OCFS2_SB(sb);
3131 handle_t *handle;
3132 struct buffer_head *ref_leaf_bh = NULL;
3133 struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
3134 struct ocfs2_refcount_rec rec;
3135
3136 mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n",
3137 cpos, p_cluster, num_clusters, e_flags);
3138
3139 ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
3140 &context->data_et,
3141 ref_ci,
3142 context->ref_root_bh,
3143 &context->meta_ac,
3144 &context->data_ac, &credits);
3145 if (ret) {
3146 mlog_errno(ret);
3147 return ret;
3148 }
3149
3150 if (context->post_refcount)
3151 credits += context->post_refcount->credits;
3152
3153 credits += context->extra_credits;
3154 handle = ocfs2_start_trans(osb, credits);
3155 if (IS_ERR(handle)) {
3156 ret = PTR_ERR(handle);
3157 mlog_errno(ret);
3158 goto out;
3159 }
3160
3161 while (num_clusters) {
3162 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3163 p_cluster, num_clusters,
3164 &rec, &index, &ref_leaf_bh);
3165 if (ret) {
3166 mlog_errno(ret);
3167 goto out_commit;
3168 }
3169
3170 BUG_ON(!rec.r_refcount);
3171 set_len = min((u64)p_cluster + num_clusters,
3172 le64_to_cpu(rec.r_cpos) +
3173 le32_to_cpu(rec.r_clusters)) - p_cluster;
3174
3175 /*
3176 * There are many different situation here.
3177 * 1. If refcount == 1, remove the flag and don't COW.
3178 * 2. If refcount > 1, allocate clusters.
3179 * Here we may not allocate r_len once at a time, so continue
3180 * until we reach num_clusters.
3181 */
3182 if (le32_to_cpu(rec.r_refcount) == 1) {
3183 delete = 0;
3184 ret = ocfs2_clear_ext_refcount(handle,
3185 &context->data_et,
3186 cpos, p_cluster,
3187 set_len, e_flags,
3188 context->meta_ac,
3189 &context->dealloc);
3190 if (ret) {
3191 mlog_errno(ret);
3192 goto out_commit;
3193 }
3194 } else {
3195 delete = 1;
3196
3197 ret = __ocfs2_claim_clusters(osb, handle,
3198 context->data_ac,
3199 1, set_len,
3200 &new_bit, &new_len);
3201 if (ret) {
3202 mlog_errno(ret);
3203 goto out_commit;
3204 }
3205
3206 ret = ocfs2_replace_clusters(handle, context,
3207 cpos, p_cluster, new_bit,
3208 new_len, e_flags);
3209 if (ret) {
3210 mlog_errno(ret);
3211 goto out_commit;
3212 }
3213 set_len = new_len;
3214 }
3215
3216 ret = __ocfs2_decrease_refcount(handle, ref_ci,
3217 context->ref_root_bh,
3218 p_cluster, set_len,
3219 context->meta_ac,
3220 &context->dealloc, delete);
3221 if (ret) {
3222 mlog_errno(ret);
3223 goto out_commit;
3224 }
3225
3226 cpos += set_len;
3227 p_cluster += set_len;
3228 num_clusters -= set_len;
3229 brelse(ref_leaf_bh);
3230 ref_leaf_bh = NULL;
3231 }
3232
3233 /* handle any post_cow action. */
3234 if (context->post_refcount && context->post_refcount->func) {
3235 ret = context->post_refcount->func(context->inode, handle,
3236 context->post_refcount->para);
3237 if (ret) {
3238 mlog_errno(ret);
3239 goto out_commit;
3240 }
3241 }
3242
3243 /*
3244 * Here we should write the new page out first if we are
3245 * in write-back mode.
3246 */
3247 if (context->get_clusters == ocfs2_di_get_clusters) {
3248 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
3249 if (ret)
3250 mlog_errno(ret);
3251 }
3252
3253out_commit:
3254 ocfs2_commit_trans(osb, handle);
3255
3256out:
3257 if (context->data_ac) {
3258 ocfs2_free_alloc_context(context->data_ac);
3259 context->data_ac = NULL;
3260 }
3261 if (context->meta_ac) {
3262 ocfs2_free_alloc_context(context->meta_ac);
3263 context->meta_ac = NULL;
3264 }
3265 brelse(ref_leaf_bh);
3266
3267 return ret;
3268}
3269
3270static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
3271{
3272 int ret = 0;
3273 struct inode *inode = context->inode;
3274 u32 cow_start = context->cow_start, cow_len = context->cow_len;
3275 u32 p_cluster, num_clusters;
3276 unsigned int ext_flags;
3277 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3278
3279 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
3280 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
3281 "tree, but the feature bit is not set in the "
3282 "super block.", inode->i_ino);
3283 return -EROFS;
3284 }
3285
3286 ocfs2_init_dealloc_ctxt(&context->dealloc);
3287
3288 while (cow_len) {
3289 ret = context->get_clusters(context, cow_start, &p_cluster,
3290 &num_clusters, &ext_flags);
3291 if (ret) {
3292 mlog_errno(ret);
3293 break;
3294 }
3295
3296 BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));
3297
3298 if (cow_len < num_clusters)
3299 num_clusters = cow_len;
3300
3301 ret = ocfs2_make_clusters_writable(inode->i_sb, context,
3302 cow_start, p_cluster,
3303 num_clusters, ext_flags);
3304 if (ret) {
3305 mlog_errno(ret);
3306 break;
3307 }
3308
3309 cow_len -= num_clusters;
3310 cow_start += num_clusters;
3311 }
3312
3313 if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
3314 ocfs2_schedule_truncate_log_flush(osb, 1);
3315 ocfs2_run_deallocs(osb, &context->dealloc);
3316 }
3317
3318 return ret;
3319}
3320
3321/*
3322 * Starting at cpos, try to CoW write_len clusters. Don't CoW
3323 * past max_cpos. This will stop when it runs into a hole or an
3324 * unrefcounted extent.
3325 */
3326static int ocfs2_refcount_cow_hunk(struct inode *inode,
3327 struct buffer_head *di_bh,
3328 u32 cpos, u32 write_len, u32 max_cpos)
3329{
3330 int ret;
3331 u32 cow_start = 0, cow_len = 0;
3332 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3333 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3334 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3335 struct buffer_head *ref_root_bh = NULL;
3336 struct ocfs2_refcount_tree *ref_tree;
3337 struct ocfs2_cow_context *context = NULL;
3338
3339 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
3340
3341 ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
3342 cpos, write_len, max_cpos,
3343 &cow_start, &cow_len);
3344 if (ret) {
3345 mlog_errno(ret);
3346 goto out;
3347 }
3348
3349 mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
3350 "cow_len %u\n", inode->i_ino,
3351 cpos, write_len, cow_start, cow_len);
3352
3353 BUG_ON(cow_len == 0);
3354
3355 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3356 if (!context) {
3357 ret = -ENOMEM;
3358 mlog_errno(ret);
3359 goto out;
3360 }
3361
3362 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
3363 1, &ref_tree, &ref_root_bh);
3364 if (ret) {
3365 mlog_errno(ret);
3366 goto out;
3367 }
3368
3369 context->inode = inode;
3370 context->cow_start = cow_start;
3371 context->cow_len = cow_len;
3372 context->ref_tree = ref_tree;
3373 context->ref_root_bh = ref_root_bh;
3374 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
3375 context->get_clusters = ocfs2_di_get_clusters;
3376
3377 ocfs2_init_dinode_extent_tree(&context->data_et,
3378 INODE_CACHE(inode), di_bh);
3379
3380 ret = ocfs2_replace_cow(context);
3381 if (ret)
3382 mlog_errno(ret);
3383
3384 /*
3385 * truncate the extent map here since no matter whether we meet with
3386 * any error during the action, we shouldn't trust cached extent map
3387 * any more.
3388 */
3389 ocfs2_extent_map_trunc(inode, cow_start);
3390
3391 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3392 brelse(ref_root_bh);
3393out:
3394 kfree(context);
3395 return ret;
3396}
3397
3398/*
3399 * CoW any and all clusters between cpos and cpos+write_len.
3400 * Don't CoW past max_cpos. If this returns successfully, all
3401 * clusters between cpos and cpos+write_len are safe to modify.
3402 */
3403int ocfs2_refcount_cow(struct inode *inode,
3404 struct buffer_head *di_bh,
3405 u32 cpos, u32 write_len, u32 max_cpos)
3406{
3407 int ret = 0;
3408 u32 p_cluster, num_clusters;
3409 unsigned int ext_flags;
3410
3411 while (write_len) {
3412 ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
3413 &num_clusters, &ext_flags);
3414 if (ret) {
3415 mlog_errno(ret);
3416 break;
3417 }
3418
3419 if (write_len < num_clusters)
3420 num_clusters = write_len;
3421
3422 if (ext_flags & OCFS2_EXT_REFCOUNTED) {
3423 ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
3424 num_clusters, max_cpos);
3425 if (ret) {
3426 mlog_errno(ret);
3427 break;
3428 }
3429 }
3430
3431 write_len -= num_clusters;
3432 cpos += num_clusters;
3433 }
3434
3435 return ret;
3436}
3437
3438static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context,
3439 u32 v_cluster, u32 *p_cluster,
3440 u32 *num_clusters,
3441 unsigned int *extent_flags)
3442{
3443 struct inode *inode = context->inode;
3444 struct ocfs2_xattr_value_root *xv = context->cow_object;
3445
3446 return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster,
3447 num_clusters, &xv->xr_list,
3448 extent_flags);
3449}
3450
3451/*
3452 * Given a xattr value root, calculate the most meta/credits we need for
3453 * refcount tree change if we truncate it to 0.
3454 */
3455int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
3456 struct ocfs2_caching_info *ref_ci,
3457 struct buffer_head *ref_root_bh,
3458 struct ocfs2_xattr_value_root *xv,
3459 int *meta_add, int *credits)
3460{
3461 int ret = 0, index, ref_blocks = 0;
3462 u32 p_cluster, num_clusters;
3463 u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters);
3464 struct ocfs2_refcount_block *rb;
3465 struct ocfs2_refcount_rec rec;
3466 struct buffer_head *ref_leaf_bh = NULL;
3467
3468 while (cpos < clusters) {
3469 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
3470 &num_clusters, &xv->xr_list,
3471 NULL);
3472 if (ret) {
3473 mlog_errno(ret);
3474 goto out;
3475 }
3476
3477 cpos += num_clusters;
3478
3479 while (num_clusters) {
3480 ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh,
3481 p_cluster, num_clusters,
3482 &rec, &index,
3483 &ref_leaf_bh);
3484 if (ret) {
3485 mlog_errno(ret);
3486 goto out;
3487 }
3488
3489 BUG_ON(!rec.r_refcount);
3490
3491 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
3492
3493 /*
3494 * We really don't know whether the other clusters is in
3495 * this refcount block or not, so just take the worst
3496 * case that all the clusters are in this block and each
3497 * one will split a refcount rec, so totally we need
3498 * clusters * 2 new refcount rec.
3499 */
3500 if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
3501 le16_to_cpu(rb->rf_records.rl_count))
3502 ref_blocks++;
3503
3504 *credits += 1;
3505 brelse(ref_leaf_bh);
3506 ref_leaf_bh = NULL;
3507
3508 if (num_clusters <= le32_to_cpu(rec.r_clusters))
3509 break;
3510 else
3511 num_clusters -= le32_to_cpu(rec.r_clusters);
3512 p_cluster += num_clusters;
3513 }
3514 }
3515
3516 *meta_add += ref_blocks;
3517 if (!ref_blocks)
3518 goto out;
3519
3520 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
3521 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
3522 *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
3523 else {
3524 struct ocfs2_extent_tree et;
3525
3526 ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh);
3527 *credits += ocfs2_calc_extend_credits(inode->i_sb,
3528 et.et_root_el,
3529 ref_blocks);
3530 }
3531
3532out:
3533 brelse(ref_leaf_bh);
3534 return ret;
3535}
3536
3537/*
3538 * Do CoW for xattr.
3539 */
3540int ocfs2_refcount_cow_xattr(struct inode *inode,
3541 struct ocfs2_dinode *di,
3542 struct ocfs2_xattr_value_buf *vb,
3543 struct ocfs2_refcount_tree *ref_tree,
3544 struct buffer_head *ref_root_bh,
3545 u32 cpos, u32 write_len,
3546 struct ocfs2_post_refcount *post)
3547{
3548 int ret;
3549 struct ocfs2_xattr_value_root *xv = vb->vb_xv;
3550 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3551 struct ocfs2_cow_context *context = NULL;
3552 u32 cow_start, cow_len;
3553
3554 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
3555
3556 ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
3557 cpos, write_len, UINT_MAX,
3558 &cow_start, &cow_len);
3559 if (ret) {
3560 mlog_errno(ret);
3561 goto out;
3562 }
3563
3564 BUG_ON(cow_len == 0);
3565
3566 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3567 if (!context) {
3568 ret = -ENOMEM;
3569 mlog_errno(ret);
3570 goto out;
3571 }
3572
3573 context->inode = inode;
3574 context->cow_start = cow_start;
3575 context->cow_len = cow_len;
3576 context->ref_tree = ref_tree;
3577 context->ref_root_bh = ref_root_bh;;
3578 context->cow_object = xv;
3579
3580 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
3581 /* We need the extra credits for duplicate_clusters by jbd. */
3582 context->extra_credits =
3583 ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len;
3584 context->get_clusters = ocfs2_xattr_value_get_clusters;
3585 context->post_refcount = post;
3586
3587 ocfs2_init_xattr_value_extent_tree(&context->data_et,
3588 INODE_CACHE(inode), vb);
3589
3590 ret = ocfs2_replace_cow(context);
3591 if (ret)
3592 mlog_errno(ret);
3593
3594out:
3595 kfree(context);
3596 return ret;
3597}
3598
3599/*
3600 * Insert a new extent into refcount tree and mark a extent rec
3601 * as refcounted in the dinode tree.
3602 */
3603int ocfs2_add_refcount_flag(struct inode *inode,
3604 struct ocfs2_extent_tree *data_et,
3605 struct ocfs2_caching_info *ref_ci,
3606 struct buffer_head *ref_root_bh,
3607 u32 cpos, u32 p_cluster, u32 num_clusters,
3608 struct ocfs2_cached_dealloc_ctxt *dealloc,
3609 struct ocfs2_post_refcount *post)
3610{
3611 int ret;
3612 handle_t *handle;
3613 int credits = 1, ref_blocks = 0;
3614 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3615 struct ocfs2_alloc_context *meta_ac = NULL;
3616
3617 ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
3618 ref_ci, ref_root_bh,
3619 p_cluster, num_clusters,
3620 &ref_blocks, &credits);
3621 if (ret) {
3622 mlog_errno(ret);
3623 goto out;
3624 }
3625
3626 mlog(0, "reserve new metadata %d, credits = %d\n",
3627 ref_blocks, credits);
3628
3629 if (ref_blocks) {
3630 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
3631 ref_blocks, &meta_ac);
3632 if (ret) {
3633 mlog_errno(ret);
3634 goto out;
3635 }
3636 }
3637
3638 if (post)
3639 credits += post->credits;
3640
3641 handle = ocfs2_start_trans(osb, credits);
3642 if (IS_ERR(handle)) {
3643 ret = PTR_ERR(handle);
3644 mlog_errno(ret);
3645 goto out;
3646 }
3647
3648 ret = ocfs2_mark_extent_refcounted(inode, data_et, handle,
3649 cpos, num_clusters, p_cluster,
3650 meta_ac, dealloc);
3651 if (ret) {
3652 mlog_errno(ret);
3653 goto out_commit;
3654 }
3655
3656 ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
3657 p_cluster, num_clusters, 0,
3658 meta_ac, dealloc);
3659 if (ret) {
3660 mlog_errno(ret);
3661 goto out_commit;
3662 }
3663
3664 if (post && post->func) {
3665 ret = post->func(inode, handle, post->para);
3666 if (ret)
3667 mlog_errno(ret);
3668 }
3669
3670out_commit:
3671 ocfs2_commit_trans(osb, handle);
3672out:
3673 if (meta_ac)
3674 ocfs2_free_alloc_context(meta_ac);
3675 return ret;
3676}
3677
3678static int ocfs2_change_ctime(struct inode *inode,
3679 struct buffer_head *di_bh)
3680{
3681 int ret;
3682 handle_t *handle;
3683 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3684
3685 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
3686 OCFS2_INODE_UPDATE_CREDITS);
3687 if (IS_ERR(handle)) {
3688 ret = PTR_ERR(handle);
3689 mlog_errno(ret);
3690 goto out;
3691 }
3692
3693 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
3694 OCFS2_JOURNAL_ACCESS_WRITE);
3695 if (ret) {
3696 mlog_errno(ret);
3697 goto out_commit;
3698 }
3699
3700 inode->i_ctime = CURRENT_TIME;
3701 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3702 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3703
3704 ocfs2_journal_dirty(handle, di_bh);
3705
3706out_commit:
3707 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
3708out:
3709 return ret;
3710}
3711
3712static int ocfs2_attach_refcount_tree(struct inode *inode,
3713 struct buffer_head *di_bh)
3714{
3715 int ret, data_changed = 0;
3716 struct buffer_head *ref_root_bh = NULL;
3717 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3718 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3719 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3720 struct ocfs2_refcount_tree *ref_tree;
3721 unsigned int ext_flags;
3722 loff_t size;
3723 u32 cpos, num_clusters, clusters, p_cluster;
3724 struct ocfs2_cached_dealloc_ctxt dealloc;
3725 struct ocfs2_extent_tree di_et;
3726
3727 ocfs2_init_dealloc_ctxt(&dealloc);
3728
3729 if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) {
3730 ret = ocfs2_create_refcount_tree(inode, di_bh);
3731 if (ret) {
3732 mlog_errno(ret);
3733 goto out;
3734 }
3735 }
3736
3737 BUG_ON(!di->i_refcount_loc);
3738 ret = ocfs2_lock_refcount_tree(osb,
3739 le64_to_cpu(di->i_refcount_loc), 1,
3740 &ref_tree, &ref_root_bh);
3741 if (ret) {
3742 mlog_errno(ret);
3743 goto out;
3744 }
3745
3746 ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh);
3747
3748 size = i_size_read(inode);
3749 clusters = ocfs2_clusters_for_bytes(inode->i_sb, size);
3750
3751 cpos = 0;
3752 while (cpos < clusters) {
3753 ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
3754 &num_clusters, &ext_flags);
3755
3756 if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) {
3757 ret = ocfs2_add_refcount_flag(inode, &di_et,
3758 &ref_tree->rf_ci,
3759 ref_root_bh, cpos,
3760 p_cluster, num_clusters,
3761 &dealloc, NULL);
3762 if (ret) {
3763 mlog_errno(ret);
3764 goto unlock;
3765 }
3766
3767 data_changed = 1;
3768 }
3769 cpos += num_clusters;
3770 }
3771
3772 if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
3773 ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh,
3774 &ref_tree->rf_ci,
3775 ref_root_bh,
3776 &dealloc);
3777 if (ret) {
3778 mlog_errno(ret);
3779 goto unlock;
3780 }
3781 }
3782
3783 if (data_changed) {
3784 ret = ocfs2_change_ctime(inode, di_bh);
3785 if (ret)
3786 mlog_errno(ret);
3787 }
3788
3789unlock:
3790 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3791 brelse(ref_root_bh);
3792
3793 if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) {
3794 ocfs2_schedule_truncate_log_flush(osb, 1);
3795 ocfs2_run_deallocs(osb, &dealloc);
3796 }
3797out:
3798 /*
3799 * Empty the extent map so that we may get the right extent
3800 * record from the disk.
3801 */
3802 ocfs2_extent_map_trunc(inode, 0);
3803
3804 return ret;
3805}
3806
3807static int ocfs2_add_refcounted_extent(struct inode *inode,
3808 struct ocfs2_extent_tree *et,
3809 struct ocfs2_caching_info *ref_ci,
3810 struct buffer_head *ref_root_bh,
3811 u32 cpos, u32 p_cluster, u32 num_clusters,
3812 unsigned int ext_flags,
3813 struct ocfs2_cached_dealloc_ctxt *dealloc)
3814{
3815 int ret;
3816 handle_t *handle;
3817 int credits = 0;
3818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3819 struct ocfs2_alloc_context *meta_ac = NULL;
3820
3821 ret = ocfs2_lock_refcount_allocators(inode->i_sb,
3822 p_cluster, num_clusters,
3823 et, ref_ci,
3824 ref_root_bh, &meta_ac,
3825 NULL, &credits);
3826 if (ret) {
3827 mlog_errno(ret);
3828 goto out;
3829 }
3830
3831 handle = ocfs2_start_trans(osb, credits);
3832 if (IS_ERR(handle)) {
3833 ret = PTR_ERR(handle);
3834 mlog_errno(ret);
3835 goto out;
3836 }
3837
3838 ret = ocfs2_insert_extent(handle, et, cpos,
3839 cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
3840 p_cluster)),
3841 num_clusters, ext_flags, meta_ac);
3842 if (ret) {
3843 mlog_errno(ret);
3844 goto out_commit;
3845 }
3846
3847 ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
3848 p_cluster, num_clusters,
3849 meta_ac, dealloc);
3850 if (ret)
3851 mlog_errno(ret);
3852
3853out_commit:
3854 ocfs2_commit_trans(osb, handle);
3855out:
3856 if (meta_ac)
3857 ocfs2_free_alloc_context(meta_ac);
3858 return ret;
3859}
3860
3861static int ocfs2_duplicate_extent_list(struct inode *s_inode,
3862 struct inode *t_inode,
3863 struct buffer_head *t_bh,
3864 struct ocfs2_caching_info *ref_ci,
3865 struct buffer_head *ref_root_bh,
3866 struct ocfs2_cached_dealloc_ctxt *dealloc)
3867{
3868 int ret = 0;
3869 u32 p_cluster, num_clusters, clusters, cpos;
3870 loff_t size;
3871 unsigned int ext_flags;
3872 struct ocfs2_extent_tree et;
3873
3874 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh);
3875
3876 size = i_size_read(s_inode);
3877 clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size);
3878
3879 cpos = 0;
3880 while (cpos < clusters) {
3881 ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
3882 &num_clusters, &ext_flags);
3883
3884 if (p_cluster) {
3885 ret = ocfs2_add_refcounted_extent(t_inode, &et,
3886 ref_ci, ref_root_bh,
3887 cpos, p_cluster,
3888 num_clusters,
3889 ext_flags,
3890 dealloc);
3891 if (ret) {
3892 mlog_errno(ret);
3893 goto out;
3894 }
3895 }
3896
3897 cpos += num_clusters;
3898 }
3899
3900out:
3901 return ret;
3902}
3903
3904/*
3905 * change the new file's attributes to the src.
3906 *
3907 * reflink creates a snapshot of a file, that means the attributes
3908 * must be identical except for three exceptions - nlink, ino, and ctime.
3909 */
3910static int ocfs2_complete_reflink(struct inode *s_inode,
3911 struct buffer_head *s_bh,
3912 struct inode *t_inode,
3913 struct buffer_head *t_bh,
3914 bool preserve)
3915{
3916 int ret;
3917 handle_t *handle;
3918 struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data;
3919 struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data;
3920 loff_t size = i_size_read(s_inode);
3921
3922 handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb),
3923 OCFS2_INODE_UPDATE_CREDITS);
3924 if (IS_ERR(handle)) {
3925 ret = PTR_ERR(handle);
3926 mlog_errno(ret);
3927 return ret;
3928 }
3929
3930 ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
3931 OCFS2_JOURNAL_ACCESS_WRITE);
3932 if (ret) {
3933 mlog_errno(ret);
3934 goto out_commit;
3935 }
3936
3937 spin_lock(&OCFS2_I(t_inode)->ip_lock);
3938 OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters;
3939 OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr;
3940 OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
3941 spin_unlock(&OCFS2_I(t_inode)->ip_lock);
3942 i_size_write(t_inode, size);
3943
3944 di->i_xattr_inline_size = s_di->i_xattr_inline_size;
3945 di->i_clusters = s_di->i_clusters;
3946 di->i_size = s_di->i_size;
3947 di->i_dyn_features = s_di->i_dyn_features;
3948 di->i_attr = s_di->i_attr;
3949
3950 if (preserve) {
3951 di->i_uid = s_di->i_uid;
3952 di->i_gid = s_di->i_gid;
3953 di->i_mode = s_di->i_mode;
3954
3955 /*
3956 * update time.
3957 * we want mtime to appear identical to the source and
3958 * update ctime.
3959 */
3960 t_inode->i_ctime = CURRENT_TIME;
3961
3962 di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
3963 di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
3964
3965 t_inode->i_mtime = s_inode->i_mtime;
3966 di->i_mtime = s_di->i_mtime;
3967 di->i_mtime_nsec = s_di->i_mtime_nsec;
3968 }
3969
3970 ocfs2_journal_dirty(handle, t_bh);
3971
3972out_commit:
3973 ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle);
3974 return ret;
3975}
3976
3977static int ocfs2_create_reflink_node(struct inode *s_inode,
3978 struct buffer_head *s_bh,
3979 struct inode *t_inode,
3980 struct buffer_head *t_bh,
3981 bool preserve)
3982{
3983 int ret;
3984 struct buffer_head *ref_root_bh = NULL;
3985 struct ocfs2_cached_dealloc_ctxt dealloc;
3986 struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
3987 struct ocfs2_refcount_block *rb;
3988 struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
3989 struct ocfs2_refcount_tree *ref_tree;
3990
3991 ocfs2_init_dealloc_ctxt(&dealloc);
3992
3993 ret = ocfs2_set_refcount_tree(t_inode, t_bh,
3994 le64_to_cpu(di->i_refcount_loc));
3995 if (ret) {
3996 mlog_errno(ret);
3997 goto out;
3998 }
3999
4000 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
4001 1, &ref_tree, &ref_root_bh);
4002 if (ret) {
4003 mlog_errno(ret);
4004 goto out;
4005 }
4006 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
4007
4008 ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
4009 &ref_tree->rf_ci, ref_root_bh,
4010 &dealloc);
4011 if (ret) {
4012 mlog_errno(ret);
4013 goto out_unlock_refcount;
4014 }
4015
4016 ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh, preserve);
4017 if (ret)
4018 mlog_errno(ret);
4019
4020out_unlock_refcount:
4021 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
4022 brelse(ref_root_bh);
4023out:
4024 if (ocfs2_dealloc_has_cluster(&dealloc)) {
4025 ocfs2_schedule_truncate_log_flush(osb, 1);
4026 ocfs2_run_deallocs(osb, &dealloc);
4027 }
4028
4029 return ret;
4030}
4031
4032static int __ocfs2_reflink(struct dentry *old_dentry,
4033 struct buffer_head *old_bh,
4034 struct inode *new_inode,
4035 bool preserve)
4036{
4037 int ret;
4038 struct inode *inode = old_dentry->d_inode;
4039 struct buffer_head *new_bh = NULL;
4040
4041 ret = filemap_fdatawrite(inode->i_mapping);
4042 if (ret) {
4043 mlog_errno(ret);
4044 goto out;
4045 }
4046
4047 ret = ocfs2_attach_refcount_tree(inode, old_bh);
4048 if (ret) {
4049 mlog_errno(ret);
4050 goto out;
4051 }
4052
4053 mutex_lock(&new_inode->i_mutex);
4054 ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
4055 if (ret) {
4056 mlog_errno(ret);
4057 goto out_unlock;
4058 }
4059
4060 ret = ocfs2_create_reflink_node(inode, old_bh,
4061 new_inode, new_bh, preserve);
4062 if (ret) {
4063 mlog_errno(ret);
4064 goto inode_unlock;
4065 }
4066
4067 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
4068 ret = ocfs2_reflink_xattrs(inode, old_bh,
4069 new_inode, new_bh,
4070 preserve);
4071 if (ret)
4072 mlog_errno(ret);
4073 }
4074inode_unlock:
4075 ocfs2_inode_unlock(new_inode, 1);
4076 brelse(new_bh);
4077out_unlock:
4078 mutex_unlock(&new_inode->i_mutex);
4079out:
4080 if (!ret) {
4081 ret = filemap_fdatawait(inode->i_mapping);
4082 if (ret)
4083 mlog_errno(ret);
4084 }
4085 return ret;
4086}
4087
4088static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4089 struct dentry *new_dentry, bool preserve)
4090{
4091 int error;
4092 struct inode *inode = old_dentry->d_inode;
4093 struct buffer_head *old_bh = NULL;
4094 struct inode *new_orphan_inode = NULL;
4095
4096 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
4097 return -EOPNOTSUPP;
4098
4099 error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
4100 &new_orphan_inode);
4101 if (error) {
4102 mlog_errno(error);
4103 goto out;
4104 }
4105
4106 error = ocfs2_inode_lock(inode, &old_bh, 1);
4107 if (error) {
4108 mlog_errno(error);
4109 goto out;
4110 }
4111
4112 down_write(&OCFS2_I(inode)->ip_xattr_sem);
4113 down_write(&OCFS2_I(inode)->ip_alloc_sem);
4114 error = __ocfs2_reflink(old_dentry, old_bh,
4115 new_orphan_inode, preserve);
4116 up_write(&OCFS2_I(inode)->ip_alloc_sem);
4117 up_write(&OCFS2_I(inode)->ip_xattr_sem);
4118
4119 ocfs2_inode_unlock(inode, 1);
4120 brelse(old_bh);
4121
4122 if (error) {
4123 mlog_errno(error);
4124 goto out;
4125 }
4126
4127 /* If the security isn't preserved, we need to re-initialize them. */
4128 if (!preserve) {
4129 error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
4130 if (error)
4131 mlog_errno(error);
4132 }
4133out:
4134 if (!error) {
4135 error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
4136 new_dentry);
4137 if (error)
4138 mlog_errno(error);
4139 }
4140
4141 if (new_orphan_inode) {
4142 /*
4143 * We need to open_unlock the inode no matter whether we
4144 * succeed or not, so that other nodes can delete it later.
4145 */
4146 ocfs2_open_unlock(new_orphan_inode);
4147 if (error)
4148 iput(new_orphan_inode);
4149 }
4150
4151 return error;
4152}
4153
4154/*
4155 * Below here are the bits used by OCFS2_IOC_REFLINK() to fake
4156 * sys_reflink(). This will go away when vfs_reflink() exists in
4157 * fs/namei.c.
4158 */
4159
4160/* copied from may_create in VFS. */
4161static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
4162{
4163 if (child->d_inode)
4164 return -EEXIST;
4165 if (IS_DEADDIR(dir))
4166 return -ENOENT;
4167 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
4168}
4169
4170/* copied from user_path_parent. */
4171static int ocfs2_user_path_parent(const char __user *path,
4172 struct nameidata *nd, char **name)
4173{
4174 char *s = getname(path);
4175 int error;
4176
4177 if (IS_ERR(s))
4178 return PTR_ERR(s);
4179
4180 error = path_lookup(s, LOOKUP_PARENT, nd);
4181 if (error)
4182 putname(s);
4183 else
4184 *name = s;
4185
4186 return error;
4187}
4188
4189/**
4190 * ocfs2_vfs_reflink - Create a reference-counted link
4191 *
4192 * @old_dentry: source dentry + inode
4193 * @dir: directory to create the target
4194 * @new_dentry: target dentry
4195 * @preserve: if true, preserve all file attributes
4196 */
4197int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4198 struct dentry *new_dentry, bool preserve)
4199{
4200 struct inode *inode = old_dentry->d_inode;
4201 int error;
4202
4203 if (!inode)
4204 return -ENOENT;
4205
4206 error = ocfs2_may_create(dir, new_dentry);
4207 if (error)
4208 return error;
4209
4210 if (dir->i_sb != inode->i_sb)
4211 return -EXDEV;
4212
4213 /*
4214 * A reflink to an append-only or immutable file cannot be created.
4215 */
4216 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4217 return -EPERM;
4218
4219 /* Only regular files can be reflinked. */
4220 if (!S_ISREG(inode->i_mode))
4221 return -EPERM;
4222
4223 /*
4224 * If the caller wants to preserve ownership, they require the
4225 * rights to do so.
4226 */
4227 if (preserve) {
4228 if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN))
4229 return -EPERM;
4230 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
4231 return -EPERM;
4232 }
4233
4234 /*
4235 * If the caller is modifying any aspect of the attributes, they
4236 * are not creating a snapshot. They need read permission on the
4237 * file.
4238 */
4239 if (!preserve) {
4240 error = inode_permission(inode, MAY_READ);
4241 if (error)
4242 return error;
4243 }
4244
4245 mutex_lock(&inode->i_mutex);
4246 vfs_dq_init(dir);
4247 error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
4248 mutex_unlock(&inode->i_mutex);
4249 if (!error)
4250 fsnotify_create(dir, new_dentry);
4251 return error;
4252}
4253/*
4254 * Most codes are copied from sys_linkat.
4255 */
4256int ocfs2_reflink_ioctl(struct inode *inode,
4257 const char __user *oldname,
4258 const char __user *newname,
4259 bool preserve)
4260{
4261 struct dentry *new_dentry;
4262 struct nameidata nd;
4263 struct path old_path;
4264 int error;
4265 char *to = NULL;
4266
4267 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
4268 return -EOPNOTSUPP;
4269
4270 error = user_path_at(AT_FDCWD, oldname, 0, &old_path);
4271 if (error) {
4272 mlog_errno(error);
4273 return error;
4274 }
4275
4276 error = ocfs2_user_path_parent(newname, &nd, &to);
4277 if (error) {
4278 mlog_errno(error);
4279 goto out;
4280 }
4281
4282 error = -EXDEV;
4283 if (old_path.mnt != nd.path.mnt)
4284 goto out_release;
4285 new_dentry = lookup_create(&nd, 0);
4286 error = PTR_ERR(new_dentry);
4287 if (IS_ERR(new_dentry)) {
4288 mlog_errno(error);
4289 goto out_unlock;
4290 }
4291
4292 error = mnt_want_write(nd.path.mnt);
4293 if (error) {
4294 mlog_errno(error);
4295 goto out_dput;
4296 }
4297
4298 error = ocfs2_vfs_reflink(old_path.dentry,
4299 nd.path.dentry->d_inode,
4300 new_dentry, preserve);
4301 mnt_drop_write(nd.path.mnt);
4302out_dput:
4303 dput(new_dentry);
4304out_unlock:
4305 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
4306out_release:
4307 path_put(&nd.path);
4308 putname(to);
4309out:
4310 path_put(&old_path);
4311
4312 return error;
4313}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
new file mode 100644
index 000000000000..c1d19b1d3ecc
--- /dev/null
+++ b/fs/ocfs2/refcounttree.h
@@ -0,0 +1,106 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * refcounttree.h
5 *
6 * Copyright (C) 2009 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17#ifndef OCFS2_REFCOUNTTREE_H
18#define OCFS2_REFCOUNTTREE_H
19
20struct ocfs2_refcount_tree {
21 struct rb_node rf_node;
22 u64 rf_blkno;
23 u32 rf_generation;
24 struct rw_semaphore rf_sem;
25 struct ocfs2_lock_res rf_lockres;
26 struct kref rf_getcnt;
27 int rf_removed;
28
29 /* the following 4 fields are used by caching_info. */
30 struct ocfs2_caching_info rf_ci;
31 spinlock_t rf_lock;
32 struct mutex rf_io_mutex;
33 struct super_block *rf_sb;
34};
35
36void ocfs2_purge_refcount_trees(struct ocfs2_super *osb);
37int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw,
38 struct ocfs2_refcount_tree **tree,
39 struct buffer_head **ref_bh);
40void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
41 struct ocfs2_refcount_tree *tree,
42 int rw);
43
44int ocfs2_decrease_refcount(struct inode *inode,
45 handle_t *handle, u32 cpos, u32 len,
46 struct ocfs2_alloc_context *meta_ac,
47 struct ocfs2_cached_dealloc_ctxt *dealloc,
48 int delete);
49int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
50 struct buffer_head *di_bh,
51 u64 phys_blkno,
52 u32 clusters,
53 int *credits,
54 struct ocfs2_alloc_context **meta_ac);
55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
56 u32 cpos, u32 write_len, u32 max_cpos);
57
58typedef int (ocfs2_post_refcount_func)(struct inode *inode,
59 handle_t *handle,
60 void *para);
61/*
62 * Some refcount caller need to do more work after we modify the data b-tree
63 * during refcount operation(including CoW and add refcount flag), and make the
64 * transaction complete. So it must give us this structure so that we can do it
65 * within our transaction.
66 *
67 */
68struct ocfs2_post_refcount {
69 int credits; /* credits it need for journal. */
70 ocfs2_post_refcount_func *func; /* real function. */
71 void *para;
72};
73
74int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
75 struct ocfs2_caching_info *ref_ci,
76 struct buffer_head *ref_root_bh,
77 struct ocfs2_xattr_value_root *xv,
78 int *meta_add, int *credits);
79int ocfs2_refcount_cow_xattr(struct inode *inode,
80 struct ocfs2_dinode *di,
81 struct ocfs2_xattr_value_buf *vb,
82 struct ocfs2_refcount_tree *ref_tree,
83 struct buffer_head *ref_root_bh,
84 u32 cpos, u32 write_len,
85 struct ocfs2_post_refcount *post);
86int ocfs2_add_refcount_flag(struct inode *inode,
87 struct ocfs2_extent_tree *data_et,
88 struct ocfs2_caching_info *ref_ci,
89 struct buffer_head *ref_root_bh,
90 u32 cpos, u32 p_cluster, u32 num_clusters,
91 struct ocfs2_cached_dealloc_ctxt *dealloc,
92 struct ocfs2_post_refcount *post);
93int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh);
94int ocfs2_try_remove_refcount_tree(struct inode *inode,
95 struct buffer_head *di_bh);
96int ocfs2_increase_refcount(handle_t *handle,
97 struct ocfs2_caching_info *ci,
98 struct buffer_head *ref_root_bh,
99 u64 cpos, u32 len,
100 struct ocfs2_alloc_context *meta_ac,
101 struct ocfs2_cached_dealloc_ctxt *dealloc);
102int ocfs2_reflink_ioctl(struct inode *inode,
103 const char __user *oldname,
104 const char __user *newname,
105 bool preserve);
106#endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 424adaa5f900..3c3d673a4d20 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
106 mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", 106 mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
107 new_clusters, first_new_cluster); 107 new_clusters, first_new_cluster);
108 108
109 ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh, 109 ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
110 OCFS2_JOURNAL_ACCESS_WRITE); 110 group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
111 if (ret < 0) { 111 if (ret < 0) {
112 mlog_errno(ret); 112 mlog_errno(ret);
113 goto out; 113 goto out;
@@ -141,7 +141,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
141 } 141 }
142 142
143 /* update the inode accordingly. */ 143 /* update the inode accordingly. */
144 ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh, 144 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
145 OCFS2_JOURNAL_ACCESS_WRITE); 145 OCFS2_JOURNAL_ACCESS_WRITE);
146 if (ret < 0) { 146 if (ret < 0) {
147 mlog_errno(ret); 147 mlog_errno(ret);
@@ -514,7 +514,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
514 goto out_unlock; 514 goto out_unlock;
515 } 515 }
516 516
517 ocfs2_set_new_buffer_uptodate(inode, group_bh); 517 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh);
518 518
519 ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); 519 ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
520 if (ret) { 520 if (ret) {
@@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
536 cl = &fe->id2.i_chain; 536 cl = &fe->id2.i_chain;
537 cr = &cl->cl_recs[input->chain]; 537 cr = &cl->cl_recs[input->chain];
538 538
539 ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh, 539 ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode),
540 OCFS2_JOURNAL_ACCESS_WRITE); 540 group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
541 if (ret < 0) { 541 if (ret < 0) {
542 mlog_errno(ret); 542 mlog_errno(ret);
543 goto out_commit; 543 goto out_commit;
@@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
552 goto out_commit; 552 goto out_commit;
553 } 553 }
554 554
555 ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh, 555 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
556 OCFS2_JOURNAL_ACCESS_WRITE); 556 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
557 if (ret < 0) { 557 if (ret < 0) {
558 mlog_errno(ret); 558 mlog_errno(ret);
559 goto out_commit; 559 goto out_commit;
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 40661e7824e9..bfbd7e9e949f 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If 150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
151 * this is not true, the read of -1 (UINT64_MAX) will fail. 151 * this is not true, the read of -1 (UINT64_MAX) will fail.
152 */ 152 */
153 ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, 153 ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
154 OCFS2_BH_IGNORE_CACHE, NULL); 154 si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
155 if (ret == 0) { 155 if (ret == 0) {
156 spin_lock(&osb->osb_lock); 156 spin_lock(&osb->osb_lock);
157 ocfs2_update_slot_info(si); 157 ocfs2_update_slot_info(si);
@@ -213,7 +213,7 @@ static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
213 ocfs2_update_disk_slot_old(si, slot_num, &bh); 213 ocfs2_update_disk_slot_old(si, slot_num, &bh);
214 spin_unlock(&osb->osb_lock); 214 spin_unlock(&osb->osb_lock);
215 215
216 status = ocfs2_write_block(osb, bh, si->si_inode); 216 status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
217 if (status < 0) 217 if (status < 0)
218 mlog_errno(status); 218 mlog_errno(status);
219 219
@@ -404,8 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
404 (unsigned long long)blkno); 404 (unsigned long long)blkno);
405 405
406 bh = NULL; /* Acquire a fresh bh */ 406 bh = NULL; /* Acquire a fresh bh */
407 status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, 407 status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
408 OCFS2_BH_IGNORE_CACHE, NULL); 408 1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
409 if (status < 0) { 409 if (status < 0) {
410 mlog_errno(status); 410 mlog_errno(status);
411 goto bail; 411 goto bail;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 73a16d4666dc..c30b644d9572 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -310,7 +310,7 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
310 int rc; 310 int rc;
311 struct buffer_head *tmp = *bh; 311 struct buffer_head *tmp = *bh;
312 312
313 rc = ocfs2_read_block(inode, gd_blkno, &tmp, 313 rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
314 ocfs2_validate_group_descriptor); 314 ocfs2_validate_group_descriptor);
315 if (rc) 315 if (rc)
316 goto out; 316 goto out;
@@ -352,7 +352,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
352 } 352 }
353 353
354 status = ocfs2_journal_access_gd(handle, 354 status = ocfs2_journal_access_gd(handle,
355 alloc_inode, 355 INODE_CACHE(alloc_inode),
356 bg_bh, 356 bg_bh,
357 OCFS2_JOURNAL_ACCESS_CREATE); 357 OCFS2_JOURNAL_ACCESS_CREATE);
358 if (status < 0) { 358 if (status < 0) {
@@ -476,7 +476,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
476 mlog_errno(status); 476 mlog_errno(status);
477 goto bail; 477 goto bail;
478 } 478 }
479 ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); 479 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
480 480
481 status = ocfs2_block_group_fill(handle, 481 status = ocfs2_block_group_fill(handle,
482 alloc_inode, 482 alloc_inode,
@@ -491,7 +491,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
491 491
492 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 492 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
493 493
494 status = ocfs2_journal_access_di(handle, alloc_inode, 494 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
495 bh, OCFS2_JOURNAL_ACCESS_WRITE); 495 bh, OCFS2_JOURNAL_ACCESS_WRITE);
496 if (status < 0) { 496 if (status < 0) {
497 mlog_errno(status); 497 mlog_errno(status);
@@ -1033,7 +1033,7 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1033 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1033 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1034 1034
1035 status = ocfs2_journal_access_gd(handle, 1035 status = ocfs2_journal_access_gd(handle,
1036 alloc_inode, 1036 INODE_CACHE(alloc_inode),
1037 group_bh, 1037 group_bh,
1038 journal_type); 1038 journal_type);
1039 if (status < 0) { 1039 if (status < 0) {
@@ -1106,7 +1106,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1106 bg_ptr = le64_to_cpu(bg->bg_next_group); 1106 bg_ptr = le64_to_cpu(bg->bg_next_group);
1107 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); 1107 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
1108 1108
1109 status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh, 1109 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1110 prev_bg_bh,
1110 OCFS2_JOURNAL_ACCESS_WRITE); 1111 OCFS2_JOURNAL_ACCESS_WRITE);
1111 if (status < 0) { 1112 if (status < 0) {
1112 mlog_errno(status); 1113 mlog_errno(status);
@@ -1121,8 +1122,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1121 goto out_rollback; 1122 goto out_rollback;
1122 } 1123 }
1123 1124
1124 status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh, 1125 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1125 OCFS2_JOURNAL_ACCESS_WRITE); 1126 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
1126 if (status < 0) { 1127 if (status < 0) {
1127 mlog_errno(status); 1128 mlog_errno(status);
1128 goto out_rollback; 1129 goto out_rollback;
@@ -1136,8 +1137,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1136 goto out_rollback; 1137 goto out_rollback;
1137 } 1138 }
1138 1139
1139 status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh, 1140 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1140 OCFS2_JOURNAL_ACCESS_WRITE); 1141 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
1141 if (status < 0) { 1142 if (status < 0) {
1142 mlog_errno(status); 1143 mlog_errno(status);
1143 goto out_rollback; 1144 goto out_rollback;
@@ -1288,7 +1289,7 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1288 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 1289 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1289 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; 1290 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1290 1291
1291 ret = ocfs2_journal_access_di(handle, inode, di_bh, 1292 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1292 OCFS2_JOURNAL_ACCESS_WRITE); 1293 OCFS2_JOURNAL_ACCESS_WRITE);
1293 if (ret < 0) { 1294 if (ret < 0) {
1294 mlog_errno(ret); 1295 mlog_errno(ret);
@@ -1461,7 +1462,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1461 /* Ok, claim our bits now: set the info on dinode, chainlist 1462 /* Ok, claim our bits now: set the info on dinode, chainlist
1462 * and then the group */ 1463 * and then the group */
1463 status = ocfs2_journal_access_di(handle, 1464 status = ocfs2_journal_access_di(handle,
1464 alloc_inode, 1465 INODE_CACHE(alloc_inode),
1465 ac->ac_bh, 1466 ac->ac_bh,
1466 OCFS2_JOURNAL_ACCESS_WRITE); 1467 OCFS2_JOURNAL_ACCESS_WRITE);
1467 if (status < 0) { 1468 if (status < 0) {
@@ -1907,8 +1908,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1907 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1908 if (ocfs2_is_cluster_bitmap(alloc_inode))
1908 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1909 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1909 1910
1910 status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh, 1911 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1911 journal_type); 1912 group_bh, journal_type);
1912 if (status < 0) { 1913 if (status < 0) {
1913 mlog_errno(status); 1914 mlog_errno(status);
1914 goto bail; 1915 goto bail;
@@ -1993,8 +1994,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1993 goto bail; 1994 goto bail;
1994 } 1995 }
1995 1996
1996 status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh, 1997 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1997 OCFS2_JOURNAL_ACCESS_WRITE); 1998 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
1998 if (status < 0) { 1999 if (status < 0) {
1999 mlog_errno(status); 2000 mlog_errno(status);
2000 goto bail; 2001 goto bail;
@@ -2151,7 +2152,7 @@ int ocfs2_lock_allocators(struct inode *inode,
2151 2152
2152 BUG_ON(clusters_to_add != 0 && data_ac == NULL); 2153 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
2153 2154
2154 num_free_extents = ocfs2_num_free_extents(osb, inode, et); 2155 num_free_extents = ocfs2_num_free_extents(osb, et);
2155 if (num_free_extents < 0) { 2156 if (num_free_extents < 0) {
2156 ret = num_free_extents; 2157 ret = num_free_extents;
2157 mlog_errno(ret); 2158 mlog_errno(ret);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a3f8871d21fd..24feb449a1dc 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -69,6 +69,7 @@
69#include "ver.h" 69#include "ver.h"
70#include "xattr.h" 70#include "xattr.h"
71#include "quota.h" 71#include "quota.h"
72#include "refcounttree.h"
72 73
73#include "buffer_head_io.h" 74#include "buffer_head_io.h"
74 75
@@ -965,7 +966,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
965 return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); 966 return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
966} 967}
967 968
968static struct quotactl_ops ocfs2_quotactl_ops = { 969static const struct quotactl_ops ocfs2_quotactl_ops = {
969 .quota_on = ocfs2_quota_on, 970 .quota_on = ocfs2_quota_on,
970 .quota_off = ocfs2_quota_off, 971 .quota_off = ocfs2_quota_off,
971 .quota_sync = vfs_quota_sync, 972 .quota_sync = vfs_quota_sync,
@@ -1668,8 +1669,6 @@ static void ocfs2_inode_init_once(void *data)
1668 spin_lock_init(&oi->ip_lock); 1669 spin_lock_init(&oi->ip_lock);
1669 ocfs2_extent_map_init(&oi->vfs_inode); 1670 ocfs2_extent_map_init(&oi->vfs_inode);
1670 INIT_LIST_HEAD(&oi->ip_io_markers); 1671 INIT_LIST_HEAD(&oi->ip_io_markers);
1671 oi->ip_created_trans = 0;
1672 oi->ip_last_trans = 0;
1673 oi->ip_dir_start_lookup = 0; 1672 oi->ip_dir_start_lookup = 0;
1674 1673
1675 init_rwsem(&oi->ip_alloc_sem); 1674 init_rwsem(&oi->ip_alloc_sem);
@@ -1683,7 +1682,8 @@ static void ocfs2_inode_init_once(void *data)
1683 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1682 ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
1684 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1683 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
1685 1684
1686 ocfs2_metadata_cache_init(&oi->vfs_inode); 1685 ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
1686 &ocfs2_inode_caching_ops);
1687 1687
1688 inode_init_once(&oi->vfs_inode); 1688 inode_init_once(&oi->vfs_inode);
1689} 1689}
@@ -1859,6 +1859,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1859 1859
1860 ocfs2_sync_blockdev(sb); 1860 ocfs2_sync_blockdev(sb);
1861 1861
1862 ocfs2_purge_refcount_trees(osb);
1863
1862 /* No cluster connection means we've failed during mount, so skip 1864 /* No cluster connection means we've failed during mount, so skip
1863 * all the steps which depended on that to complete. */ 1865 * all the steps which depended on that to complete. */
1864 if (osb->cconn) { 1866 if (osb->cconn) {
@@ -2065,6 +2067,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
2065 goto bail; 2067 goto bail;
2066 } 2068 }
2067 2069
2070 osb->osb_rf_lock_tree = RB_ROOT;
2071
2068 osb->s_feature_compat = 2072 osb->s_feature_compat =
2069 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); 2073 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
2070 osb->s_feature_ro_compat = 2074 osb->s_feature_ro_compat =
@@ -2490,7 +2494,8 @@ void __ocfs2_abort(struct super_block* sb,
2490 /* Force a panic(). This stinks, but it's better than letting 2494 /* Force a panic(). This stinks, but it's better than letting
2491 * things continue without having a proper hard readonly 2495 * things continue without having a proper hard readonly
2492 * here. */ 2496 * here. */
2493 OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; 2497 if (!ocfs2_mount_local(OCFS2_SB(sb)))
2498 OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
2494 ocfs2_handle_error(sb); 2499 ocfs2_handle_error(sb);
2495} 2500}
2496 2501
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 187b99ff0368..b6284f235d2f 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,15 +75,77 @@ struct ocfs2_meta_cache_item {
75 75
76static struct kmem_cache *ocfs2_uptodate_cachep = NULL; 76static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
77 77
78void ocfs2_metadata_cache_init(struct inode *inode) 78u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
79{ 79{
80 struct ocfs2_inode_info *oi = OCFS2_I(inode); 80 BUG_ON(!ci || !ci->ci_ops);
81 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
82 81
83 oi->ip_flags |= OCFS2_INODE_CACHE_INLINE; 82 return ci->ci_ops->co_owner(ci);
83}
84
85struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci)
86{
87 BUG_ON(!ci || !ci->ci_ops);
88
89 return ci->ci_ops->co_get_super(ci);
90}
91
92static void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci)
93{
94 BUG_ON(!ci || !ci->ci_ops);
95
96 ci->ci_ops->co_cache_lock(ci);
97}
98
99static void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci)
100{
101 BUG_ON(!ci || !ci->ci_ops);
102
103 ci->ci_ops->co_cache_unlock(ci);
104}
105
106void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
107{
108 BUG_ON(!ci || !ci->ci_ops);
109
110 ci->ci_ops->co_io_lock(ci);
111}
112
113void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
114{
115 BUG_ON(!ci || !ci->ci_ops);
116
117 ci->ci_ops->co_io_unlock(ci);
118}
119
120
121static void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci,
122 int clear)
123{
124 ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
84 ci->ci_num_cached = 0; 125 ci->ci_num_cached = 0;
126
127 if (clear) {
128 ci->ci_created_trans = 0;
129 ci->ci_last_trans = 0;
130 }
131}
132
133void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
134 const struct ocfs2_caching_operations *ops)
135{
136 BUG_ON(!ops);
137
138 ci->ci_ops = ops;
139 ocfs2_metadata_cache_reset(ci, 1);
85} 140}
86 141
142void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci)
143{
144 ocfs2_metadata_cache_purge(ci);
145 ocfs2_metadata_cache_reset(ci, 1);
146}
147
148
87/* No lock taken here as 'root' is not expected to be visible to other 149/* No lock taken here as 'root' is not expected to be visible to other
88 * processes. */ 150 * processes. */
89static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root) 151static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
@@ -112,19 +174,20 @@ static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
112 * This function is a few more lines longer than necessary due to some 174 * This function is a few more lines longer than necessary due to some
113 * accounting done here, but I think it's worth tracking down those 175 * accounting done here, but I think it's worth tracking down those
114 * bugs sooner -- Mark */ 176 * bugs sooner -- Mark */
115void ocfs2_metadata_cache_purge(struct inode *inode) 177void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
116{ 178{
117 struct ocfs2_inode_info *oi = OCFS2_I(inode);
118 unsigned int tree, to_purge, purged; 179 unsigned int tree, to_purge, purged;
119 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
120 struct rb_root root = RB_ROOT; 180 struct rb_root root = RB_ROOT;
121 181
122 spin_lock(&oi->ip_lock); 182 BUG_ON(!ci || !ci->ci_ops);
123 tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE); 183
184 ocfs2_metadata_cache_lock(ci);
185 tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
124 to_purge = ci->ci_num_cached; 186 to_purge = ci->ci_num_cached;
125 187
126 mlog(0, "Purge %u %s items from Inode %llu\n", to_purge, 188 mlog(0, "Purge %u %s items from Owner %llu\n", to_purge,
127 tree ? "array" : "tree", (unsigned long long)oi->ip_blkno); 189 tree ? "array" : "tree",
190 (unsigned long long)ocfs2_metadata_cache_owner(ci));
128 191
129 /* If we're a tree, save off the root so that we can safely 192 /* If we're a tree, save off the root so that we can safely
130 * initialize the cache. We do the work to free tree members 193 * initialize the cache. We do the work to free tree members
@@ -132,16 +195,17 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
132 if (tree) 195 if (tree)
133 root = ci->ci_cache.ci_tree; 196 root = ci->ci_cache.ci_tree;
134 197
135 ocfs2_metadata_cache_init(inode); 198 ocfs2_metadata_cache_reset(ci, 0);
136 spin_unlock(&oi->ip_lock); 199 ocfs2_metadata_cache_unlock(ci);
137 200
138 purged = ocfs2_purge_copied_metadata_tree(&root); 201 purged = ocfs2_purge_copied_metadata_tree(&root);
139 /* If possible, track the number wiped so that we can more 202 /* If possible, track the number wiped so that we can more
140 * easily detect counting errors. Unfortunately, this is only 203 * easily detect counting errors. Unfortunately, this is only
141 * meaningful for trees. */ 204 * meaningful for trees. */
142 if (tree && purged != to_purge) 205 if (tree && purged != to_purge)
143 mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n", 206 mlog(ML_ERROR, "Owner %llu, count = %u, purged = %u\n",
144 (unsigned long long)oi->ip_blkno, to_purge, purged); 207 (unsigned long long)ocfs2_metadata_cache_owner(ci),
208 to_purge, purged);
145} 209}
146 210
147/* Returns the index in the cache array, -1 if not found. 211/* Returns the index in the cache array, -1 if not found.
@@ -182,27 +246,25 @@ ocfs2_search_cache_tree(struct ocfs2_caching_info *ci,
182 return NULL; 246 return NULL;
183} 247}
184 248
185static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, 249static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
186 struct buffer_head *bh) 250 struct buffer_head *bh)
187{ 251{
188 int index = -1; 252 int index = -1;
189 struct ocfs2_meta_cache_item *item = NULL; 253 struct ocfs2_meta_cache_item *item = NULL;
190 254
191 spin_lock(&oi->ip_lock); 255 ocfs2_metadata_cache_lock(ci);
192 256
193 mlog(0, "Inode %llu, query block %llu (inline = %u)\n", 257 mlog(0, "Owner %llu, query block %llu (inline = %u)\n",
194 (unsigned long long)oi->ip_blkno, 258 (unsigned long long)ocfs2_metadata_cache_owner(ci),
195 (unsigned long long) bh->b_blocknr, 259 (unsigned long long) bh->b_blocknr,
196 !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE)); 260 !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
197 261
198 if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) 262 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
199 index = ocfs2_search_cache_array(&oi->ip_metadata_cache, 263 index = ocfs2_search_cache_array(ci, bh->b_blocknr);
200 bh->b_blocknr);
201 else 264 else
202 item = ocfs2_search_cache_tree(&oi->ip_metadata_cache, 265 item = ocfs2_search_cache_tree(ci, bh->b_blocknr);
203 bh->b_blocknr);
204 266
205 spin_unlock(&oi->ip_lock); 267 ocfs2_metadata_cache_unlock(ci);
206 268
207 mlog(0, "index = %d, item = %p\n", index, item); 269 mlog(0, "index = %d, item = %p\n", index, item);
208 270
@@ -214,7 +276,7 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
214 * 276 *
215 * This can be called under lock_buffer() 277 * This can be called under lock_buffer()
216 */ 278 */
217int ocfs2_buffer_uptodate(struct inode *inode, 279int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
218 struct buffer_head *bh) 280 struct buffer_head *bh)
219{ 281{
220 /* Doesn't matter if the bh is in our cache or not -- if it's 282 /* Doesn't matter if the bh is in our cache or not -- if it's
@@ -230,24 +292,24 @@ int ocfs2_buffer_uptodate(struct inode *inode,
230 292
231 /* Ok, locally the buffer is marked as up to date, now search 293 /* Ok, locally the buffer is marked as up to date, now search
232 * our cache to see if we can trust that. */ 294 * our cache to see if we can trust that. */
233 return ocfs2_buffer_cached(OCFS2_I(inode), bh); 295 return ocfs2_buffer_cached(ci, bh);
234} 296}
235 297
236/* 298/*
237 * Determine whether a buffer is currently out on a read-ahead request. 299 * Determine whether a buffer is currently out on a read-ahead request.
238 * ip_io_sem should be held to serialize submitters with the logic here. 300 * ci_io_sem should be held to serialize submitters with the logic here.
239 */ 301 */
240int ocfs2_buffer_read_ahead(struct inode *inode, 302int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
241 struct buffer_head *bh) 303 struct buffer_head *bh)
242{ 304{
243 return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh); 305 return buffer_locked(bh) && ocfs2_buffer_cached(ci, bh);
244} 306}
245 307
246/* Requires ip_lock */ 308/* Requires ip_lock */
247static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, 309static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
248 sector_t block) 310 sector_t block)
249{ 311{
250 BUG_ON(ci->ci_num_cached >= OCFS2_INODE_MAX_CACHE_ARRAY); 312 BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY);
251 313
252 mlog(0, "block %llu takes position %u\n", (unsigned long long) block, 314 mlog(0, "block %llu takes position %u\n", (unsigned long long) block,
253 ci->ci_num_cached); 315 ci->ci_num_cached);
@@ -292,66 +354,64 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
292 ci->ci_num_cached++; 354 ci->ci_num_cached++;
293} 355}
294 356
295static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi, 357/* co_cache_lock() must be held */
296 struct ocfs2_caching_info *ci) 358static inline int ocfs2_insert_can_use_array(struct ocfs2_caching_info *ci)
297{ 359{
298 assert_spin_locked(&oi->ip_lock); 360 return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
299 361 (ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
300 return (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) &&
301 (ci->ci_num_cached < OCFS2_INODE_MAX_CACHE_ARRAY);
302} 362}
303 363
304/* tree should be exactly OCFS2_INODE_MAX_CACHE_ARRAY wide. NULL the 364/* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the
305 * pointers in tree after we use them - this allows caller to detect 365 * pointers in tree after we use them - this allows caller to detect
306 * when to free in case of error. */ 366 * when to free in case of error.
307static void ocfs2_expand_cache(struct ocfs2_inode_info *oi, 367 *
368 * The co_cache_lock() must be held. */
369static void ocfs2_expand_cache(struct ocfs2_caching_info *ci,
308 struct ocfs2_meta_cache_item **tree) 370 struct ocfs2_meta_cache_item **tree)
309{ 371{
310 int i; 372 int i;
311 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
312 373
313 mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY, 374 mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
314 "Inode %llu, num cached = %u, should be %u\n", 375 "Owner %llu, num cached = %u, should be %u\n",
315 (unsigned long long)oi->ip_blkno, ci->ci_num_cached, 376 (unsigned long long)ocfs2_metadata_cache_owner(ci),
316 OCFS2_INODE_MAX_CACHE_ARRAY); 377 ci->ci_num_cached, OCFS2_CACHE_INFO_MAX_ARRAY);
317 mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), 378 mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE),
318 "Inode %llu not marked as inline anymore!\n", 379 "Owner %llu not marked as inline anymore!\n",
319 (unsigned long long)oi->ip_blkno); 380 (unsigned long long)ocfs2_metadata_cache_owner(ci));
320 assert_spin_locked(&oi->ip_lock);
321 381
322 /* Be careful to initialize the tree members *first* because 382 /* Be careful to initialize the tree members *first* because
323 * once the ci_tree is used, the array is junk... */ 383 * once the ci_tree is used, the array is junk... */
324 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) 384 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
325 tree[i]->c_block = ci->ci_cache.ci_array[i]; 385 tree[i]->c_block = ci->ci_cache.ci_array[i];
326 386
327 oi->ip_flags &= ~OCFS2_INODE_CACHE_INLINE; 387 ci->ci_flags &= ~OCFS2_CACHE_FL_INLINE;
328 ci->ci_cache.ci_tree = RB_ROOT; 388 ci->ci_cache.ci_tree = RB_ROOT;
329 /* this will be set again by __ocfs2_insert_cache_tree */ 389 /* this will be set again by __ocfs2_insert_cache_tree */
330 ci->ci_num_cached = 0; 390 ci->ci_num_cached = 0;
331 391
332 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { 392 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
333 __ocfs2_insert_cache_tree(ci, tree[i]); 393 __ocfs2_insert_cache_tree(ci, tree[i]);
334 tree[i] = NULL; 394 tree[i] = NULL;
335 } 395 }
336 396
337 mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n", 397 mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
338 (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached); 398 (unsigned long long)ocfs2_metadata_cache_owner(ci),
399 ci->ci_flags, ci->ci_num_cached);
339} 400}
340 401
341/* Slow path function - memory allocation is necessary. See the 402/* Slow path function - memory allocation is necessary. See the
342 * comment above ocfs2_set_buffer_uptodate for more information. */ 403 * comment above ocfs2_set_buffer_uptodate for more information. */
343static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, 404static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
344 sector_t block, 405 sector_t block,
345 int expand_tree) 406 int expand_tree)
346{ 407{
347 int i; 408 int i;
348 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
349 struct ocfs2_meta_cache_item *new = NULL; 409 struct ocfs2_meta_cache_item *new = NULL;
350 struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] = 410 struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
351 { NULL, }; 411 { NULL, };
352 412
353 mlog(0, "Inode %llu, block %llu, expand = %d\n", 413 mlog(0, "Owner %llu, block %llu, expand = %d\n",
354 (unsigned long long)oi->ip_blkno, 414 (unsigned long long)ocfs2_metadata_cache_owner(ci),
355 (unsigned long long)block, expand_tree); 415 (unsigned long long)block, expand_tree);
356 416
357 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); 417 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
@@ -364,7 +424,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
364 if (expand_tree) { 424 if (expand_tree) {
365 /* Do *not* allocate an array here - the removal code 425 /* Do *not* allocate an array here - the removal code
366 * has no way of tracking that. */ 426 * has no way of tracking that. */
367 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { 427 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
368 tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, 428 tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep,
369 GFP_NOFS); 429 GFP_NOFS);
370 if (!tree[i]) { 430 if (!tree[i]) {
@@ -376,21 +436,21 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
376 } 436 }
377 } 437 }
378 438
379 spin_lock(&oi->ip_lock); 439 ocfs2_metadata_cache_lock(ci);
380 if (ocfs2_insert_can_use_array(oi, ci)) { 440 if (ocfs2_insert_can_use_array(ci)) {
381 mlog(0, "Someone cleared the tree underneath us\n"); 441 mlog(0, "Someone cleared the tree underneath us\n");
382 /* Ok, items were removed from the cache in between 442 /* Ok, items were removed from the cache in between
383 * locks. Detect this and revert back to the fast path */ 443 * locks. Detect this and revert back to the fast path */
384 ocfs2_append_cache_array(ci, block); 444 ocfs2_append_cache_array(ci, block);
385 spin_unlock(&oi->ip_lock); 445 ocfs2_metadata_cache_unlock(ci);
386 goto out_free; 446 goto out_free;
387 } 447 }
388 448
389 if (expand_tree) 449 if (expand_tree)
390 ocfs2_expand_cache(oi, tree); 450 ocfs2_expand_cache(ci, tree);
391 451
392 __ocfs2_insert_cache_tree(ci, new); 452 __ocfs2_insert_cache_tree(ci, new);
393 spin_unlock(&oi->ip_lock); 453 ocfs2_metadata_cache_unlock(ci);
394 454
395 new = NULL; 455 new = NULL;
396out_free: 456out_free:
@@ -400,14 +460,14 @@ out_free:
400 /* If these were used, then ocfs2_expand_cache re-set them to 460 /* If these were used, then ocfs2_expand_cache re-set them to
401 * NULL for us. */ 461 * NULL for us. */
402 if (tree[0]) { 462 if (tree[0]) {
403 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) 463 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
404 if (tree[i]) 464 if (tree[i])
405 kmem_cache_free(ocfs2_uptodate_cachep, 465 kmem_cache_free(ocfs2_uptodate_cachep,
406 tree[i]); 466 tree[i]);
407 } 467 }
408} 468}
409 469
410/* Item insertion is guarded by ip_io_mutex, so the insertion path takes 470/* Item insertion is guarded by co_io_lock(), so the insertion path takes
411 * advantage of this by not rechecking for a duplicate insert during 471 * advantage of this by not rechecking for a duplicate insert during
412 * the slow case. Additionally, if the cache needs to be bumped up to 472 * the slow case. Additionally, if the cache needs to be bumped up to
413 * a tree, the code will not recheck after acquiring the lock -- 473 * a tree, the code will not recheck after acquiring the lock --
@@ -425,59 +485,55 @@ out_free:
425 * Readahead buffers can be passed in here before the I/O request is 485 * Readahead buffers can be passed in here before the I/O request is
426 * completed. 486 * completed.
427 */ 487 */
428void ocfs2_set_buffer_uptodate(struct inode *inode, 488void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
429 struct buffer_head *bh) 489 struct buffer_head *bh)
430{ 490{
431 int expand; 491 int expand;
432 struct ocfs2_inode_info *oi = OCFS2_I(inode);
433 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
434 492
435 /* The block may very well exist in our cache already, so avoid 493 /* The block may very well exist in our cache already, so avoid
436 * doing any more work in that case. */ 494 * doing any more work in that case. */
437 if (ocfs2_buffer_cached(oi, bh)) 495 if (ocfs2_buffer_cached(ci, bh))
438 return; 496 return;
439 497
440 mlog(0, "Inode %llu, inserting block %llu\n", 498 mlog(0, "Owner %llu, inserting block %llu\n",
441 (unsigned long long)oi->ip_blkno, 499 (unsigned long long)ocfs2_metadata_cache_owner(ci),
442 (unsigned long long)bh->b_blocknr); 500 (unsigned long long)bh->b_blocknr);
443 501
444 /* No need to recheck under spinlock - insertion is guarded by 502 /* No need to recheck under spinlock - insertion is guarded by
445 * ip_io_mutex */ 503 * co_io_lock() */
446 spin_lock(&oi->ip_lock); 504 ocfs2_metadata_cache_lock(ci);
447 if (ocfs2_insert_can_use_array(oi, ci)) { 505 if (ocfs2_insert_can_use_array(ci)) {
448 /* Fast case - it's an array and there's a free 506 /* Fast case - it's an array and there's a free
449 * spot. */ 507 * spot. */
450 ocfs2_append_cache_array(ci, bh->b_blocknr); 508 ocfs2_append_cache_array(ci, bh->b_blocknr);
451 spin_unlock(&oi->ip_lock); 509 ocfs2_metadata_cache_unlock(ci);
452 return; 510 return;
453 } 511 }
454 512
455 expand = 0; 513 expand = 0;
456 if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) { 514 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
457 /* We need to bump things up to a tree. */ 515 /* We need to bump things up to a tree. */
458 expand = 1; 516 expand = 1;
459 } 517 }
460 spin_unlock(&oi->ip_lock); 518 ocfs2_metadata_cache_unlock(ci);
461 519
462 __ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand); 520 __ocfs2_set_buffer_uptodate(ci, bh->b_blocknr, expand);
463} 521}
464 522
465/* Called against a newly allocated buffer. Most likely nobody should 523/* Called against a newly allocated buffer. Most likely nobody should
466 * be able to read this sort of metadata while it's still being 524 * be able to read this sort of metadata while it's still being
467 * allocated, but this is careful to take ip_io_mutex anyway. */ 525 * allocated, but this is careful to take co_io_lock() anyway. */
468void ocfs2_set_new_buffer_uptodate(struct inode *inode, 526void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
469 struct buffer_head *bh) 527 struct buffer_head *bh)
470{ 528{
471 struct ocfs2_inode_info *oi = OCFS2_I(inode);
472
473 /* This should definitely *not* exist in our cache */ 529 /* This should definitely *not* exist in our cache */
474 BUG_ON(ocfs2_buffer_cached(oi, bh)); 530 BUG_ON(ocfs2_buffer_cached(ci, bh));
475 531
476 set_buffer_uptodate(bh); 532 set_buffer_uptodate(bh);
477 533
478 mutex_lock(&oi->ip_io_mutex); 534 ocfs2_metadata_cache_io_lock(ci);
479 ocfs2_set_buffer_uptodate(inode, bh); 535 ocfs2_set_buffer_uptodate(ci, bh);
480 mutex_unlock(&oi->ip_io_mutex); 536 ocfs2_metadata_cache_io_unlock(ci);
481} 537}
482 538
483/* Requires ip_lock. */ 539/* Requires ip_lock. */
@@ -487,7 +543,7 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
487 sector_t *array = ci->ci_cache.ci_array; 543 sector_t *array = ci->ci_cache.ci_array;
488 int bytes; 544 int bytes;
489 545
490 BUG_ON(index < 0 || index >= OCFS2_INODE_MAX_CACHE_ARRAY); 546 BUG_ON(index < 0 || index >= OCFS2_CACHE_INFO_MAX_ARRAY);
491 BUG_ON(index >= ci->ci_num_cached); 547 BUG_ON(index >= ci->ci_num_cached);
492 BUG_ON(!ci->ci_num_cached); 548 BUG_ON(!ci->ci_num_cached);
493 549
@@ -515,21 +571,19 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
515 ci->ci_num_cached--; 571 ci->ci_num_cached--;
516} 572}
517 573
518static void ocfs2_remove_block_from_cache(struct inode *inode, 574static void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci,
519 sector_t block) 575 sector_t block)
520{ 576{
521 int index; 577 int index;
522 struct ocfs2_meta_cache_item *item = NULL; 578 struct ocfs2_meta_cache_item *item = NULL;
523 struct ocfs2_inode_info *oi = OCFS2_I(inode);
524 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
525 579
526 spin_lock(&oi->ip_lock); 580 ocfs2_metadata_cache_lock(ci);
527 mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n", 581 mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n",
528 (unsigned long long)oi->ip_blkno, 582 (unsigned long long)ocfs2_metadata_cache_owner(ci),
529 (unsigned long long) block, ci->ci_num_cached, 583 (unsigned long long) block, ci->ci_num_cached,
530 oi->ip_flags & OCFS2_INODE_CACHE_INLINE); 584 ci->ci_flags & OCFS2_CACHE_FL_INLINE);
531 585
532 if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) { 586 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
533 index = ocfs2_search_cache_array(ci, block); 587 index = ocfs2_search_cache_array(ci, block);
534 if (index != -1) 588 if (index != -1)
535 ocfs2_remove_metadata_array(ci, index); 589 ocfs2_remove_metadata_array(ci, index);
@@ -538,7 +592,7 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
538 if (item) 592 if (item)
539 ocfs2_remove_metadata_tree(ci, item); 593 ocfs2_remove_metadata_tree(ci, item);
540 } 594 }
541 spin_unlock(&oi->ip_lock); 595 ocfs2_metadata_cache_unlock(ci);
542 596
543 if (item) 597 if (item)
544 kmem_cache_free(ocfs2_uptodate_cachep, item); 598 kmem_cache_free(ocfs2_uptodate_cachep, item);
@@ -549,23 +603,24 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
549 * bother reverting things to an inlined array in the case of a remove 603 * bother reverting things to an inlined array in the case of a remove
550 * which moves us back under the limit. 604 * which moves us back under the limit.
551 */ 605 */
552void ocfs2_remove_from_cache(struct inode *inode, 606void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
553 struct buffer_head *bh) 607 struct buffer_head *bh)
554{ 608{
555 sector_t block = bh->b_blocknr; 609 sector_t block = bh->b_blocknr;
556 610
557 ocfs2_remove_block_from_cache(inode, block); 611 ocfs2_remove_block_from_cache(ci, block);
558} 612}
559 613
560/* Called when we remove xattr clusters from an inode. */ 614/* Called when we remove xattr clusters from an inode. */
561void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, 615void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
562 sector_t block, 616 sector_t block,
563 u32 c_len) 617 u32 c_len)
564{ 618{
565 unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len; 619 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
620 unsigned int i, b_len = ocfs2_clusters_to_blocks(sb, 1) * c_len;
566 621
567 for (i = 0; i < b_len; i++, block++) 622 for (i = 0; i < b_len; i++, block++)
568 ocfs2_remove_block_from_cache(inode, block); 623 ocfs2_remove_block_from_cache(ci, block);
569} 624}
570 625
571int __init init_ocfs2_uptodate_cache(void) 626int __init init_ocfs2_uptodate_cache(void)
@@ -577,7 +632,7 @@ int __init init_ocfs2_uptodate_cache(void)
577 return -ENOMEM; 632 return -ENOMEM;
578 633
579 mlog(0, "%u inlined cache items per inode.\n", 634 mlog(0, "%u inlined cache items per inode.\n",
580 OCFS2_INODE_MAX_CACHE_ARRAY); 635 OCFS2_CACHE_INFO_MAX_ARRAY);
581 636
582 return 0; 637 return 0;
583} 638}
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 531b4b3a0c47..0d826fe2da0d 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -26,24 +26,59 @@
26#ifndef OCFS2_UPTODATE_H 26#ifndef OCFS2_UPTODATE_H
27#define OCFS2_UPTODATE_H 27#define OCFS2_UPTODATE_H
28 28
29/*
30 * The caching code relies on locking provided by the user of
31 * struct ocfs2_caching_info. These operations connect that up.
32 */
33struct ocfs2_caching_operations {
34 /*
35 * A u64 representing the owning structure. Usually this
36 * is the block number (i_blkno or whatnot). This is used so
37 * that caching log messages can identify the owning structure.
38 */
39 u64 (*co_owner)(struct ocfs2_caching_info *ci);
40
41 /* The superblock is needed during I/O. */
42 struct super_block *(*co_get_super)(struct ocfs2_caching_info *ci);
43 /*
44 * Lock and unlock the caching data. These will not sleep, and
45 * should probably be spinlocks.
46 */
47 void (*co_cache_lock)(struct ocfs2_caching_info *ci);
48 void (*co_cache_unlock)(struct ocfs2_caching_info *ci);
49
50 /*
51 * Lock and unlock for disk I/O. These will sleep, and should
52 * be mutexes.
53 */
54 void (*co_io_lock)(struct ocfs2_caching_info *ci);
55 void (*co_io_unlock)(struct ocfs2_caching_info *ci);
56};
57
29int __init init_ocfs2_uptodate_cache(void); 58int __init init_ocfs2_uptodate_cache(void);
30void exit_ocfs2_uptodate_cache(void); 59void exit_ocfs2_uptodate_cache(void);
31 60
32void ocfs2_metadata_cache_init(struct inode *inode); 61void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
33void ocfs2_metadata_cache_purge(struct inode *inode); 62 const struct ocfs2_caching_operations *ops);
63void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
64void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci);
65
66u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
67void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci);
68void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci);
34 69
35int ocfs2_buffer_uptodate(struct inode *inode, 70int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
36 struct buffer_head *bh); 71 struct buffer_head *bh);
37void ocfs2_set_buffer_uptodate(struct inode *inode, 72void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
38 struct buffer_head *bh); 73 struct buffer_head *bh);
39void ocfs2_set_new_buffer_uptodate(struct inode *inode, 74void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
40 struct buffer_head *bh); 75 struct buffer_head *bh);
41void ocfs2_remove_from_cache(struct inode *inode, 76void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
42 struct buffer_head *bh); 77 struct buffer_head *bh);
43void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, 78void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
44 sector_t block, 79 sector_t block,
45 u32 c_len); 80 u32 c_len);
46int ocfs2_buffer_read_ahead(struct inode *inode, 81int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
47 struct buffer_head *bh); 82 struct buffer_head *bh);
48 83
49#endif /* OCFS2_UPTODATE_H */ 84#endif /* OCFS2_UPTODATE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d1a27cda984f..fe3419068df2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -55,7 +55,8 @@
55#include "buffer_head_io.h" 55#include "buffer_head_io.h"
56#include "super.h" 56#include "super.h"
57#include "xattr.h" 57#include "xattr.h"
58 58#include "refcounttree.h"
59#include "acl.h"
59 60
60struct ocfs2_xattr_def_value_root { 61struct ocfs2_xattr_def_value_root {
61 struct ocfs2_xattr_value_root xv; 62 struct ocfs2_xattr_value_root xv;
@@ -140,7 +141,7 @@ struct ocfs2_xattr_search {
140 int not_found; 141 int not_found;
141}; 142};
142 143
143static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, 144static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
144 struct ocfs2_xattr_header *xh, 145 struct ocfs2_xattr_header *xh,
145 int index, 146 int index,
146 int *block_off, 147 int *block_off,
@@ -157,7 +158,7 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
157 struct ocfs2_xattr_search *xs); 158 struct ocfs2_xattr_search *xs);
158 159
159static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 160static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
160 struct ocfs2_xattr_tree_root *xt, 161 struct buffer_head *blk_bh,
161 char *buffer, 162 char *buffer,
162 size_t buffer_size); 163 size_t buffer_size);
163 164
@@ -170,12 +171,42 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
170 struct ocfs2_xattr_search *xs, 171 struct ocfs2_xattr_search *xs,
171 struct ocfs2_xattr_set_ctxt *ctxt); 172 struct ocfs2_xattr_set_ctxt *ctxt);
172 173
173static int ocfs2_delete_xattr_index_block(struct inode *inode, 174typedef int (xattr_tree_rec_func)(struct inode *inode,
174 struct buffer_head *xb_bh); 175 struct buffer_head *root_bh,
176 u64 blkno, u32 cpos, u32 len, void *para);
177static int ocfs2_iterate_xattr_index_block(struct inode *inode,
178 struct buffer_head *root_bh,
179 xattr_tree_rec_func *rec_func,
180 void *para);
181static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
182 struct ocfs2_xattr_bucket *bucket,
183 void *para);
184static int ocfs2_rm_xattr_cluster(struct inode *inode,
185 struct buffer_head *root_bh,
186 u64 blkno,
187 u32 cpos,
188 u32 len,
189 void *para);
190
175static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 191static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
176 u64 src_blk, u64 last_blk, u64 to_blk, 192 u64 src_blk, u64 last_blk, u64 to_blk,
177 unsigned int start_bucket, 193 unsigned int start_bucket,
178 u32 *first_hash); 194 u32 *first_hash);
195static int ocfs2_prepare_refcount_xattr(struct inode *inode,
196 struct ocfs2_dinode *di,
197 struct ocfs2_xattr_info *xi,
198 struct ocfs2_xattr_search *xis,
199 struct ocfs2_xattr_search *xbs,
200 struct ocfs2_refcount_tree **ref_tree,
201 int *meta_need,
202 int *credits);
203static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
204 struct ocfs2_xattr_bucket *bucket,
205 int offset,
206 struct ocfs2_xattr_value_root **xv,
207 struct buffer_head **bh);
208static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
209 const void *value, size_t size, int flags);
179 210
180static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 211static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
181{ 212{
@@ -254,9 +285,9 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
254 break; 285 break;
255 } 286 }
256 287
257 if (!ocfs2_buffer_uptodate(bucket->bu_inode, 288 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
258 bucket->bu_bhs[i])) 289 bucket->bu_bhs[i]))
259 ocfs2_set_new_buffer_uptodate(bucket->bu_inode, 290 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
260 bucket->bu_bhs[i]); 291 bucket->bu_bhs[i]);
261 } 292 }
262 293
@@ -271,7 +302,7 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
271{ 302{
272 int rc; 303 int rc;
273 304
274 rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, 305 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
275 bucket->bu_blocks, bucket->bu_bhs, 0, 306 bucket->bu_blocks, bucket->bu_bhs, 0,
276 NULL); 307 NULL);
277 if (!rc) { 308 if (!rc) {
@@ -297,7 +328,8 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
297 int i, rc = 0; 328 int i, rc = 0;
298 329
299 for (i = 0; i < bucket->bu_blocks; i++) { 330 for (i = 0; i < bucket->bu_blocks; i++) {
300 rc = ocfs2_journal_access(handle, bucket->bu_inode, 331 rc = ocfs2_journal_access(handle,
332 INODE_CACHE(bucket->bu_inode),
301 bucket->bu_bhs[i], type); 333 bucket->bu_bhs[i], type);
302 if (rc) { 334 if (rc) {
303 mlog_errno(rc); 335 mlog_errno(rc);
@@ -399,7 +431,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
399 int rc; 431 int rc;
400 struct buffer_head *tmp = *bh; 432 struct buffer_head *tmp = *bh;
401 433
402 rc = ocfs2_read_block(inode, xb_blkno, &tmp, 434 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
403 ocfs2_validate_xattr_block); 435 ocfs2_validate_xattr_block);
404 436
405 /* If ocfs2_read_block() got us a new bh, pass it up. */ 437 /* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -596,15 +628,14 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
596 int status = 0; 628 int status = 0;
597 handle_t *handle = ctxt->handle; 629 handle_t *handle = ctxt->handle;
598 enum ocfs2_alloc_restarted why; 630 enum ocfs2_alloc_restarted why;
599 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
600 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 631 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
601 struct ocfs2_extent_tree et; 632 struct ocfs2_extent_tree et;
602 633
603 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); 634 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
604 635
605 ocfs2_init_xattr_value_extent_tree(&et, inode, vb); 636 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
606 637
607 status = vb->vb_access(handle, inode, vb->vb_bh, 638 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
608 OCFS2_JOURNAL_ACCESS_WRITE); 639 OCFS2_JOURNAL_ACCESS_WRITE);
609 if (status < 0) { 640 if (status < 0) {
610 mlog_errno(status); 641 mlog_errno(status);
@@ -612,13 +643,11 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
612 } 643 }
613 644
614 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 645 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
615 status = ocfs2_add_clusters_in_btree(osb, 646 status = ocfs2_add_clusters_in_btree(handle,
616 inode, 647 &et,
617 &logical_start, 648 &logical_start,
618 clusters_to_add, 649 clusters_to_add,
619 0, 650 0,
620 &et,
621 handle,
622 ctxt->data_ac, 651 ctxt->data_ac,
623 ctxt->meta_ac, 652 ctxt->meta_ac,
624 &why); 653 &why);
@@ -649,6 +678,7 @@ leave:
649static int __ocfs2_remove_xattr_range(struct inode *inode, 678static int __ocfs2_remove_xattr_range(struct inode *inode,
650 struct ocfs2_xattr_value_buf *vb, 679 struct ocfs2_xattr_value_buf *vb,
651 u32 cpos, u32 phys_cpos, u32 len, 680 u32 cpos, u32 phys_cpos, u32 len,
681 unsigned int ext_flags,
652 struct ocfs2_xattr_set_ctxt *ctxt) 682 struct ocfs2_xattr_set_ctxt *ctxt)
653{ 683{
654 int ret; 684 int ret;
@@ -656,16 +686,16 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
656 handle_t *handle = ctxt->handle; 686 handle_t *handle = ctxt->handle;
657 struct ocfs2_extent_tree et; 687 struct ocfs2_extent_tree et;
658 688
659 ocfs2_init_xattr_value_extent_tree(&et, inode, vb); 689 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
660 690
661 ret = vb->vb_access(handle, inode, vb->vb_bh, 691 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
662 OCFS2_JOURNAL_ACCESS_WRITE); 692 OCFS2_JOURNAL_ACCESS_WRITE);
663 if (ret) { 693 if (ret) {
664 mlog_errno(ret); 694 mlog_errno(ret);
665 goto out; 695 goto out;
666 } 696 }
667 697
668 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac, 698 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
669 &ctxt->dealloc); 699 &ctxt->dealloc);
670 if (ret) { 700 if (ret) {
671 mlog_errno(ret); 701 mlog_errno(ret);
@@ -680,7 +710,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
680 goto out; 710 goto out;
681 } 711 }
682 712
683 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len); 713 if (ext_flags & OCFS2_EXT_REFCOUNTED)
714 ret = ocfs2_decrease_refcount(inode, handle,
715 ocfs2_blocks_to_clusters(inode->i_sb,
716 phys_blkno),
717 len, ctxt->meta_ac, &ctxt->dealloc, 1);
718 else
719 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
720 phys_blkno, len);
684 if (ret) 721 if (ret)
685 mlog_errno(ret); 722 mlog_errno(ret);
686 723
@@ -695,6 +732,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
695 struct ocfs2_xattr_set_ctxt *ctxt) 732 struct ocfs2_xattr_set_ctxt *ctxt)
696{ 733{
697 int ret = 0; 734 int ret = 0;
735 unsigned int ext_flags;
698 u32 trunc_len, cpos, phys_cpos, alloc_size; 736 u32 trunc_len, cpos, phys_cpos, alloc_size;
699 u64 block; 737 u64 block;
700 738
@@ -706,7 +744,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
706 while (trunc_len) { 744 while (trunc_len) {
707 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, 745 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
708 &alloc_size, 746 &alloc_size,
709 &vb->vb_xv->xr_list); 747 &vb->vb_xv->xr_list, &ext_flags);
710 if (ret) { 748 if (ret) {
711 mlog_errno(ret); 749 mlog_errno(ret);
712 goto out; 750 goto out;
@@ -717,15 +755,15 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
717 755
718 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 756 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
719 phys_cpos, alloc_size, 757 phys_cpos, alloc_size,
720 ctxt); 758 ext_flags, ctxt);
721 if (ret) { 759 if (ret) {
722 mlog_errno(ret); 760 mlog_errno(ret);
723 goto out; 761 goto out;
724 } 762 }
725 763
726 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 764 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
727 ocfs2_remove_xattr_clusters_from_cache(inode, block, 765 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
728 alloc_size); 766 block, alloc_size);
729 cpos += alloc_size; 767 cpos += alloc_size;
730 trunc_len -= alloc_size; 768 trunc_len -= alloc_size;
731 } 769 }
@@ -810,6 +848,23 @@ static int ocfs2_xattr_list_entries(struct inode *inode,
810 return result; 848 return result;
811} 849}
812 850
851int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
852 struct ocfs2_dinode *di)
853{
854 struct ocfs2_xattr_header *xh;
855 int i;
856
857 xh = (struct ocfs2_xattr_header *)
858 ((void *)di + inode->i_sb->s_blocksize -
859 le16_to_cpu(di->i_xattr_inline_size));
860
861 for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
862 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
863 return 1;
864
865 return 0;
866}
867
813static int ocfs2_xattr_ibody_list(struct inode *inode, 868static int ocfs2_xattr_ibody_list(struct inode *inode,
814 struct ocfs2_dinode *di, 869 struct ocfs2_dinode *di,
815 char *buffer, 870 char *buffer,
@@ -855,11 +910,9 @@ static int ocfs2_xattr_block_list(struct inode *inode,
855 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 910 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
856 ret = ocfs2_xattr_list_entries(inode, header, 911 ret = ocfs2_xattr_list_entries(inode, header,
857 buffer, buffer_size); 912 buffer, buffer_size);
858 } else { 913 } else
859 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; 914 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
860 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
861 buffer, buffer_size); 915 buffer, buffer_size);
862 }
863 916
864 brelse(blk_bh); 917 brelse(blk_bh);
865 918
@@ -961,7 +1014,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
961 cpos = 0; 1014 cpos = 0;
962 while (cpos < clusters) { 1015 while (cpos < clusters) {
963 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1016 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
964 &num_clusters, el); 1017 &num_clusters, el, NULL);
965 if (ret) { 1018 if (ret) {
966 mlog_errno(ret); 1019 mlog_errno(ret);
967 goto out; 1020 goto out;
@@ -970,7 +1023,8 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
970 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1023 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
971 /* Copy ocfs2_xattr_value */ 1024 /* Copy ocfs2_xattr_value */
972 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1025 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
973 ret = ocfs2_read_block(inode, blkno, &bh, NULL); 1026 ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1027 &bh, NULL);
974 if (ret) { 1028 if (ret) {
975 mlog_errno(ret); 1029 mlog_errno(ret);
976 goto out; 1030 goto out;
@@ -1085,7 +1139,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1085 i = xs->here - xs->header->xh_entries; 1139 i = xs->here - xs->header->xh_entries;
1086 1140
1087 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1141 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1088 ret = ocfs2_xattr_bucket_get_name_value(inode, 1142 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1089 bucket_xh(xs->bucket), 1143 bucket_xh(xs->bucket),
1090 i, 1144 i,
1091 &block_off, 1145 &block_off,
@@ -1183,7 +1237,7 @@ static int ocfs2_xattr_get(struct inode *inode,
1183 1237
1184static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1238static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1185 handle_t *handle, 1239 handle_t *handle,
1186 struct ocfs2_xattr_value_root *xv, 1240 struct ocfs2_xattr_value_buf *vb,
1187 const void *value, 1241 const void *value,
1188 int value_len) 1242 int value_len)
1189{ 1243{
@@ -1194,28 +1248,34 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1194 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1248 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1195 u64 blkno; 1249 u64 blkno;
1196 struct buffer_head *bh = NULL; 1250 struct buffer_head *bh = NULL;
1251 unsigned int ext_flags;
1252 struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1197 1253
1198 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1254 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1199 1255
1200 while (cpos < clusters) { 1256 while (cpos < clusters) {
1201 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1257 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1202 &num_clusters, &xv->xr_list); 1258 &num_clusters, &xv->xr_list,
1259 &ext_flags);
1203 if (ret) { 1260 if (ret) {
1204 mlog_errno(ret); 1261 mlog_errno(ret);
1205 goto out; 1262 goto out;
1206 } 1263 }
1207 1264
1265 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1266
1208 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1267 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1209 1268
1210 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1269 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1211 ret = ocfs2_read_block(inode, blkno, &bh, NULL); 1270 ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1271 &bh, NULL);
1212 if (ret) { 1272 if (ret) {
1213 mlog_errno(ret); 1273 mlog_errno(ret);
1214 goto out; 1274 goto out;
1215 } 1275 }
1216 1276
1217 ret = ocfs2_journal_access(handle, 1277 ret = ocfs2_journal_access(handle,
1218 inode, 1278 INODE_CACHE(inode),
1219 bh, 1279 bh,
1220 OCFS2_JOURNAL_ACCESS_WRITE); 1280 OCFS2_JOURNAL_ACCESS_WRITE);
1221 if (ret < 0) { 1281 if (ret < 0) {
@@ -1266,7 +1326,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
1266 void *val = xs->base + offs; 1326 void *val = xs->base + offs;
1267 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 1327 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1268 1328
1269 ret = vb->vb_access(handle, inode, vb->vb_bh, 1329 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1270 OCFS2_JOURNAL_ACCESS_WRITE); 1330 OCFS2_JOURNAL_ACCESS_WRITE);
1271 if (ret) { 1331 if (ret) {
1272 mlog_errno(ret); 1332 mlog_errno(ret);
@@ -1294,7 +1354,7 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
1294{ 1354{
1295 int ret; 1355 int ret;
1296 1356
1297 ret = vb->vb_access(handle, inode, vb->vb_bh, 1357 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1298 OCFS2_JOURNAL_ACCESS_WRITE); 1358 OCFS2_JOURNAL_ACCESS_WRITE);
1299 if (ret) { 1359 if (ret) {
1300 mlog_errno(ret); 1360 mlog_errno(ret);
@@ -1355,7 +1415,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
1355 mlog_errno(ret); 1415 mlog_errno(ret);
1356 return ret; 1416 return ret;
1357 } 1417 }
1358 ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv, 1418 ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1359 xi->value, xi->value_len); 1419 xi->value, xi->value_len);
1360 if (ret < 0) 1420 if (ret < 0)
1361 mlog_errno(ret); 1421 mlog_errno(ret);
@@ -1594,7 +1654,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1594 1654
1595 ret = __ocfs2_xattr_set_value_outside(inode, 1655 ret = __ocfs2_xattr_set_value_outside(inode,
1596 handle, 1656 handle,
1597 vb.vb_xv, 1657 &vb,
1598 xi->value, 1658 xi->value,
1599 xi->value_len); 1659 xi->value_len);
1600 if (ret < 0) 1660 if (ret < 0)
@@ -1615,7 +1675,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1615 } 1675 }
1616 } 1676 }
1617 1677
1618 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, 1678 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1619 OCFS2_JOURNAL_ACCESS_WRITE); 1679 OCFS2_JOURNAL_ACCESS_WRITE);
1620 if (ret) { 1680 if (ret) {
1621 mlog_errno(ret); 1681 mlog_errno(ret);
@@ -1623,7 +1683,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1623 } 1683 }
1624 1684
1625 if (!(flag & OCFS2_INLINE_XATTR_FL)) { 1685 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1626 ret = vb.vb_access(handle, inode, vb.vb_bh, 1686 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1627 OCFS2_JOURNAL_ACCESS_WRITE); 1687 OCFS2_JOURNAL_ACCESS_WRITE);
1628 if (ret) { 1688 if (ret) {
1629 mlog_errno(ret); 1689 mlog_errno(ret);
@@ -1700,51 +1760,112 @@ out:
1700 return ret; 1760 return ret;
1701} 1761}
1702 1762
1763/*
1764 * In xattr remove, if it is stored outside and refcounted, we may have
1765 * the chance to split the refcount tree. So need the allocators.
1766 */
1767static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
1768 struct ocfs2_xattr_value_root *xv,
1769 struct ocfs2_caching_info *ref_ci,
1770 struct buffer_head *ref_root_bh,
1771 struct ocfs2_alloc_context **meta_ac,
1772 int *ref_credits)
1773{
1774 int ret, meta_add = 0;
1775 u32 p_cluster, num_clusters;
1776 unsigned int ext_flags;
1777
1778 *ref_credits = 0;
1779 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
1780 &num_clusters,
1781 &xv->xr_list,
1782 &ext_flags);
1783 if (ret) {
1784 mlog_errno(ret);
1785 goto out;
1786 }
1787
1788 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
1789 goto out;
1790
1791 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
1792 ref_root_bh, xv,
1793 &meta_add, ref_credits);
1794 if (ret) {
1795 mlog_errno(ret);
1796 goto out;
1797 }
1798
1799 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
1800 meta_add, meta_ac);
1801 if (ret)
1802 mlog_errno(ret);
1803
1804out:
1805 return ret;
1806}
1807
1703static int ocfs2_remove_value_outside(struct inode*inode, 1808static int ocfs2_remove_value_outside(struct inode*inode,
1704 struct ocfs2_xattr_value_buf *vb, 1809 struct ocfs2_xattr_value_buf *vb,
1705 struct ocfs2_xattr_header *header) 1810 struct ocfs2_xattr_header *header,
1811 struct ocfs2_caching_info *ref_ci,
1812 struct buffer_head *ref_root_bh)
1706{ 1813{
1707 int ret = 0, i; 1814 int ret = 0, i, ref_credits;
1708 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1815 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1709 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 1816 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1817 void *val;
1710 1818
1711 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 1819 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1712 1820
1713 ctxt.handle = ocfs2_start_trans(osb,
1714 ocfs2_remove_extent_credits(osb->sb));
1715 if (IS_ERR(ctxt.handle)) {
1716 ret = PTR_ERR(ctxt.handle);
1717 mlog_errno(ret);
1718 goto out;
1719 }
1720
1721 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 1821 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1722 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 1822 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1723 1823
1724 if (!ocfs2_xattr_is_local(entry)) { 1824 if (ocfs2_xattr_is_local(entry))
1725 void *val; 1825 continue;
1726 1826
1727 val = (void *)header + 1827 val = (void *)header +
1728 le16_to_cpu(entry->xe_name_offset); 1828 le16_to_cpu(entry->xe_name_offset);
1729 vb->vb_xv = (struct ocfs2_xattr_value_root *) 1829 vb->vb_xv = (struct ocfs2_xattr_value_root *)
1730 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 1830 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1731 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 1831
1732 if (ret < 0) { 1832 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
1733 mlog_errno(ret); 1833 ref_ci, ref_root_bh,
1734 break; 1834 &ctxt.meta_ac,
1735 } 1835 &ref_credits);
1836
1837 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
1838 ocfs2_remove_extent_credits(osb->sb));
1839 if (IS_ERR(ctxt.handle)) {
1840 ret = PTR_ERR(ctxt.handle);
1841 mlog_errno(ret);
1842 break;
1843 }
1844
1845 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1846 if (ret < 0) {
1847 mlog_errno(ret);
1848 break;
1849 }
1850
1851 ocfs2_commit_trans(osb, ctxt.handle);
1852 if (ctxt.meta_ac) {
1853 ocfs2_free_alloc_context(ctxt.meta_ac);
1854 ctxt.meta_ac = NULL;
1736 } 1855 }
1737 } 1856 }
1738 1857
1739 ocfs2_commit_trans(osb, ctxt.handle); 1858 if (ctxt.meta_ac)
1859 ocfs2_free_alloc_context(ctxt.meta_ac);
1740 ocfs2_schedule_truncate_log_flush(osb, 1); 1860 ocfs2_schedule_truncate_log_flush(osb, 1);
1741 ocfs2_run_deallocs(osb, &ctxt.dealloc); 1861 ocfs2_run_deallocs(osb, &ctxt.dealloc);
1742out:
1743 return ret; 1862 return ret;
1744} 1863}
1745 1864
1746static int ocfs2_xattr_ibody_remove(struct inode *inode, 1865static int ocfs2_xattr_ibody_remove(struct inode *inode,
1747 struct buffer_head *di_bh) 1866 struct buffer_head *di_bh,
1867 struct ocfs2_caching_info *ref_ci,
1868 struct buffer_head *ref_root_bh)
1748{ 1869{
1749 1870
1750 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1871 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1759,13 +1880,21 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode,
1759 ((void *)di + inode->i_sb->s_blocksize - 1880 ((void *)di + inode->i_sb->s_blocksize -
1760 le16_to_cpu(di->i_xattr_inline_size)); 1881 le16_to_cpu(di->i_xattr_inline_size));
1761 1882
1762 ret = ocfs2_remove_value_outside(inode, &vb, header); 1883 ret = ocfs2_remove_value_outside(inode, &vb, header,
1884 ref_ci, ref_root_bh);
1763 1885
1764 return ret; 1886 return ret;
1765} 1887}
1766 1888
1889struct ocfs2_rm_xattr_bucket_para {
1890 struct ocfs2_caching_info *ref_ci;
1891 struct buffer_head *ref_root_bh;
1892};
1893
1767static int ocfs2_xattr_block_remove(struct inode *inode, 1894static int ocfs2_xattr_block_remove(struct inode *inode,
1768 struct buffer_head *blk_bh) 1895 struct buffer_head *blk_bh,
1896 struct ocfs2_caching_info *ref_ci,
1897 struct buffer_head *ref_root_bh)
1769{ 1898{
1770 struct ocfs2_xattr_block *xb; 1899 struct ocfs2_xattr_block *xb;
1771 int ret = 0; 1900 int ret = 0;
@@ -1773,19 +1902,29 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
1773 .vb_bh = blk_bh, 1902 .vb_bh = blk_bh,
1774 .vb_access = ocfs2_journal_access_xb, 1903 .vb_access = ocfs2_journal_access_xb,
1775 }; 1904 };
1905 struct ocfs2_rm_xattr_bucket_para args = {
1906 .ref_ci = ref_ci,
1907 .ref_root_bh = ref_root_bh,
1908 };
1776 1909
1777 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1910 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1778 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1911 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1779 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 1912 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1780 ret = ocfs2_remove_value_outside(inode, &vb, header); 1913 ret = ocfs2_remove_value_outside(inode, &vb, header,
1914 ref_ci, ref_root_bh);
1781 } else 1915 } else
1782 ret = ocfs2_delete_xattr_index_block(inode, blk_bh); 1916 ret = ocfs2_iterate_xattr_index_block(inode,
1917 blk_bh,
1918 ocfs2_rm_xattr_cluster,
1919 &args);
1783 1920
1784 return ret; 1921 return ret;
1785} 1922}
1786 1923
1787static int ocfs2_xattr_free_block(struct inode *inode, 1924static int ocfs2_xattr_free_block(struct inode *inode,
1788 u64 block) 1925 u64 block,
1926 struct ocfs2_caching_info *ref_ci,
1927 struct buffer_head *ref_root_bh)
1789{ 1928{
1790 struct inode *xb_alloc_inode; 1929 struct inode *xb_alloc_inode;
1791 struct buffer_head *xb_alloc_bh = NULL; 1930 struct buffer_head *xb_alloc_bh = NULL;
@@ -1803,7 +1942,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1803 goto out; 1942 goto out;
1804 } 1943 }
1805 1944
1806 ret = ocfs2_xattr_block_remove(inode, blk_bh); 1945 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
1807 if (ret < 0) { 1946 if (ret < 0) {
1808 mlog_errno(ret); 1947 mlog_errno(ret);
1809 goto out; 1948 goto out;
@@ -1863,6 +2002,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1863{ 2002{
1864 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2003 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1865 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2004 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2005 struct ocfs2_refcount_tree *ref_tree = NULL;
2006 struct buffer_head *ref_root_bh = NULL;
2007 struct ocfs2_caching_info *ref_ci = NULL;
1866 handle_t *handle; 2008 handle_t *handle;
1867 int ret; 2009 int ret;
1868 2010
@@ -1872,8 +2014,21 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1872 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2014 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1873 return 0; 2015 return 0;
1874 2016
2017 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2018 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2019 le64_to_cpu(di->i_refcount_loc),
2020 1, &ref_tree, &ref_root_bh);
2021 if (ret) {
2022 mlog_errno(ret);
2023 goto out;
2024 }
2025 ref_ci = &ref_tree->rf_ci;
2026
2027 }
2028
1875 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2029 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1876 ret = ocfs2_xattr_ibody_remove(inode, di_bh); 2030 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2031 ref_ci, ref_root_bh);
1877 if (ret < 0) { 2032 if (ret < 0) {
1878 mlog_errno(ret); 2033 mlog_errno(ret);
1879 goto out; 2034 goto out;
@@ -1882,7 +2037,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1882 2037
1883 if (di->i_xattr_loc) { 2038 if (di->i_xattr_loc) {
1884 ret = ocfs2_xattr_free_block(inode, 2039 ret = ocfs2_xattr_free_block(inode,
1885 le64_to_cpu(di->i_xattr_loc)); 2040 le64_to_cpu(di->i_xattr_loc),
2041 ref_ci, ref_root_bh);
1886 if (ret < 0) { 2042 if (ret < 0) {
1887 mlog_errno(ret); 2043 mlog_errno(ret);
1888 goto out; 2044 goto out;
@@ -1896,7 +2052,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1896 mlog_errno(ret); 2052 mlog_errno(ret);
1897 goto out; 2053 goto out;
1898 } 2054 }
1899 ret = ocfs2_journal_access_di(handle, inode, di_bh, 2055 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1900 OCFS2_JOURNAL_ACCESS_WRITE); 2056 OCFS2_JOURNAL_ACCESS_WRITE);
1901 if (ret) { 2057 if (ret) {
1902 mlog_errno(ret); 2058 mlog_errno(ret);
@@ -1916,6 +2072,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1916out_commit: 2072out_commit:
1917 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2073 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1918out: 2074out:
2075 if (ref_tree)
2076 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2077 brelse(ref_root_bh);
1919 return ret; 2078 return ret;
1920} 2079}
1921 2080
@@ -2083,6 +2242,84 @@ cleanup:
2083 return ret; 2242 return ret;
2084} 2243}
2085 2244
2245static int ocfs2_create_xattr_block(handle_t *handle,
2246 struct inode *inode,
2247 struct buffer_head *inode_bh,
2248 struct ocfs2_alloc_context *meta_ac,
2249 struct buffer_head **ret_bh,
2250 int indexed)
2251{
2252 int ret;
2253 u16 suballoc_bit_start;
2254 u32 num_got;
2255 u64 first_blkno;
2256 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
2257 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2258 struct buffer_head *new_bh = NULL;
2259 struct ocfs2_xattr_block *xblk;
2260
2261 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2262 OCFS2_JOURNAL_ACCESS_CREATE);
2263 if (ret < 0) {
2264 mlog_errno(ret);
2265 goto end;
2266 }
2267
2268 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2269 &suballoc_bit_start, &num_got,
2270 &first_blkno);
2271 if (ret < 0) {
2272 mlog_errno(ret);
2273 goto end;
2274 }
2275
2276 new_bh = sb_getblk(inode->i_sb, first_blkno);
2277 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2278
2279 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2280 new_bh,
2281 OCFS2_JOURNAL_ACCESS_CREATE);
2282 if (ret < 0) {
2283 mlog_errno(ret);
2284 goto end;
2285 }
2286
2287 /* Initialize ocfs2_xattr_block */
2288 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2289 memset(xblk, 0, inode->i_sb->s_blocksize);
2290 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2291 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2292 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2293 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2294 xblk->xb_blkno = cpu_to_le64(first_blkno);
2295
2296 if (indexed) {
2297 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2298 xr->xt_clusters = cpu_to_le32(1);
2299 xr->xt_last_eb_blk = 0;
2300 xr->xt_list.l_tree_depth = 0;
2301 xr->xt_list.l_count = cpu_to_le16(
2302 ocfs2_xattr_recs_per_xb(inode->i_sb));
2303 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2304 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2305 }
2306
2307 ret = ocfs2_journal_dirty(handle, new_bh);
2308 if (ret < 0) {
2309 mlog_errno(ret);
2310 goto end;
2311 }
2312 di->i_xattr_loc = cpu_to_le64(first_blkno);
2313 ocfs2_journal_dirty(handle, inode_bh);
2314
2315 *ret_bh = new_bh;
2316 new_bh = NULL;
2317
2318end:
2319 brelse(new_bh);
2320 return ret;
2321}
2322
2086/* 2323/*
2087 * ocfs2_xattr_block_set() 2324 * ocfs2_xattr_block_set()
2088 * 2325 *
@@ -2095,63 +2332,24 @@ static int ocfs2_xattr_block_set(struct inode *inode,
2095 struct ocfs2_xattr_set_ctxt *ctxt) 2332 struct ocfs2_xattr_set_ctxt *ctxt)
2096{ 2333{
2097 struct buffer_head *new_bh = NULL; 2334 struct buffer_head *new_bh = NULL;
2098 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2099 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2100 handle_t *handle = ctxt->handle; 2335 handle_t *handle = ctxt->handle;
2101 struct ocfs2_xattr_block *xblk = NULL; 2336 struct ocfs2_xattr_block *xblk = NULL;
2102 u16 suballoc_bit_start;
2103 u32 num_got;
2104 u64 first_blkno;
2105 int ret; 2337 int ret;
2106 2338
2107 if (!xs->xattr_bh) { 2339 if (!xs->xattr_bh) {
2108 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, 2340 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2109 OCFS2_JOURNAL_ACCESS_CREATE); 2341 ctxt->meta_ac, &new_bh, 0);
2110 if (ret < 0) { 2342 if (ret) {
2111 mlog_errno(ret);
2112 goto end;
2113 }
2114
2115 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
2116 &suballoc_bit_start, &num_got,
2117 &first_blkno);
2118 if (ret < 0) {
2119 mlog_errno(ret);
2120 goto end;
2121 }
2122
2123 new_bh = sb_getblk(inode->i_sb, first_blkno);
2124 ocfs2_set_new_buffer_uptodate(inode, new_bh);
2125
2126 ret = ocfs2_journal_access_xb(handle, inode, new_bh,
2127 OCFS2_JOURNAL_ACCESS_CREATE);
2128 if (ret < 0) {
2129 mlog_errno(ret); 2343 mlog_errno(ret);
2130 goto end; 2344 goto end;
2131 } 2345 }
2132 2346
2133 /* Initialize ocfs2_xattr_block */
2134 xs->xattr_bh = new_bh; 2347 xs->xattr_bh = new_bh;
2135 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2348 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2136 memset(xblk, 0, inode->i_sb->s_blocksize);
2137 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2138 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2139 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2140 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2141 xblk->xb_blkno = cpu_to_le64(first_blkno);
2142
2143 xs->header = &xblk->xb_attrs.xb_header; 2349 xs->header = &xblk->xb_attrs.xb_header;
2144 xs->base = (void *)xs->header; 2350 xs->base = (void *)xs->header;
2145 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2351 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2146 xs->here = xs->header->xh_entries; 2352 xs->here = xs->header->xh_entries;
2147
2148 ret = ocfs2_journal_dirty(handle, new_bh);
2149 if (ret < 0) {
2150 mlog_errno(ret);
2151 goto end;
2152 }
2153 di->i_xattr_loc = cpu_to_le64(first_blkno);
2154 ocfs2_journal_dirty(handle, xs->inode_bh);
2155 } else 2353 } else
2156 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2354 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2157 2355
@@ -2273,7 +2471,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
2273 old_in_xb = 1; 2471 old_in_xb = 1;
2274 2472
2275 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 2473 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2276 ret = ocfs2_xattr_bucket_get_name_value(inode, 2474 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2277 bucket_xh(xbs->bucket), 2475 bucket_xh(xbs->bucket),
2278 i, &block_off, 2476 i, &block_off,
2279 &name_offset); 2477 &name_offset);
@@ -2428,6 +2626,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2428 struct ocfs2_xattr_search *xis, 2626 struct ocfs2_xattr_search *xis,
2429 struct ocfs2_xattr_search *xbs, 2627 struct ocfs2_xattr_search *xbs,
2430 struct ocfs2_xattr_set_ctxt *ctxt, 2628 struct ocfs2_xattr_set_ctxt *ctxt,
2629 int extra_meta,
2431 int *credits) 2630 int *credits)
2432{ 2631{
2433 int clusters_add, meta_add, ret; 2632 int clusters_add, meta_add, ret;
@@ -2444,6 +2643,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2444 return ret; 2643 return ret;
2445 } 2644 }
2446 2645
2646 meta_add += extra_meta;
2447 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " 2647 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2448 "credits = %d\n", xi->name, meta_add, clusters_add, *credits); 2648 "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2449 2649
@@ -2598,7 +2798,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
2598 2798
2599 if (!ret) { 2799 if (!ret) {
2600 /* Update inode ctime. */ 2800 /* Update inode ctime. */
2601 ret = ocfs2_journal_access_di(ctxt->handle, inode, 2801 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2602 xis->inode_bh, 2802 xis->inode_bh,
2603 OCFS2_JOURNAL_ACCESS_WRITE); 2803 OCFS2_JOURNAL_ACCESS_WRITE);
2604 if (ret) { 2804 if (ret) {
@@ -2711,10 +2911,11 @@ int ocfs2_xattr_set(struct inode *inode,
2711{ 2911{
2712 struct buffer_head *di_bh = NULL; 2912 struct buffer_head *di_bh = NULL;
2713 struct ocfs2_dinode *di; 2913 struct ocfs2_dinode *di;
2714 int ret, credits; 2914 int ret, credits, ref_meta = 0, ref_credits = 0;
2715 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2915 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2716 struct inode *tl_inode = osb->osb_tl_inode; 2916 struct inode *tl_inode = osb->osb_tl_inode;
2717 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2917 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2918 struct ocfs2_refcount_tree *ref_tree = NULL;
2718 2919
2719 struct ocfs2_xattr_info xi = { 2920 struct ocfs2_xattr_info xi = {
2720 .name_index = name_index, 2921 .name_index = name_index,
@@ -2779,6 +2980,17 @@ int ocfs2_xattr_set(struct inode *inode,
2779 goto cleanup; 2980 goto cleanup;
2780 } 2981 }
2781 2982
2983 /* Check whether the value is refcounted and do some prepartion. */
2984 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
2985 (!xis.not_found || !xbs.not_found)) {
2986 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
2987 &xis, &xbs, &ref_tree,
2988 &ref_meta, &ref_credits);
2989 if (ret) {
2990 mlog_errno(ret);
2991 goto cleanup;
2992 }
2993 }
2782 2994
2783 mutex_lock(&tl_inode->i_mutex); 2995 mutex_lock(&tl_inode->i_mutex);
2784 2996
@@ -2793,7 +3005,7 @@ int ocfs2_xattr_set(struct inode *inode,
2793 mutex_unlock(&tl_inode->i_mutex); 3005 mutex_unlock(&tl_inode->i_mutex);
2794 3006
2795 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3007 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2796 &xbs, &ctxt, &credits); 3008 &xbs, &ctxt, ref_meta, &credits);
2797 if (ret) { 3009 if (ret) {
2798 mlog_errno(ret); 3010 mlog_errno(ret);
2799 goto cleanup; 3011 goto cleanup;
@@ -2801,7 +3013,7 @@ int ocfs2_xattr_set(struct inode *inode,
2801 3013
2802 /* we need to update inode's ctime field, so add credit for it. */ 3014 /* we need to update inode's ctime field, so add credit for it. */
2803 credits += OCFS2_INODE_UPDATE_CREDITS; 3015 credits += OCFS2_INODE_UPDATE_CREDITS;
2804 ctxt.handle = ocfs2_start_trans(osb, credits); 3016 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
2805 if (IS_ERR(ctxt.handle)) { 3017 if (IS_ERR(ctxt.handle)) {
2806 ret = PTR_ERR(ctxt.handle); 3018 ret = PTR_ERR(ctxt.handle);
2807 mlog_errno(ret); 3019 mlog_errno(ret);
@@ -2819,8 +3031,16 @@ int ocfs2_xattr_set(struct inode *inode,
2819 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3031 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2820 ocfs2_schedule_truncate_log_flush(osb, 1); 3032 ocfs2_schedule_truncate_log_flush(osb, 1);
2821 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3033 ocfs2_run_deallocs(osb, &ctxt.dealloc);
3034
2822cleanup: 3035cleanup:
3036 if (ref_tree)
3037 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
2823 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3038 up_write(&OCFS2_I(inode)->ip_xattr_sem);
3039 if (!value && !ret) {
3040 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3041 if (ret)
3042 mlog_errno(ret);
3043 }
2824 ocfs2_inode_unlock(inode, 1); 3044 ocfs2_inode_unlock(inode, 1);
2825cleanup_nolock: 3045cleanup_nolock:
2826 brelse(di_bh); 3046 brelse(di_bh);
@@ -2849,7 +3069,8 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
2849 u64 e_blkno = 0; 3069 u64 e_blkno = 0;
2850 3070
2851 if (el->l_tree_depth) { 3071 if (el->l_tree_depth) {
2852 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); 3072 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3073 &eb_bh);
2853 if (ret) { 3074 if (ret) {
2854 mlog_errno(ret); 3075 mlog_errno(ret);
2855 goto out; 3076 goto out;
@@ -2931,7 +3152,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
2931 if (cmp) 3152 if (cmp)
2932 continue; 3153 continue;
2933 3154
2934 ret = ocfs2_xattr_bucket_get_name_value(inode, 3155 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2935 xh, 3156 xh,
2936 i, 3157 i,
2937 &block_off, 3158 &block_off,
@@ -3175,7 +3396,7 @@ struct ocfs2_xattr_tree_list {
3175 size_t result; 3396 size_t result;
3176}; 3397};
3177 3398
3178static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, 3399static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3179 struct ocfs2_xattr_header *xh, 3400 struct ocfs2_xattr_header *xh,
3180 int index, 3401 int index,
3181 int *block_off, 3402 int *block_off,
@@ -3188,8 +3409,8 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
3188 3409
3189 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 3410 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3190 3411
3191 *block_off = name_offset >> inode->i_sb->s_blocksize_bits; 3412 *block_off = name_offset >> sb->s_blocksize_bits;
3192 *new_offset = name_offset % inode->i_sb->s_blocksize; 3413 *new_offset = name_offset % sb->s_blocksize;
3193 3414
3194 return 0; 3415 return 0;
3195} 3416}
@@ -3209,7 +3430,7 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
3209 prefix = ocfs2_xattr_prefix(type); 3430 prefix = ocfs2_xattr_prefix(type);
3210 3431
3211 if (prefix) { 3432 if (prefix) {
3212 ret = ocfs2_xattr_bucket_get_name_value(inode, 3433 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3213 bucket_xh(bucket), 3434 bucket_xh(bucket),
3214 i, 3435 i,
3215 &block_off, 3436 &block_off,
@@ -3232,22 +3453,19 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
3232 return ret; 3453 return ret;
3233} 3454}
3234 3455
3235static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 3456static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3236 struct ocfs2_xattr_tree_root *xt, 3457 struct buffer_head *blk_bh,
3237 char *buffer, 3458 xattr_tree_rec_func *rec_func,
3238 size_t buffer_size) 3459 void *para)
3239{ 3460{
3240 struct ocfs2_extent_list *el = &xt->xt_list; 3461 struct ocfs2_xattr_block *xb =
3462 (struct ocfs2_xattr_block *)blk_bh->b_data;
3463 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3241 int ret = 0; 3464 int ret = 0;
3242 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 3465 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3243 u64 p_blkno = 0; 3466 u64 p_blkno = 0;
3244 struct ocfs2_xattr_tree_list xl = {
3245 .buffer = buffer,
3246 .buffer_size = buffer_size,
3247 .result = 0,
3248 };
3249 3467
3250 if (le16_to_cpu(el->l_next_free_rec) == 0) 3468 if (!el->l_next_free_rec || !rec_func)
3251 return 0; 3469 return 0;
3252 3470
3253 while (name_hash > 0) { 3471 while (name_hash > 0) {
@@ -3255,16 +3473,15 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3255 &e_cpos, &num_clusters, el); 3473 &e_cpos, &num_clusters, el);
3256 if (ret) { 3474 if (ret) {
3257 mlog_errno(ret); 3475 mlog_errno(ret);
3258 goto out; 3476 break;
3259 } 3477 }
3260 3478
3261 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, 3479 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3262 ocfs2_list_xattr_bucket, 3480 num_clusters, para);
3263 &xl);
3264 if (ret) { 3481 if (ret) {
3265 if (ret != -ERANGE) 3482 if (ret != -ERANGE)
3266 mlog_errno(ret); 3483 mlog_errno(ret);
3267 goto out; 3484 break;
3268 } 3485 }
3269 3486
3270 if (e_cpos == 0) 3487 if (e_cpos == 0)
@@ -3273,6 +3490,37 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3273 name_hash = e_cpos - 1; 3490 name_hash = e_cpos - 1;
3274 } 3491 }
3275 3492
3493 return ret;
3494
3495}
3496
3497static int ocfs2_list_xattr_tree_rec(struct inode *inode,
3498 struct buffer_head *root_bh,
3499 u64 blkno, u32 cpos, u32 len, void *para)
3500{
3501 return ocfs2_iterate_xattr_buckets(inode, blkno, len,
3502 ocfs2_list_xattr_bucket, para);
3503}
3504
3505static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3506 struct buffer_head *blk_bh,
3507 char *buffer,
3508 size_t buffer_size)
3509{
3510 int ret;
3511 struct ocfs2_xattr_tree_list xl = {
3512 .buffer = buffer,
3513 .buffer_size = buffer_size,
3514 .result = 0,
3515 };
3516
3517 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
3518 ocfs2_list_xattr_tree_rec, &xl);
3519 if (ret) {
3520 mlog_errno(ret);
3521 goto out;
3522 }
3523
3276 ret = xl.result; 3524 ret = xl.result;
3277out: 3525out:
3278 return ret; 3526 return ret;
@@ -3426,7 +3674,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
3426 */ 3674 */
3427 down_write(&oi->ip_alloc_sem); 3675 down_write(&oi->ip_alloc_sem);
3428 3676
3429 ret = ocfs2_journal_access_xb(handle, inode, xb_bh, 3677 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3430 OCFS2_JOURNAL_ACCESS_WRITE); 3678 OCFS2_JOURNAL_ACCESS_WRITE);
3431 if (ret) { 3679 if (ret) {
3432 mlog_errno(ret); 3680 mlog_errno(ret);
@@ -4263,9 +4511,9 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4263 (unsigned long long)OCFS2_I(inode)->ip_blkno, 4511 (unsigned long long)OCFS2_I(inode)->ip_blkno,
4264 prev_cpos, (unsigned long long)bucket_blkno(first)); 4512 prev_cpos, (unsigned long long)bucket_blkno(first));
4265 4513
4266 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 4514 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4267 4515
4268 ret = ocfs2_journal_access_xb(handle, inode, root_bh, 4516 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4269 OCFS2_JOURNAL_ACCESS_WRITE); 4517 OCFS2_JOURNAL_ACCESS_WRITE);
4270 if (ret < 0) { 4518 if (ret < 0) {
4271 mlog_errno(ret); 4519 mlog_errno(ret);
@@ -4319,7 +4567,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4319 4567
4320 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 4568 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4321 num_bits, (unsigned long long)block, v_start); 4569 num_bits, (unsigned long long)block, v_start);
4322 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, 4570 ret = ocfs2_insert_extent(handle, &et, v_start, block,
4323 num_bits, 0, ctxt->meta_ac); 4571 num_bits, 0, ctxt->meta_ac);
4324 if (ret < 0) { 4572 if (ret < 0) {
4325 mlog_errno(ret); 4573 mlog_errno(ret);
@@ -4798,10 +5046,13 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4798 struct ocfs2_xattr_entry *xe = xs->here; 5046 struct ocfs2_xattr_entry *xe = xs->here;
4799 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); 5047 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4800 void *base; 5048 void *base;
5049 struct ocfs2_xattr_value_buf vb = {
5050 .vb_access = ocfs2_journal_access,
5051 };
4801 5052
4802 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); 5053 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4803 5054
4804 ret = ocfs2_xattr_bucket_get_name_value(inode, xh, 5055 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
4805 xe - xh->xh_entries, 5056 xe - xh->xh_entries,
4806 &block_off, 5057 &block_off,
4807 &offset); 5058 &offset);
@@ -4814,8 +5065,10 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4814 xv = (struct ocfs2_xattr_value_root *)(base + offset + 5065 xv = (struct ocfs2_xattr_value_root *)(base + offset +
4815 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5066 OCFS2_XATTR_SIZE(xe->xe_name_len));
4816 5067
5068 vb.vb_xv = xv;
5069 vb.vb_bh = xs->bucket->bu_bhs[block_off];
4817 ret = __ocfs2_xattr_set_value_outside(inode, handle, 5070 ret = __ocfs2_xattr_set_value_outside(inode, handle,
4818 xv, val, value_len); 5071 &vb, val, value_len);
4819 if (ret) 5072 if (ret)
4820 mlog_errno(ret); 5073 mlog_errno(ret);
4821out: 5074out:
@@ -4826,7 +5079,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4826 struct buffer_head *root_bh, 5079 struct buffer_head *root_bh,
4827 u64 blkno, 5080 u64 blkno,
4828 u32 cpos, 5081 u32 cpos,
4829 u32 len) 5082 u32 len,
5083 void *para)
4830{ 5084{
4831 int ret; 5085 int ret;
4832 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5086 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -4838,14 +5092,22 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4838 struct ocfs2_cached_dealloc_ctxt dealloc; 5092 struct ocfs2_cached_dealloc_ctxt dealloc;
4839 struct ocfs2_extent_tree et; 5093 struct ocfs2_extent_tree et;
4840 5094
4841 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 5095 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5096 ocfs2_delete_xattr_in_bucket, para);
5097 if (ret) {
5098 mlog_errno(ret);
5099 return ret;
5100 }
5101
5102 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4842 5103
4843 ocfs2_init_dealloc_ctxt(&dealloc); 5104 ocfs2_init_dealloc_ctxt(&dealloc);
4844 5105
4845 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", 5106 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4846 cpos, len, (unsigned long long)blkno); 5107 cpos, len, (unsigned long long)blkno);
4847 5108
4848 ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len); 5109 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5110 len);
4849 5111
4850 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5112 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4851 if (ret) { 5113 if (ret) {
@@ -4870,14 +5132,14 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4870 goto out; 5132 goto out;
4871 } 5133 }
4872 5134
4873 ret = ocfs2_journal_access_xb(handle, inode, root_bh, 5135 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4874 OCFS2_JOURNAL_ACCESS_WRITE); 5136 OCFS2_JOURNAL_ACCESS_WRITE);
4875 if (ret) { 5137 if (ret) {
4876 mlog_errno(ret); 5138 mlog_errno(ret);
4877 goto out_commit; 5139 goto out_commit;
4878 } 5140 }
4879 5141
4880 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, 5142 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
4881 &dealloc); 5143 &dealloc);
4882 if (ret) { 5144 if (ret) {
4883 mlog_errno(ret); 5145 mlog_errno(ret);
@@ -5220,7 +5482,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5220 struct ocfs2_xattr_bucket *bucket, 5482 struct ocfs2_xattr_bucket *bucket,
5221 void *para) 5483 void *para)
5222{ 5484{
5223 int ret = 0; 5485 int ret = 0, ref_credits;
5224 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5486 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5225 u16 i; 5487 u16 i;
5226 struct ocfs2_xattr_entry *xe; 5488 struct ocfs2_xattr_entry *xe;
@@ -5228,7 +5490,9 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5228 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5490 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5229 int credits = ocfs2_remove_extent_credits(osb->sb) + 5491 int credits = ocfs2_remove_extent_credits(osb->sb) +
5230 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5492 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5231 5493 struct ocfs2_xattr_value_root *xv;
5494 struct ocfs2_rm_xattr_bucket_para *args =
5495 (struct ocfs2_rm_xattr_bucket_para *)para;
5232 5496
5233 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5497 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5234 5498
@@ -5237,7 +5501,16 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5237 if (ocfs2_xattr_is_local(xe)) 5501 if (ocfs2_xattr_is_local(xe))
5238 continue; 5502 continue;
5239 5503
5240 ctxt.handle = ocfs2_start_trans(osb, credits); 5504 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5505 i, &xv, NULL);
5506
5507 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5508 args->ref_ci,
5509 args->ref_root_bh,
5510 &ctxt.meta_ac,
5511 &ref_credits);
5512
5513 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5241 if (IS_ERR(ctxt.handle)) { 5514 if (IS_ERR(ctxt.handle)) {
5242 ret = PTR_ERR(ctxt.handle); 5515 ret = PTR_ERR(ctxt.handle);
5243 mlog_errno(ret); 5516 mlog_errno(ret);
@@ -5248,57 +5521,1439 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5248 i, 0, &ctxt); 5521 i, 0, &ctxt);
5249 5522
5250 ocfs2_commit_trans(osb, ctxt.handle); 5523 ocfs2_commit_trans(osb, ctxt.handle);
5524 if (ctxt.meta_ac) {
5525 ocfs2_free_alloc_context(ctxt.meta_ac);
5526 ctxt.meta_ac = NULL;
5527 }
5251 if (ret) { 5528 if (ret) {
5252 mlog_errno(ret); 5529 mlog_errno(ret);
5253 break; 5530 break;
5254 } 5531 }
5255 } 5532 }
5256 5533
5534 if (ctxt.meta_ac)
5535 ocfs2_free_alloc_context(ctxt.meta_ac);
5257 ocfs2_schedule_truncate_log_flush(osb, 1); 5536 ocfs2_schedule_truncate_log_flush(osb, 1);
5258 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5537 ocfs2_run_deallocs(osb, &ctxt.dealloc);
5259 return ret; 5538 return ret;
5260} 5539}
5261 5540
5262static int ocfs2_delete_xattr_index_block(struct inode *inode, 5541/*
5263 struct buffer_head *xb_bh) 5542 * Whenever we modify a xattr value root in the bucket(e.g, CoW
5543 * or change the extent record flag), we need to recalculate
5544 * the metaecc for the whole bucket. So it is done here.
5545 *
5546 * Note:
5547 * We have to give the extra credits for the caller.
5548 */
5549static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5550 handle_t *handle,
5551 void *para)
5552{
5553 int ret;
5554 struct ocfs2_xattr_bucket *bucket =
5555 (struct ocfs2_xattr_bucket *)para;
5556
5557 ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5558 OCFS2_JOURNAL_ACCESS_WRITE);
5559 if (ret) {
5560 mlog_errno(ret);
5561 return ret;
5562 }
5563
5564 ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5565
5566 return 0;
5567}
5568
5569/*
5570 * Special action we need if the xattr value is refcounted.
5571 *
5572 * 1. If the xattr is refcounted, lock the tree.
5573 * 2. CoW the xattr if we are setting the new value and the value
5574 * will be stored outside.
5575 * 3. In other case, decrease_refcount will work for us, so just
5576 * lock the refcount tree, calculate the meta and credits is OK.
5577 *
5578 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5579 * currently CoW is a completed transaction, while this function
5580 * will also lock the allocators and let us deadlock. So we will
5581 * CoW the whole xattr value.
5582 */
5583static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5584 struct ocfs2_dinode *di,
5585 struct ocfs2_xattr_info *xi,
5586 struct ocfs2_xattr_search *xis,
5587 struct ocfs2_xattr_search *xbs,
5588 struct ocfs2_refcount_tree **ref_tree,
5589 int *meta_add,
5590 int *credits)
5264{ 5591{
5265 struct ocfs2_xattr_block *xb =
5266 (struct ocfs2_xattr_block *)xb_bh->b_data;
5267 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5268 int ret = 0; 5592 int ret = 0;
5269 u32 name_hash = UINT_MAX, e_cpos, num_clusters; 5593 struct ocfs2_xattr_block *xb;
5270 u64 p_blkno; 5594 struct ocfs2_xattr_entry *xe;
5595 char *base;
5596 u32 p_cluster, num_clusters;
5597 unsigned int ext_flags;
5598 int name_offset, name_len;
5599 struct ocfs2_xattr_value_buf vb;
5600 struct ocfs2_xattr_bucket *bucket = NULL;
5601 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5602 struct ocfs2_post_refcount refcount;
5603 struct ocfs2_post_refcount *p = NULL;
5604 struct buffer_head *ref_root_bh = NULL;
5271 5605
5272 if (le16_to_cpu(el->l_next_free_rec) == 0) 5606 if (!xis->not_found) {
5273 return 0; 5607 xe = xis->here;
5608 name_offset = le16_to_cpu(xe->xe_name_offset);
5609 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5610 base = xis->base;
5611 vb.vb_bh = xis->inode_bh;
5612 vb.vb_access = ocfs2_journal_access_di;
5613 } else {
5614 int i, block_off = 0;
5615 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5616 xe = xbs->here;
5617 name_offset = le16_to_cpu(xe->xe_name_offset);
5618 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5619 i = xbs->here - xbs->header->xh_entries;
5274 5620
5275 while (name_hash > 0) { 5621 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5276 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 5622 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5277 &e_cpos, &num_clusters, el); 5623 bucket_xh(xbs->bucket),
5624 i, &block_off,
5625 &name_offset);
5626 if (ret) {
5627 mlog_errno(ret);
5628 goto out;
5629 }
5630 base = bucket_block(xbs->bucket, block_off);
5631 vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5632 vb.vb_access = ocfs2_journal_access;
5633
5634 if (ocfs2_meta_ecc(osb)) {
5635 /*create parameters for ocfs2_post_refcount. */
5636 bucket = xbs->bucket;
5637 refcount.credits = bucket->bu_blocks;
5638 refcount.para = bucket;
5639 refcount.func =
5640 ocfs2_xattr_bucket_post_refcount;
5641 p = &refcount;
5642 }
5643 } else {
5644 base = xbs->base;
5645 vb.vb_bh = xbs->xattr_bh;
5646 vb.vb_access = ocfs2_journal_access_xb;
5647 }
5648 }
5649
5650 if (ocfs2_xattr_is_local(xe))
5651 goto out;
5652
5653 vb.vb_xv = (struct ocfs2_xattr_value_root *)
5654 (base + name_offset + name_len);
5655
5656 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5657 &num_clusters, &vb.vb_xv->xr_list,
5658 &ext_flags);
5659 if (ret) {
5660 mlog_errno(ret);
5661 goto out;
5662 }
5663
5664 /*
5665 * We just need to check the 1st extent record, since we always
5666 * CoW the whole xattr. So there shouldn't be a xattr with
5667 * some REFCOUNT extent recs after the 1st one.
5668 */
5669 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5670 goto out;
5671
5672 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5673 1, ref_tree, &ref_root_bh);
5674 if (ret) {
5675 mlog_errno(ret);
5676 goto out;
5677 }
5678
5679 /*
5680 * If we are deleting the xattr or the new size will be stored inside,
5681 * cool, leave it there, the xattr truncate process will remove them
5682 * for us(it still needs the refcount tree lock and the meta, credits).
5683 * And the worse case is that every cluster truncate will split the
5684 * refcount tree, and make the original extent become 3. So we will need
5685 * 2 * cluster more extent recs at most.
5686 */
5687 if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
5688
5689 ret = ocfs2_refcounted_xattr_delete_need(inode,
5690 &(*ref_tree)->rf_ci,
5691 ref_root_bh, vb.vb_xv,
5692 meta_add, credits);
5693 if (ret)
5694 mlog_errno(ret);
5695 goto out;
5696 }
5697
5698 ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5699 *ref_tree, ref_root_bh, 0,
5700 le32_to_cpu(vb.vb_xv->xr_clusters), p);
5701 if (ret)
5702 mlog_errno(ret);
5703
5704out:
5705 brelse(ref_root_bh);
5706 return ret;
5707}
5708
5709/*
5710 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5711 * The physical clusters will be added to refcount tree.
5712 */
5713static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5714 struct ocfs2_xattr_value_root *xv,
5715 struct ocfs2_extent_tree *value_et,
5716 struct ocfs2_caching_info *ref_ci,
5717 struct buffer_head *ref_root_bh,
5718 struct ocfs2_cached_dealloc_ctxt *dealloc,
5719 struct ocfs2_post_refcount *refcount)
5720{
5721 int ret = 0;
5722 u32 clusters = le32_to_cpu(xv->xr_clusters);
5723 u32 cpos, p_cluster, num_clusters;
5724 struct ocfs2_extent_list *el = &xv->xr_list;
5725 unsigned int ext_flags;
5726
5727 cpos = 0;
5728 while (cpos < clusters) {
5729 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5730 &num_clusters, el, &ext_flags);
5731
5732 cpos += num_clusters;
5733 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5734 continue;
5735
5736 BUG_ON(!p_cluster);
5737
5738 ret = ocfs2_add_refcount_flag(inode, value_et,
5739 ref_ci, ref_root_bh,
5740 cpos - num_clusters,
5741 p_cluster, num_clusters,
5742 dealloc, refcount);
5743 if (ret) {
5744 mlog_errno(ret);
5745 break;
5746 }
5747 }
5748
5749 return ret;
5750}
5751
5752/*
5753 * Given a normal ocfs2_xattr_header, refcount all the entries which
5754 * have value stored outside.
5755 * Used for xattrs stored in inode and ocfs2_xattr_block.
5756 */
5757static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5758 struct ocfs2_xattr_value_buf *vb,
5759 struct ocfs2_xattr_header *header,
5760 struct ocfs2_caching_info *ref_ci,
5761 struct buffer_head *ref_root_bh,
5762 struct ocfs2_cached_dealloc_ctxt *dealloc)
5763{
5764
5765 struct ocfs2_xattr_entry *xe;
5766 struct ocfs2_xattr_value_root *xv;
5767 struct ocfs2_extent_tree et;
5768 int i, ret = 0;
5769
5770 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5771 xe = &header->xh_entries[i];
5772
5773 if (ocfs2_xattr_is_local(xe))
5774 continue;
5775
5776 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5777 le16_to_cpu(xe->xe_name_offset) +
5778 OCFS2_XATTR_SIZE(xe->xe_name_len));
5779
5780 vb->vb_xv = xv;
5781 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5782
5783 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5784 ref_ci, ref_root_bh,
5785 dealloc, NULL);
5786 if (ret) {
5787 mlog_errno(ret);
5788 break;
5789 }
5790 }
5791
5792 return ret;
5793}
5794
5795static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5796 struct buffer_head *fe_bh,
5797 struct ocfs2_caching_info *ref_ci,
5798 struct buffer_head *ref_root_bh,
5799 struct ocfs2_cached_dealloc_ctxt *dealloc)
5800{
5801 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5802 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5803 (fe_bh->b_data + inode->i_sb->s_blocksize -
5804 le16_to_cpu(di->i_xattr_inline_size));
5805 struct ocfs2_xattr_value_buf vb = {
5806 .vb_bh = fe_bh,
5807 .vb_access = ocfs2_journal_access_di,
5808 };
5809
5810 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5811 ref_ci, ref_root_bh, dealloc);
5812}
5813
5814struct ocfs2_xattr_tree_value_refcount_para {
5815 struct ocfs2_caching_info *ref_ci;
5816 struct buffer_head *ref_root_bh;
5817 struct ocfs2_cached_dealloc_ctxt *dealloc;
5818};
5819
5820static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5821 struct ocfs2_xattr_bucket *bucket,
5822 int offset,
5823 struct ocfs2_xattr_value_root **xv,
5824 struct buffer_head **bh)
5825{
5826 int ret, block_off, name_offset;
5827 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5828 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5829 void *base;
5830
5831 ret = ocfs2_xattr_bucket_get_name_value(sb,
5832 bucket_xh(bucket),
5833 offset,
5834 &block_off,
5835 &name_offset);
5836 if (ret) {
5837 mlog_errno(ret);
5838 goto out;
5839 }
5840
5841 base = bucket_block(bucket, block_off);
5842
5843 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
5844 OCFS2_XATTR_SIZE(xe->xe_name_len));
5845
5846 if (bh)
5847 *bh = bucket->bu_bhs[block_off];
5848out:
5849 return ret;
5850}
5851
5852/*
5853 * For a given xattr bucket, refcount all the entries which
5854 * have value stored outside.
5855 */
5856static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
5857 struct ocfs2_xattr_bucket *bucket,
5858 void *para)
5859{
5860 int i, ret = 0;
5861 struct ocfs2_extent_tree et;
5862 struct ocfs2_xattr_tree_value_refcount_para *ref =
5863 (struct ocfs2_xattr_tree_value_refcount_para *)para;
5864 struct ocfs2_xattr_header *xh =
5865 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
5866 struct ocfs2_xattr_entry *xe;
5867 struct ocfs2_xattr_value_buf vb = {
5868 .vb_access = ocfs2_journal_access,
5869 };
5870 struct ocfs2_post_refcount refcount = {
5871 .credits = bucket->bu_blocks,
5872 .para = bucket,
5873 .func = ocfs2_xattr_bucket_post_refcount,
5874 };
5875 struct ocfs2_post_refcount *p = NULL;
5876
5877 /* We only need post_refcount if we support metaecc. */
5878 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
5879 p = &refcount;
5880
5881 mlog(0, "refcount bucket %llu, count = %u\n",
5882 (unsigned long long)bucket_blkno(bucket),
5883 le16_to_cpu(xh->xh_count));
5884 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5885 xe = &xh->xh_entries[i];
5886
5887 if (ocfs2_xattr_is_local(xe))
5888 continue;
5889
5890 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
5891 &vb.vb_xv, &vb.vb_bh);
5892 if (ret) {
5893 mlog_errno(ret);
5894 break;
5895 }
5896
5897 ocfs2_init_xattr_value_extent_tree(&et,
5898 INODE_CACHE(inode), &vb);
5899
5900 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
5901 &et, ref->ref_ci,
5902 ref->ref_root_bh,
5903 ref->dealloc, p);
5904 if (ret) {
5905 mlog_errno(ret);
5906 break;
5907 }
5908 }
5909
5910 return ret;
5911
5912}
5913
5914static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
5915 struct buffer_head *root_bh,
5916 u64 blkno, u32 cpos, u32 len, void *para)
5917{
5918 return ocfs2_iterate_xattr_buckets(inode, blkno, len,
5919 ocfs2_xattr_bucket_value_refcount,
5920 para);
5921}
5922
5923static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
5924 struct buffer_head *blk_bh,
5925 struct ocfs2_caching_info *ref_ci,
5926 struct buffer_head *ref_root_bh,
5927 struct ocfs2_cached_dealloc_ctxt *dealloc)
5928{
5929 int ret = 0;
5930 struct ocfs2_xattr_block *xb =
5931 (struct ocfs2_xattr_block *)blk_bh->b_data;
5932
5933 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
5934 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
5935 struct ocfs2_xattr_value_buf vb = {
5936 .vb_bh = blk_bh,
5937 .vb_access = ocfs2_journal_access_xb,
5938 };
5939
5940 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5941 ref_ci, ref_root_bh,
5942 dealloc);
5943 } else {
5944 struct ocfs2_xattr_tree_value_refcount_para para = {
5945 .ref_ci = ref_ci,
5946 .ref_root_bh = ref_root_bh,
5947 .dealloc = dealloc,
5948 };
5949
5950 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
5951 ocfs2_refcount_xattr_tree_rec,
5952 &para);
5953 }
5954
5955 return ret;
5956}
5957
5958int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
5959 struct buffer_head *fe_bh,
5960 struct ocfs2_caching_info *ref_ci,
5961 struct buffer_head *ref_root_bh,
5962 struct ocfs2_cached_dealloc_ctxt *dealloc)
5963{
5964 int ret = 0;
5965 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5966 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5967 struct buffer_head *blk_bh = NULL;
5968
5969 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
5970 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
5971 ref_ci, ref_root_bh,
5972 dealloc);
5278 if (ret) { 5973 if (ret) {
5279 mlog_errno(ret); 5974 mlog_errno(ret);
5280 goto out; 5975 goto out;
5281 } 5976 }
5977 }
5978
5979 if (!di->i_xattr_loc)
5980 goto out;
5981
5982 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
5983 &blk_bh);
5984 if (ret < 0) {
5985 mlog_errno(ret);
5986 goto out;
5987 }
5988
5989 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
5990 ref_root_bh, dealloc);
5991 if (ret)
5992 mlog_errno(ret);
5993
5994 brelse(blk_bh);
5995out:
5996
5997 return ret;
5998}
5999
6000typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
6001/*
6002 * Store the information we need in xattr reflink.
6003 * old_bh and new_bh are inode bh for the old and new inode.
6004 */
6005struct ocfs2_xattr_reflink {
6006 struct inode *old_inode;
6007 struct inode *new_inode;
6008 struct buffer_head *old_bh;
6009 struct buffer_head *new_bh;
6010 struct ocfs2_caching_info *ref_ci;
6011 struct buffer_head *ref_root_bh;
6012 struct ocfs2_cached_dealloc_ctxt *dealloc;
6013 should_xattr_reflinked *xattr_reflinked;
6014};
6015
6016/*
6017 * Given a xattr header and xe offset,
6018 * return the proper xv and the corresponding bh.
6019 * xattr in inode, block and xattr tree have different implementaions.
6020 */
6021typedef int (get_xattr_value_root)(struct super_block *sb,
6022 struct buffer_head *bh,
6023 struct ocfs2_xattr_header *xh,
6024 int offset,
6025 struct ocfs2_xattr_value_root **xv,
6026 struct buffer_head **ret_bh,
6027 void *para);
6028
6029/*
6030 * Calculate all the xattr value root metadata stored in this xattr header and
6031 * credits we need if we create them from the scratch.
6032 * We use get_xattr_value_root so that all types of xattr container can use it.
6033 */
6034static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6035 struct buffer_head *bh,
6036 struct ocfs2_xattr_header *xh,
6037 int *metas, int *credits,
6038 int *num_recs,
6039 get_xattr_value_root *func,
6040 void *para)
6041{
6042 int i, ret = 0;
6043 struct ocfs2_xattr_value_root *xv;
6044 struct ocfs2_xattr_entry *xe;
6045
6046 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6047 xe = &xh->xh_entries[i];
6048 if (ocfs2_xattr_is_local(xe))
6049 continue;
6050
6051 ret = func(sb, bh, xh, i, &xv, NULL, para);
6052 if (ret) {
6053 mlog_errno(ret);
6054 break;
6055 }
6056
6057 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6058 le16_to_cpu(xv->xr_list.l_next_free_rec);
6059
6060 *credits += ocfs2_calc_extend_credits(sb,
6061 &def_xv.xv.xr_list,
6062 le32_to_cpu(xv->xr_clusters));
6063
6064 /*
6065 * If the value is a tree with depth > 1, We don't go deep
6066 * to the extent block, so just calculate a maximum record num.
6067 */
6068 if (!xv->xr_list.l_tree_depth)
6069 *num_recs += xv->xr_list.l_next_free_rec;
6070 else
6071 *num_recs += ocfs2_clusters_for_bytes(sb,
6072 XATTR_SIZE_MAX);
6073 }
6074
6075 return ret;
6076}
6077
6078/* Used by xattr inode and block to return the right xv and buffer_head. */
6079static int ocfs2_get_xattr_value_root(struct super_block *sb,
6080 struct buffer_head *bh,
6081 struct ocfs2_xattr_header *xh,
6082 int offset,
6083 struct ocfs2_xattr_value_root **xv,
6084 struct buffer_head **ret_bh,
6085 void *para)
6086{
6087 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6088
6089 *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6090 le16_to_cpu(xe->xe_name_offset) +
6091 OCFS2_XATTR_SIZE(xe->xe_name_len));
6092
6093 if (ret_bh)
6094 *ret_bh = bh;
6095
6096 return 0;
6097}
6098
6099/*
6100 * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6101 * It is only used for inline xattr and xattr block.
6102 */
6103static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6104 struct ocfs2_xattr_header *xh,
6105 struct buffer_head *ref_root_bh,
6106 int *credits,
6107 struct ocfs2_alloc_context **meta_ac)
6108{
6109 int ret, meta_add = 0, num_recs = 0;
6110 struct ocfs2_refcount_block *rb =
6111 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6112
6113 *credits = 0;
6114
6115 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6116 &meta_add, credits, &num_recs,
6117 ocfs2_get_xattr_value_root,
6118 NULL);
6119 if (ret) {
6120 mlog_errno(ret);
6121 goto out;
6122 }
6123
6124 /*
6125 * We need to add/modify num_recs in refcount tree, so just calculate
6126 * an approximate number we need for refcount tree change.
6127 * Sometimes we need to split the tree, and after split, half recs
6128 * will be moved to the new block, and a new block can only provide
6129 * half number of recs. So we multiple new blocks by 2.
6130 */
6131 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6132 meta_add += num_recs;
6133 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6134 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6135 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6136 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6137 else
6138 *credits += 1;
6139
6140 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6141 if (ret)
6142 mlog_errno(ret);
6143
6144out:
6145 return ret;
6146}
5282 6147
5283 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, 6148/*
5284 ocfs2_delete_xattr_in_bucket, 6149 * Given a xattr header, reflink all the xattrs in this container.
5285 NULL); 6150 * It can be used for inode, block and bucket.
6151 *
6152 * NOTE:
6153 * Before we call this function, the caller has memcpy the xattr in
6154 * old_xh to the new_xh.
6155 *
6156 * If args.xattr_reflinked is set, call it to decide whether the xe should
6157 * be reflinked or not. If not, remove it from the new xattr header.
6158 */
6159static int ocfs2_reflink_xattr_header(handle_t *handle,
6160 struct ocfs2_xattr_reflink *args,
6161 struct buffer_head *old_bh,
6162 struct ocfs2_xattr_header *xh,
6163 struct buffer_head *new_bh,
6164 struct ocfs2_xattr_header *new_xh,
6165 struct ocfs2_xattr_value_buf *vb,
6166 struct ocfs2_alloc_context *meta_ac,
6167 get_xattr_value_root *func,
6168 void *para)
6169{
6170 int ret = 0, i, j;
6171 struct super_block *sb = args->old_inode->i_sb;
6172 struct buffer_head *value_bh;
6173 struct ocfs2_xattr_entry *xe, *last;
6174 struct ocfs2_xattr_value_root *xv, *new_xv;
6175 struct ocfs2_extent_tree data_et;
6176 u32 clusters, cpos, p_cluster, num_clusters;
6177 unsigned int ext_flags = 0;
6178
6179 mlog(0, "reflink xattr in container %llu, count = %u\n",
6180 (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6181
6182 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6183 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6184 xe = &xh->xh_entries[i];
6185
6186 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6187 xe = &new_xh->xh_entries[j];
6188
6189 le16_add_cpu(&new_xh->xh_count, -1);
6190 if (new_xh->xh_count) {
6191 memmove(xe, xe + 1,
6192 (void *)last - (void *)xe);
6193 memset(last, 0,
6194 sizeof(struct ocfs2_xattr_entry));
6195 }
6196
6197 /*
6198 * We don't want j to increase in the next round since
6199 * it is already moved ahead.
6200 */
6201 j--;
6202 continue;
6203 }
6204
6205 if (ocfs2_xattr_is_local(xe))
6206 continue;
6207
6208 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6209 if (ret) {
6210 mlog_errno(ret);
6211 break;
6212 }
6213
6214 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6215 if (ret) {
6216 mlog_errno(ret);
6217 break;
6218 }
6219
6220 /*
6221 * For the xattr which has l_tree_depth = 0, all the extent
6222 * recs have already be copied to the new xh with the
6223 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6224 * increase the refount count int the refcount tree.
6225 *
6226 * For the xattr which has l_tree_depth > 0, we need
6227 * to initialize it to the empty default value root,
6228 * and then insert the extents one by one.
6229 */
6230 if (xv->xr_list.l_tree_depth) {
6231 memcpy(new_xv, &def_xv, sizeof(def_xv));
6232 vb->vb_xv = new_xv;
6233 vb->vb_bh = value_bh;
6234 ocfs2_init_xattr_value_extent_tree(&data_et,
6235 INODE_CACHE(args->new_inode), vb);
6236 }
6237
6238 clusters = le32_to_cpu(xv->xr_clusters);
6239 cpos = 0;
6240 while (cpos < clusters) {
6241 ret = ocfs2_xattr_get_clusters(args->old_inode,
6242 cpos,
6243 &p_cluster,
6244 &num_clusters,
6245 &xv->xr_list,
6246 &ext_flags);
6247 if (ret) {
6248 mlog_errno(ret);
6249 goto out;
6250 }
6251
6252 BUG_ON(!p_cluster);
6253
6254 if (xv->xr_list.l_tree_depth) {
6255 ret = ocfs2_insert_extent(handle,
6256 &data_et, cpos,
6257 ocfs2_clusters_to_blocks(
6258 args->old_inode->i_sb,
6259 p_cluster),
6260 num_clusters, ext_flags,
6261 meta_ac);
6262 if (ret) {
6263 mlog_errno(ret);
6264 goto out;
6265 }
6266 }
6267
6268 ret = ocfs2_increase_refcount(handle, args->ref_ci,
6269 args->ref_root_bh,
6270 p_cluster, num_clusters,
6271 meta_ac, args->dealloc);
6272 if (ret) {
6273 mlog_errno(ret);
6274 goto out;
6275 }
6276
6277 cpos += num_clusters;
6278 }
6279 }
6280
6281out:
6282 return ret;
6283}
6284
6285static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6286{
6287 int ret = 0, credits = 0;
6288 handle_t *handle;
6289 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6290 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6291 int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6292 int header_off = osb->sb->s_blocksize - inline_size;
6293 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6294 (args->old_bh->b_data + header_off);
6295 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6296 (args->new_bh->b_data + header_off);
6297 struct ocfs2_alloc_context *meta_ac = NULL;
6298 struct ocfs2_inode_info *new_oi;
6299 struct ocfs2_dinode *new_di;
6300 struct ocfs2_xattr_value_buf vb = {
6301 .vb_bh = args->new_bh,
6302 .vb_access = ocfs2_journal_access_di,
6303 };
6304
6305 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6306 &credits, &meta_ac);
6307 if (ret) {
6308 mlog_errno(ret);
6309 goto out;
6310 }
6311
6312 handle = ocfs2_start_trans(osb, credits);
6313 if (IS_ERR(handle)) {
6314 ret = PTR_ERR(handle);
6315 mlog_errno(ret);
6316 goto out;
6317 }
6318
6319 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6320 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6321 if (ret) {
6322 mlog_errno(ret);
6323 goto out_commit;
6324 }
6325
6326 memcpy(args->new_bh->b_data + header_off,
6327 args->old_bh->b_data + header_off, inline_size);
6328
6329 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6330 new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6331
6332 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6333 args->new_bh, new_xh, &vb, meta_ac,
6334 ocfs2_get_xattr_value_root, NULL);
6335 if (ret) {
6336 mlog_errno(ret);
6337 goto out_commit;
6338 }
6339
6340 new_oi = OCFS2_I(args->new_inode);
6341 spin_lock(&new_oi->ip_lock);
6342 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6343 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6344 spin_unlock(&new_oi->ip_lock);
6345
6346 ocfs2_journal_dirty(handle, args->new_bh);
6347
6348out_commit:
6349 ocfs2_commit_trans(osb, handle);
6350
6351out:
6352 if (meta_ac)
6353 ocfs2_free_alloc_context(meta_ac);
6354 return ret;
6355}
6356
6357static int ocfs2_create_empty_xattr_block(struct inode *inode,
6358 struct buffer_head *fe_bh,
6359 struct buffer_head **ret_bh,
6360 int indexed)
6361{
6362 int ret;
6363 handle_t *handle;
6364 struct ocfs2_alloc_context *meta_ac;
6365 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6366
6367 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6368 if (ret < 0) {
6369 mlog_errno(ret);
6370 return ret;
6371 }
6372
6373 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6374 if (IS_ERR(handle)) {
6375 ret = PTR_ERR(handle);
6376 mlog_errno(ret);
6377 goto out;
6378 }
6379
6380 mlog(0, "create new xattr block for inode %llu, index = %d\n",
6381 (unsigned long long)fe_bh->b_blocknr, indexed);
6382 ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6383 meta_ac, ret_bh, indexed);
6384 if (ret)
6385 mlog_errno(ret);
6386
6387 ocfs2_commit_trans(osb, handle);
6388out:
6389 ocfs2_free_alloc_context(meta_ac);
6390 return ret;
6391}
6392
6393static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6394 struct buffer_head *blk_bh,
6395 struct buffer_head *new_blk_bh)
6396{
6397 int ret = 0, credits = 0;
6398 handle_t *handle;
6399 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6400 struct ocfs2_dinode *new_di;
6401 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6402 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6403 struct ocfs2_xattr_block *xb =
6404 (struct ocfs2_xattr_block *)blk_bh->b_data;
6405 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6406 struct ocfs2_xattr_block *new_xb =
6407 (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6408 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6409 struct ocfs2_alloc_context *meta_ac;
6410 struct ocfs2_xattr_value_buf vb = {
6411 .vb_bh = new_blk_bh,
6412 .vb_access = ocfs2_journal_access_xb,
6413 };
6414
6415 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6416 &credits, &meta_ac);
6417 if (ret) {
6418 mlog_errno(ret);
6419 return ret;
6420 }
6421
6422 /* One more credits in case we need to add xattr flags in new inode. */
6423 handle = ocfs2_start_trans(osb, credits + 1);
6424 if (IS_ERR(handle)) {
6425 ret = PTR_ERR(handle);
6426 mlog_errno(ret);
6427 goto out;
6428 }
6429
6430 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6431 ret = ocfs2_journal_access_di(handle,
6432 INODE_CACHE(args->new_inode),
6433 args->new_bh,
6434 OCFS2_JOURNAL_ACCESS_WRITE);
6435 if (ret) {
6436 mlog_errno(ret);
6437 goto out_commit;
6438 }
6439 }
6440
6441 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6442 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6443 if (ret) {
6444 mlog_errno(ret);
6445 goto out_commit;
6446 }
6447
6448 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6449 osb->sb->s_blocksize - header_off);
6450
6451 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6452 new_blk_bh, new_xh, &vb, meta_ac,
6453 ocfs2_get_xattr_value_root, NULL);
6454 if (ret) {
6455 mlog_errno(ret);
6456 goto out_commit;
6457 }
6458
6459 ocfs2_journal_dirty(handle, new_blk_bh);
6460
6461 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6462 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6463 spin_lock(&new_oi->ip_lock);
6464 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6465 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6466 spin_unlock(&new_oi->ip_lock);
6467
6468 ocfs2_journal_dirty(handle, args->new_bh);
6469 }
6470
6471out_commit:
6472 ocfs2_commit_trans(osb, handle);
6473
6474out:
6475 ocfs2_free_alloc_context(meta_ac);
6476 return ret;
6477}
6478
6479struct ocfs2_reflink_xattr_tree_args {
6480 struct ocfs2_xattr_reflink *reflink;
6481 struct buffer_head *old_blk_bh;
6482 struct buffer_head *new_blk_bh;
6483 struct ocfs2_xattr_bucket *old_bucket;
6484 struct ocfs2_xattr_bucket *new_bucket;
6485};
6486
6487/*
6488 * NOTE:
6489 * We have to handle the case that both old bucket and new bucket
6490 * will call this function to get the right ret_bh.
6491 * So The caller must give us the right bh.
6492 */
6493static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6494 struct buffer_head *bh,
6495 struct ocfs2_xattr_header *xh,
6496 int offset,
6497 struct ocfs2_xattr_value_root **xv,
6498 struct buffer_head **ret_bh,
6499 void *para)
6500{
6501 struct ocfs2_reflink_xattr_tree_args *args =
6502 (struct ocfs2_reflink_xattr_tree_args *)para;
6503 struct ocfs2_xattr_bucket *bucket;
6504
6505 if (bh == args->old_bucket->bu_bhs[0])
6506 bucket = args->old_bucket;
6507 else
6508 bucket = args->new_bucket;
6509
6510 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6511 xv, ret_bh);
6512}
6513
6514struct ocfs2_value_tree_metas {
6515 int num_metas;
6516 int credits;
6517 int num_recs;
6518};
6519
6520static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
6521 struct buffer_head *bh,
6522 struct ocfs2_xattr_header *xh,
6523 int offset,
6524 struct ocfs2_xattr_value_root **xv,
6525 struct buffer_head **ret_bh,
6526 void *para)
6527{
6528 struct ocfs2_xattr_bucket *bucket =
6529 (struct ocfs2_xattr_bucket *)para;
6530
6531 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6532 xv, ret_bh);
6533}
6534
6535static int ocfs2_calc_value_tree_metas(struct inode *inode,
6536 struct ocfs2_xattr_bucket *bucket,
6537 void *para)
6538{
6539 struct ocfs2_value_tree_metas *metas =
6540 (struct ocfs2_value_tree_metas *)para;
6541 struct ocfs2_xattr_header *xh =
6542 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6543
6544 /* Add the credits for this bucket first. */
6545 metas->credits += bucket->bu_blocks;
6546 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6547 xh, &metas->num_metas,
6548 &metas->credits, &metas->num_recs,
6549 ocfs2_value_tree_metas_in_bucket,
6550 bucket);
6551}
6552
6553/*
6554 * Given a xattr extent rec starting from blkno and having len clusters,
6555 * iterate all the buckets calculate how much metadata we need for reflinking
6556 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6557 */
6558static int ocfs2_lock_reflink_xattr_rec_allocators(
6559 struct ocfs2_reflink_xattr_tree_args *args,
6560 struct ocfs2_extent_tree *xt_et,
6561 u64 blkno, u32 len, int *credits,
6562 struct ocfs2_alloc_context **meta_ac,
6563 struct ocfs2_alloc_context **data_ac)
6564{
6565 int ret, num_free_extents;
6566 struct ocfs2_value_tree_metas metas;
6567 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6568 struct ocfs2_refcount_block *rb;
6569
6570 memset(&metas, 0, sizeof(metas));
6571
6572 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6573 ocfs2_calc_value_tree_metas, &metas);
6574 if (ret) {
6575 mlog_errno(ret);
6576 goto out;
6577 }
6578
6579 *credits = metas.credits;
6580
6581 /*
6582 * Calculate we need for refcount tree change.
6583 *
6584 * We need to add/modify num_recs in refcount tree, so just calculate
6585 * an approximate number we need for refcount tree change.
6586 * Sometimes we need to split the tree, and after split, half recs
6587 * will be moved to the new block, and a new block can only provide
6588 * half number of recs. So we multiple new blocks by 2.
6589 * In the end, we have to add credits for modifying the already
6590 * existed refcount block.
6591 */
6592 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6593 metas.num_recs =
6594 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6595 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6596 metas.num_metas += metas.num_recs;
6597 *credits += metas.num_recs +
6598 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6599 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6600 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6601 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6602 else
6603 *credits += 1;
6604
6605 /* count in the xattr tree change. */
6606 num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6607 if (num_free_extents < 0) {
6608 ret = num_free_extents;
6609 mlog_errno(ret);
6610 goto out;
6611 }
6612
6613 if (num_free_extents < len)
6614 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6615
6616 *credits += ocfs2_calc_extend_credits(osb->sb,
6617 xt_et->et_root_el, len);
6618
6619 if (metas.num_metas) {
6620 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6621 meta_ac);
5286 if (ret) { 6622 if (ret) {
5287 mlog_errno(ret); 6623 mlog_errno(ret);
5288 goto out; 6624 goto out;
5289 } 6625 }
6626 }
5290 6627
5291 ret = ocfs2_rm_xattr_cluster(inode, xb_bh, 6628 if (len) {
5292 p_blkno, e_cpos, num_clusters); 6629 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6630 if (ret)
6631 mlog_errno(ret);
6632 }
6633out:
6634 if (ret) {
6635 if (*meta_ac) {
6636 ocfs2_free_alloc_context(*meta_ac);
6637 meta_ac = NULL;
6638 }
6639 }
6640
6641 return ret;
6642}
6643
6644static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6645 u64 blkno, u64 new_blkno, u32 clusters,
6646 struct ocfs2_alloc_context *meta_ac,
6647 struct ocfs2_alloc_context *data_ac,
6648 struct ocfs2_reflink_xattr_tree_args *args)
6649{
6650 int i, j, ret = 0;
6651 struct super_block *sb = args->reflink->old_inode->i_sb;
6652 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6653 u32 num_buckets = clusters * bpc;
6654 int bpb = args->old_bucket->bu_blocks;
6655 struct ocfs2_xattr_value_buf vb = {
6656 .vb_access = ocfs2_journal_access,
6657 };
6658
6659 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6660 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
5293 if (ret) { 6661 if (ret) {
5294 mlog_errno(ret); 6662 mlog_errno(ret);
5295 break; 6663 break;
5296 } 6664 }
5297 6665
5298 if (e_cpos == 0) 6666 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6667 if (ret) {
6668 mlog_errno(ret);
5299 break; 6669 break;
6670 }
5300 6671
5301 name_hash = e_cpos - 1; 6672 /*
6673 * The real bucket num in this series of blocks is stored
6674 * in the 1st bucket.
6675 */
6676 if (i == 0)
6677 num_buckets = le16_to_cpu(
6678 bucket_xh(args->old_bucket)->xh_num_buckets);
6679
6680 ret = ocfs2_xattr_bucket_journal_access(handle,
6681 args->new_bucket,
6682 OCFS2_JOURNAL_ACCESS_CREATE);
6683 if (ret) {
6684 mlog_errno(ret);
6685 break;
6686 }
6687
6688 for (j = 0; j < bpb; j++)
6689 memcpy(bucket_block(args->new_bucket, j),
6690 bucket_block(args->old_bucket, j),
6691 sb->s_blocksize);
6692
6693 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6694
6695 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6696 args->old_bucket->bu_bhs[0],
6697 bucket_xh(args->old_bucket),
6698 args->new_bucket->bu_bhs[0],
6699 bucket_xh(args->new_bucket),
6700 &vb, meta_ac,
6701 ocfs2_get_reflink_xattr_value_root,
6702 args);
6703 if (ret) {
6704 mlog_errno(ret);
6705 break;
6706 }
6707
6708 /*
6709 * Re-access and dirty the bucket to calculate metaecc.
6710 * Because we may extend the transaction in reflink_xattr_header
6711 * which will let the already accessed block gone.
6712 */
6713 ret = ocfs2_xattr_bucket_journal_access(handle,
6714 args->new_bucket,
6715 OCFS2_JOURNAL_ACCESS_WRITE);
6716 if (ret) {
6717 mlog_errno(ret);
6718 break;
6719 }
6720
6721 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6722 ocfs2_xattr_bucket_relse(args->old_bucket);
6723 ocfs2_xattr_bucket_relse(args->new_bucket);
6724 }
6725
6726 ocfs2_xattr_bucket_relse(args->old_bucket);
6727 ocfs2_xattr_bucket_relse(args->new_bucket);
6728 return ret;
6729}
6730/*
6731 * Create the same xattr extent record in the new inode's xattr tree.
6732 */
6733static int ocfs2_reflink_xattr_rec(struct inode *inode,
6734 struct buffer_head *root_bh,
6735 u64 blkno,
6736 u32 cpos,
6737 u32 len,
6738 void *para)
6739{
6740 int ret, credits = 0;
6741 u32 p_cluster, num_clusters;
6742 u64 new_blkno;
6743 handle_t *handle;
6744 struct ocfs2_reflink_xattr_tree_args *args =
6745 (struct ocfs2_reflink_xattr_tree_args *)para;
6746 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6747 struct ocfs2_alloc_context *meta_ac = NULL;
6748 struct ocfs2_alloc_context *data_ac = NULL;
6749 struct ocfs2_extent_tree et;
6750
6751 ocfs2_init_xattr_tree_extent_tree(&et,
6752 INODE_CACHE(args->reflink->new_inode),
6753 args->new_blk_bh);
6754
6755 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6756 len, &credits,
6757 &meta_ac, &data_ac);
6758 if (ret) {
6759 mlog_errno(ret);
6760 goto out;
6761 }
6762
6763 handle = ocfs2_start_trans(osb, credits);
6764 if (IS_ERR(handle)) {
6765 ret = PTR_ERR(handle);
6766 mlog_errno(ret);
6767 goto out;
6768 }
6769
6770 ret = ocfs2_claim_clusters(osb, handle, data_ac,
6771 len, &p_cluster, &num_clusters);
6772 if (ret) {
6773 mlog_errno(ret);
6774 goto out_commit;
6775 }
6776
6777 new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6778
6779 mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6780 (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6781 ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6782 meta_ac, data_ac, args);
6783 if (ret) {
6784 mlog_errno(ret);
6785 goto out_commit;
6786 }
6787
6788 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6789 (unsigned long long)new_blkno, len, cpos);
6790 ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6791 len, 0, meta_ac);
6792 if (ret)
6793 mlog_errno(ret);
6794
6795out_commit:
6796 ocfs2_commit_trans(osb, handle);
6797
6798out:
6799 if (meta_ac)
6800 ocfs2_free_alloc_context(meta_ac);
6801 if (data_ac)
6802 ocfs2_free_alloc_context(data_ac);
6803 return ret;
6804}
6805
6806/*
6807 * Create reflinked xattr buckets.
6808 * We will add bucket one by one, and refcount all the xattrs in the bucket
6809 * if they are stored outside.
6810 */
6811static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6812 struct buffer_head *blk_bh,
6813 struct buffer_head *new_blk_bh)
6814{
6815 int ret;
6816 struct ocfs2_reflink_xattr_tree_args para;
6817
6818 memset(&para, 0, sizeof(para));
6819 para.reflink = args;
6820 para.old_blk_bh = blk_bh;
6821 para.new_blk_bh = new_blk_bh;
6822
6823 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6824 if (!para.old_bucket) {
6825 mlog_errno(-ENOMEM);
6826 return -ENOMEM;
6827 }
6828
6829 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6830 if (!para.new_bucket) {
6831 ret = -ENOMEM;
6832 mlog_errno(ret);
6833 goto out;
6834 }
6835
6836 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
6837 ocfs2_reflink_xattr_rec,
6838 &para);
6839 if (ret)
6840 mlog_errno(ret);
6841
6842out:
6843 ocfs2_xattr_bucket_free(para.old_bucket);
6844 ocfs2_xattr_bucket_free(para.new_bucket);
6845 return ret;
6846}
6847
6848static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
6849 struct buffer_head *blk_bh)
6850{
6851 int ret, indexed = 0;
6852 struct buffer_head *new_blk_bh = NULL;
6853 struct ocfs2_xattr_block *xb =
6854 (struct ocfs2_xattr_block *)blk_bh->b_data;
6855
6856
6857 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
6858 indexed = 1;
6859
6860 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
6861 &new_blk_bh, indexed);
6862 if (ret) {
6863 mlog_errno(ret);
6864 goto out;
6865 }
6866
6867 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
6868 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
6869 else
6870 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
6871 if (ret)
6872 mlog_errno(ret);
6873
6874out:
6875 brelse(new_blk_bh);
6876 return ret;
6877}
6878
6879static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
6880{
6881 int type = ocfs2_xattr_get_type(xe);
6882
6883 return type != OCFS2_XATTR_INDEX_SECURITY &&
6884 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
6885 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
6886}
6887
6888int ocfs2_reflink_xattrs(struct inode *old_inode,
6889 struct buffer_head *old_bh,
6890 struct inode *new_inode,
6891 struct buffer_head *new_bh,
6892 bool preserve_security)
6893{
6894 int ret;
6895 struct ocfs2_xattr_reflink args;
6896 struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
6897 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
6898 struct buffer_head *blk_bh = NULL;
6899 struct ocfs2_cached_dealloc_ctxt dealloc;
6900 struct ocfs2_refcount_tree *ref_tree;
6901 struct buffer_head *ref_root_bh = NULL;
6902
6903 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6904 le64_to_cpu(di->i_refcount_loc),
6905 1, &ref_tree, &ref_root_bh);
6906 if (ret) {
6907 mlog_errno(ret);
6908 goto out;
6909 }
6910
6911 ocfs2_init_dealloc_ctxt(&dealloc);
6912
6913 args.old_inode = old_inode;
6914 args.new_inode = new_inode;
6915 args.old_bh = old_bh;
6916 args.new_bh = new_bh;
6917 args.ref_ci = &ref_tree->rf_ci;
6918 args.ref_root_bh = ref_root_bh;
6919 args.dealloc = &dealloc;
6920 if (preserve_security)
6921 args.xattr_reflinked = NULL;
6922 else
6923 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
6924
6925 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6926 ret = ocfs2_reflink_xattr_inline(&args);
6927 if (ret) {
6928 mlog_errno(ret);
6929 goto out_unlock;
6930 }
6931 }
6932
6933 if (!di->i_xattr_loc)
6934 goto out_unlock;
6935
6936 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
6937 &blk_bh);
6938 if (ret < 0) {
6939 mlog_errno(ret);
6940 goto out_unlock;
6941 }
6942
6943 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
6944 if (ret)
6945 mlog_errno(ret);
6946
6947 brelse(blk_bh);
6948
6949out_unlock:
6950 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6951 ref_tree, 1);
6952 brelse(ref_root_bh);
6953
6954 if (ocfs2_dealloc_has_cluster(&dealloc)) {
6955 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
6956 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
5302 } 6957 }
5303 6958
5304out: 6959out:
@@ -5306,6 +6961,51 @@ out:
5306} 6961}
5307 6962
5308/* 6963/*
6964 * Initialize security and acl for a already created inode.
6965 * Used for reflink a non-preserve-security file.
6966 *
6967 * It uses common api like ocfs2_xattr_set, so the caller
6968 * must not hold any lock expect i_mutex.
6969 */
6970int ocfs2_init_security_and_acl(struct inode *dir,
6971 struct inode *inode)
6972{
6973 int ret = 0;
6974 struct buffer_head *dir_bh = NULL;
6975 struct ocfs2_security_xattr_info si = {
6976 .enable = 1,
6977 };
6978
6979 ret = ocfs2_init_security_get(inode, dir, &si);
6980 if (!ret) {
6981 ret = ocfs2_xattr_security_set(inode, si.name,
6982 si.value, si.value_len,
6983 XATTR_CREATE);
6984 if (ret) {
6985 mlog_errno(ret);
6986 goto leave;
6987 }
6988 } else if (ret != -EOPNOTSUPP) {
6989 mlog_errno(ret);
6990 goto leave;
6991 }
6992
6993 ret = ocfs2_inode_lock(dir, &dir_bh, 0);
6994 if (ret) {
6995 mlog_errno(ret);
6996 goto leave;
6997 }
6998
6999 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7000 if (ret)
7001 mlog_errno(ret);
7002
7003 ocfs2_inode_unlock(dir, 0);
7004 brelse(dir_bh);
7005leave:
7006 return ret;
7007}
7008/*
5309 * 'security' attributes support 7009 * 'security' attributes support
5310 */ 7010 */
5311static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, 7011static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1ca7e9a1b7bc..08e36389f56d 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -55,6 +55,8 @@ int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
55 int, const char *, const void *, size_t, int, 55 int, const char *, const void *, size_t, int,
56 struct ocfs2_alloc_context *, 56 struct ocfs2_alloc_context *,
57 struct ocfs2_alloc_context *); 57 struct ocfs2_alloc_context *);
58int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
59 struct ocfs2_dinode *di);
58int ocfs2_xattr_remove(struct inode *, struct buffer_head *); 60int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
59int ocfs2_init_security_get(struct inode *, struct inode *, 61int ocfs2_init_security_get(struct inode *, struct inode *,
60 struct ocfs2_security_xattr_info *); 62 struct ocfs2_security_xattr_info *);
@@ -83,5 +85,16 @@ struct ocfs2_xattr_value_buf {
83 struct ocfs2_xattr_value_root *vb_xv; 85 struct ocfs2_xattr_value_root *vb_xv;
84}; 86};
85 87
86 88int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
89 struct buffer_head *fe_bh,
90 struct ocfs2_caching_info *ref_ci,
91 struct buffer_head *ref_root_bh,
92 struct ocfs2_cached_dealloc_ctxt *dealloc);
93int ocfs2_reflink_xattrs(struct inode *old_inode,
94 struct buffer_head *old_bh,
95 struct inode *new_inode,
96 struct buffer_head *new_bh,
97 bool preserve_security);
98int ocfs2_init_security_and_acl(struct inode *dir,
99 struct inode *inode);
87#endif /* OCFS2_XATTR_H */ 100#endif /* OCFS2_XATTR_H */
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index c7275cfbdcfb..3680bae335b5 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -489,7 +489,7 @@ out:
489 return ret; 489 return ret;
490} 490}
491 491
492struct inode_operations omfs_dir_inops = { 492const struct inode_operations omfs_dir_inops = {
493 .lookup = omfs_lookup, 493 .lookup = omfs_lookup,
494 .mkdir = omfs_mkdir, 494 .mkdir = omfs_mkdir,
495 .rename = omfs_rename, 495 .rename = omfs_rename,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index d17e774eaf45..4845fbb18e6e 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -333,11 +333,11 @@ struct file_operations omfs_file_operations = {
333 .splice_read = generic_file_splice_read, 333 .splice_read = generic_file_splice_read,
334}; 334};
335 335
336struct inode_operations omfs_file_inops = { 336const struct inode_operations omfs_file_inops = {
337 .truncate = omfs_truncate 337 .truncate = omfs_truncate
338}; 338};
339 339
340struct address_space_operations omfs_aops = { 340const struct address_space_operations omfs_aops = {
341 .readpage = omfs_readpage, 341 .readpage = omfs_readpage,
342 .readpages = omfs_readpages, 342 .readpages = omfs_readpages,
343 .writepage = omfs_writepage, 343 .writepage = omfs_writepage,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 379ae5fb4411..f3b7c1541f3a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -278,7 +278,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
278 return 0; 278 return 0;
279} 279}
280 280
281static struct super_operations omfs_sops = { 281static const struct super_operations omfs_sops = {
282 .write_inode = omfs_write_inode, 282 .write_inode = omfs_write_inode,
283 .delete_inode = omfs_delete_inode, 283 .delete_inode = omfs_delete_inode,
284 .put_super = omfs_put_super, 284 .put_super = omfs_put_super,
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 2bc0f0670406..df71039945ac 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -45,15 +45,15 @@ extern int omfs_clear_range(struct super_block *sb, u64 block, int count);
45 45
46/* dir.c */ 46/* dir.c */
47extern struct file_operations omfs_dir_operations; 47extern struct file_operations omfs_dir_operations;
48extern struct inode_operations omfs_dir_inops; 48extern const struct inode_operations omfs_dir_inops;
49extern int omfs_make_empty(struct inode *inode, struct super_block *sb); 49extern int omfs_make_empty(struct inode *inode, struct super_block *sb);
50extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, 50extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
51 u64 fsblock); 51 u64 fsblock);
52 52
53/* file.c */ 53/* file.c */
54extern struct file_operations omfs_file_operations; 54extern struct file_operations omfs_file_operations;
55extern struct inode_operations omfs_file_inops; 55extern const struct inode_operations omfs_file_inops;
56extern struct address_space_operations omfs_aops; 56extern const struct address_space_operations omfs_aops;
57extern void omfs_make_empty_table(struct buffer_head *bh, int offset); 57extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
58extern int omfs_shrink_inode(struct inode *inode); 58extern int omfs_shrink_inode(struct inode *inode);
59 59
diff --git a/fs/open.c b/fs/open.c
index 31191bf513e4..4f01e06227c6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -290,10 +290,9 @@ out:
290 return error; 290 return error;
291} 291}
292 292
293SYSCALL_DEFINE2(truncate, const char __user *, path, unsigned long, length) 293SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
294{ 294{
295 /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ 295 return do_sys_truncate(path, length);
296 return do_sys_truncate(path, (long)length);
297} 296}
298 297
299static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 298static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index fbeaddf595d3..7b685e10cbad 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -581,7 +581,7 @@ try_scan:
581 } 581 }
582 582
583 if (from + size > get_capacity(disk)) { 583 if (from + size > get_capacity(disk)) {
584 struct block_device_operations *bdops = disk->fops; 584 const struct block_device_operations *bdops = disk->fops;
585 unsigned long long capacity; 585 unsigned long long capacity;
586 586
587 printk(KERN_WARNING 587 printk(KERN_WARNING
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 725a650bbbb8..0c6bc602e6c4 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -82,6 +82,7 @@
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/ptrace.h> 83#include <linux/ptrace.h>
84#include <linux/tracehook.h> 84#include <linux/tracehook.h>
85#include <linux/swapops.h>
85 86
86#include <asm/pgtable.h> 87#include <asm/pgtable.h>
87#include <asm/processor.h> 88#include <asm/processor.h>
@@ -321,6 +322,87 @@ static inline void task_context_switch_counts(struct seq_file *m,
321 p->nivcsw); 322 p->nivcsw);
322} 323}
323 324
325struct stack_stats {
326 struct vm_area_struct *vma;
327 unsigned long startpage;
328 unsigned long usage;
329};
330
331static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
332 unsigned long end, struct mm_walk *walk)
333{
334 struct stack_stats *ss = walk->private;
335 struct vm_area_struct *vma = ss->vma;
336 pte_t *pte, ptent;
337 spinlock_t *ptl;
338 int ret = 0;
339
340 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
341 for (; addr != end; pte++, addr += PAGE_SIZE) {
342 ptent = *pte;
343
344#ifdef CONFIG_STACK_GROWSUP
345 if (pte_present(ptent) || is_swap_pte(ptent))
346 ss->usage = addr - ss->startpage + PAGE_SIZE;
347#else
348 if (pte_present(ptent) || is_swap_pte(ptent)) {
349 ss->usage = ss->startpage - addr + PAGE_SIZE;
350 pte++;
351 ret = 1;
352 break;
353 }
354#endif
355 }
356 pte_unmap_unlock(pte - 1, ptl);
357 cond_resched();
358 return ret;
359}
360
361static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
362 struct task_struct *task)
363{
364 struct stack_stats ss;
365 struct mm_walk stack_walk = {
366 .pmd_entry = stack_usage_pte_range,
367 .mm = vma->vm_mm,
368 .private = &ss,
369 };
370
371 if (!vma->vm_mm || is_vm_hugetlb_page(vma))
372 return 0;
373
374 ss.vma = vma;
375 ss.startpage = task->stack_start & PAGE_MASK;
376 ss.usage = 0;
377
378#ifdef CONFIG_STACK_GROWSUP
379 walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
380 &stack_walk);
381#else
382 walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
383 &stack_walk);
384#endif
385 return ss.usage;
386}
387
388static inline void task_show_stack_usage(struct seq_file *m,
389 struct task_struct *task)
390{
391 struct vm_area_struct *vma;
392 struct mm_struct *mm = get_task_mm(task);
393
394 if (mm) {
395 down_read(&mm->mmap_sem);
396 vma = find_vma(mm, task->stack_start);
397 if (vma)
398 seq_printf(m, "Stack usage:\t%lu kB\n",
399 get_stack_usage_in_bytes(vma, task) >> 10);
400
401 up_read(&mm->mmap_sem);
402 mmput(mm);
403 }
404}
405
324int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 406int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
325 struct pid *pid, struct task_struct *task) 407 struct pid *pid, struct task_struct *task)
326{ 408{
@@ -340,6 +422,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
340 task_show_regs(m, task); 422 task_show_regs(m, task);
341#endif 423#endif
342 task_context_switch_counts(m, task); 424 task_context_switch_counts(m, task);
425 task_show_stack_usage(m, task);
343 return 0; 426 return 0;
344} 427}
345 428
@@ -481,7 +564,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
481 rsslim, 564 rsslim,
482 mm ? mm->start_code : 0, 565 mm ? mm->start_code : 0,
483 mm ? mm->end_code : 0, 566 mm ? mm->end_code : 0,
484 (permitted && mm) ? mm->start_stack : 0, 567 (permitted) ? task->stack_start : 0,
485 esp, 568 esp,
486 eip, 569 eip,
487 /* The signal information here is obsolete. 570 /* The signal information here is obsolete.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6658a9..837469a96598 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
447 447
448 do_posix_clock_monotonic_gettime(&uptime); 448 do_posix_clock_monotonic_gettime(&uptime);
449 read_lock(&tasklist_lock); 449 read_lock(&tasklist_lock);
450 points = badness(task, uptime.tv_sec); 450 points = badness(task->group_leader, uptime.tv_sec);
451 read_unlock(&tasklist_lock); 451 read_unlock(&tasklist_lock);
452 return sprintf(buffer, "%lu\n", points); 452 return sprintf(buffer, "%lu\n", points);
453} 453}
@@ -458,7 +458,7 @@ struct limit_names {
458}; 458};
459 459
460static const struct limit_names lnames[RLIM_NLIMITS] = { 460static const struct limit_names lnames[RLIM_NLIMITS] = {
461 [RLIMIT_CPU] = {"Max cpu time", "ms"}, 461 [RLIMIT_CPU] = {"Max cpu time", "seconds"},
462 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 462 [RLIMIT_FSIZE] = {"Max file size", "bytes"},
463 [RLIMIT_DATA] = {"Max data size", "bytes"}, 463 [RLIMIT_DATA] = {"Max data size", "bytes"},
464 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 464 [RLIMIT_STACK] = {"Max stack size", "bytes"},
@@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
999 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 999 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
1000 char buffer[PROC_NUMBUF]; 1000 char buffer[PROC_NUMBUF];
1001 size_t len; 1001 size_t len;
1002 int oom_adjust; 1002 int oom_adjust = OOM_DISABLE;
1003 unsigned long flags;
1003 1004
1004 if (!task) 1005 if (!task)
1005 return -ESRCH; 1006 return -ESRCH;
1006 oom_adjust = task->oomkilladj; 1007
1008 if (lock_task_sighand(task, &flags)) {
1009 oom_adjust = task->signal->oom_adj;
1010 unlock_task_sighand(task, &flags);
1011 }
1012
1007 put_task_struct(task); 1013 put_task_struct(task);
1008 1014
1009 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1015 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1015,32 +1021,44 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1015 size_t count, loff_t *ppos) 1021 size_t count, loff_t *ppos)
1016{ 1022{
1017 struct task_struct *task; 1023 struct task_struct *task;
1018 char buffer[PROC_NUMBUF], *end; 1024 char buffer[PROC_NUMBUF];
1019 int oom_adjust; 1025 long oom_adjust;
1026 unsigned long flags;
1027 int err;
1020 1028
1021 memset(buffer, 0, sizeof(buffer)); 1029 memset(buffer, 0, sizeof(buffer));
1022 if (count > sizeof(buffer) - 1) 1030 if (count > sizeof(buffer) - 1)
1023 count = sizeof(buffer) - 1; 1031 count = sizeof(buffer) - 1;
1024 if (copy_from_user(buffer, buf, count)) 1032 if (copy_from_user(buffer, buf, count))
1025 return -EFAULT; 1033 return -EFAULT;
1026 oom_adjust = simple_strtol(buffer, &end, 0); 1034
1035 err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
1036 if (err)
1037 return -EINVAL;
1027 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1038 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
1028 oom_adjust != OOM_DISABLE) 1039 oom_adjust != OOM_DISABLE)
1029 return -EINVAL; 1040 return -EINVAL;
1030 if (*end == '\n') 1041
1031 end++;
1032 task = get_proc_task(file->f_path.dentry->d_inode); 1042 task = get_proc_task(file->f_path.dentry->d_inode);
1033 if (!task) 1043 if (!task)
1034 return -ESRCH; 1044 return -ESRCH;
1035 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { 1045 if (!lock_task_sighand(task, &flags)) {
1046 put_task_struct(task);
1047 return -ESRCH;
1048 }
1049
1050 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1051 unlock_task_sighand(task, &flags);
1036 put_task_struct(task); 1052 put_task_struct(task);
1037 return -EACCES; 1053 return -EACCES;
1038 } 1054 }
1039 task->oomkilladj = oom_adjust; 1055
1056 task->signal->oom_adj = oom_adjust;
1057
1058 unlock_task_sighand(task, &flags);
1040 put_task_struct(task); 1059 put_task_struct(task);
1041 if (end - buffer == 0) 1060
1042 return -EIO; 1061 return count;
1043 return end - buffer;
1044} 1062}
1045 1063
1046static const struct file_operations proc_oom_adjust_operations = { 1064static const struct file_operations proc_oom_adjust_operations = {
@@ -1169,17 +1187,16 @@ static ssize_t proc_fault_inject_write(struct file * file,
1169 count = sizeof(buffer) - 1; 1187 count = sizeof(buffer) - 1;
1170 if (copy_from_user(buffer, buf, count)) 1188 if (copy_from_user(buffer, buf, count))
1171 return -EFAULT; 1189 return -EFAULT;
1172 make_it_fail = simple_strtol(buffer, &end, 0); 1190 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1173 if (*end == '\n') 1191 if (*end)
1174 end++; 1192 return -EINVAL;
1175 task = get_proc_task(file->f_dentry->d_inode); 1193 task = get_proc_task(file->f_dentry->d_inode);
1176 if (!task) 1194 if (!task)
1177 return -ESRCH; 1195 return -ESRCH;
1178 task->make_it_fail = make_it_fail; 1196 task->make_it_fail = make_it_fail;
1179 put_task_struct(task); 1197 put_task_struct(task);
1180 if (end - buffer == 0) 1198
1181 return -EIO; 1199 return count;
1182 return end - buffer;
1183} 1200}
1184 1201
1185static const struct file_operations proc_fault_inject_operations = { 1202static const struct file_operations proc_fault_inject_operations = {
@@ -2586,9 +2603,6 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
2586 dput(dentry); 2603 dput(dentry);
2587 } 2604 }
2588 2605
2589 if (tgid == 0)
2590 goto out;
2591
2592 name.name = buf; 2606 name.name = buf;
2593 name.len = snprintf(buf, sizeof(buf), "%d", tgid); 2607 name.len = snprintf(buf, sizeof(buf), "%d", tgid);
2594 leader = d_hash_and_lookup(mnt->mnt_root, &name); 2608 leader = d_hash_and_lookup(mnt->mnt_root, &name);
@@ -2645,17 +2659,16 @@ out:
2645void proc_flush_task(struct task_struct *task) 2659void proc_flush_task(struct task_struct *task)
2646{ 2660{
2647 int i; 2661 int i;
2648 struct pid *pid, *tgid = NULL; 2662 struct pid *pid, *tgid;
2649 struct upid *upid; 2663 struct upid *upid;
2650 2664
2651 pid = task_pid(task); 2665 pid = task_pid(task);
2652 if (thread_group_leader(task)) 2666 tgid = task_tgid(task);
2653 tgid = task_tgid(task);
2654 2667
2655 for (i = 0; i <= pid->level; i++) { 2668 for (i = 0; i <= pid->level; i++) {
2656 upid = &pid->numbers[i]; 2669 upid = &pid->numbers[i];
2657 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2670 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2658 tgid ? tgid->numbers[i].nr : 0); 2671 tgid->numbers[i].nr);
2659 } 2672 }
2660 2673
2661 upid = &pid->numbers[pid->level]; 2674 upid = &pid->numbers[pid->level];
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 59b43a068872..56013371f9f3 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -17,9 +17,15 @@
17#include <linux/elfcore.h> 17#include <linux/elfcore.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/highmem.h> 19#include <linux/highmem.h>
20#include <linux/bootmem.h>
20#include <linux/init.h> 21#include <linux/init.h>
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include <asm/io.h> 23#include <asm/io.h>
24#include <linux/list.h>
25#include <linux/ioport.h>
26#include <linux/mm.h>
27#include <linux/memory.h>
28#include <asm/sections.h>
23 29
24#define CORE_STR "CORE" 30#define CORE_STR "CORE"
25 31
@@ -29,17 +35,6 @@
29 35
30static struct proc_dir_entry *proc_root_kcore; 36static struct proc_dir_entry *proc_root_kcore;
31 37
32static int open_kcore(struct inode * inode, struct file * filp)
33{
34 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
35}
36
37static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *);
38
39static const struct file_operations proc_kcore_operations = {
40 .read = read_kcore,
41 .open = open_kcore,
42};
43 38
44#ifndef kc_vaddr_to_offset 39#ifndef kc_vaddr_to_offset
45#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET) 40#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
@@ -57,18 +52,19 @@ struct memelfnote
57 void *data; 52 void *data;
58}; 53};
59 54
60static struct kcore_list *kclist; 55static LIST_HEAD(kclist_head);
61static DEFINE_RWLOCK(kclist_lock); 56static DEFINE_RWLOCK(kclist_lock);
57static int kcore_need_update = 1;
62 58
63void 59void
64kclist_add(struct kcore_list *new, void *addr, size_t size) 60kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
65{ 61{
66 new->addr = (unsigned long)addr; 62 new->addr = (unsigned long)addr;
67 new->size = size; 63 new->size = size;
64 new->type = type;
68 65
69 write_lock(&kclist_lock); 66 write_lock(&kclist_lock);
70 new->next = kclist; 67 list_add_tail(&new->list, &kclist_head);
71 kclist = new;
72 write_unlock(&kclist_lock); 68 write_unlock(&kclist_lock);
73} 69}
74 70
@@ -80,7 +76,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
80 *nphdr = 1; /* PT_NOTE */ 76 *nphdr = 1; /* PT_NOTE */
81 size = 0; 77 size = 0;
82 78
83 for (m=kclist; m; m=m->next) { 79 list_for_each_entry(m, &kclist_head, list) {
84 try = kc_vaddr_to_offset((size_t)m->addr + m->size); 80 try = kc_vaddr_to_offset((size_t)m->addr + m->size);
85 if (try > size) 81 if (try > size)
86 size = try; 82 size = try;
@@ -97,6 +93,177 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
97 return size + *elf_buflen; 93 return size + *elf_buflen;
98} 94}
99 95
96static void free_kclist_ents(struct list_head *head)
97{
98 struct kcore_list *tmp, *pos;
99
100 list_for_each_entry_safe(pos, tmp, head, list) {
101 list_del(&pos->list);
102 kfree(pos);
103 }
104}
105/*
106 * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
107 */
108static void __kcore_update_ram(struct list_head *list)
109{
110 int nphdr;
111 size_t size;
112 struct kcore_list *tmp, *pos;
113 LIST_HEAD(garbage);
114
115 write_lock(&kclist_lock);
116 if (kcore_need_update) {
117 list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
118 if (pos->type == KCORE_RAM
119 || pos->type == KCORE_VMEMMAP)
120 list_move(&pos->list, &garbage);
121 }
122 list_splice_tail(list, &kclist_head);
123 } else
124 list_splice(list, &garbage);
125 kcore_need_update = 0;
126 proc_root_kcore->size = get_kcore_size(&nphdr, &size);
127 write_unlock(&kclist_lock);
128
129 free_kclist_ents(&garbage);
130}
131
132
133#ifdef CONFIG_HIGHMEM
134/*
135 * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
136 * because memory hole is not as big as !HIGHMEM case.
137 * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
138 */
139static int kcore_update_ram(void)
140{
141 LIST_HEAD(head);
142 struct kcore_list *ent;
143 int ret = 0;
144
145 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
146 if (!ent)
147 return -ENOMEM;
148 ent->addr = (unsigned long)__va(0);
149 ent->size = max_low_pfn << PAGE_SHIFT;
150 ent->type = KCORE_RAM;
151 list_add(&ent->list, &head);
152 __kcore_update_ram(&head);
153 return ret;
154}
155
156#else /* !CONFIG_HIGHMEM */
157
158#ifdef CONFIG_SPARSEMEM_VMEMMAP
159/* calculate vmemmap's address from given system ram pfn and register it */
160int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
161{
162 unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
163 unsigned long nr_pages = ent->size >> PAGE_SHIFT;
164 unsigned long start, end;
165 struct kcore_list *vmm, *tmp;
166
167
168 start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
169 end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
170 end = ALIGN(end, PAGE_SIZE);
171 /* overlap check (because we have to align page */
172 list_for_each_entry(tmp, head, list) {
173 if (tmp->type != KCORE_VMEMMAP)
174 continue;
175 if (start < tmp->addr + tmp->size)
176 if (end > tmp->addr)
177 end = tmp->addr;
178 }
179 if (start < end) {
180 vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
181 if (!vmm)
182 return 0;
183 vmm->addr = start;
184 vmm->size = end - start;
185 vmm->type = KCORE_VMEMMAP;
186 list_add_tail(&vmm->list, head);
187 }
188 return 1;
189
190}
191#else
192int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
193{
194 return 1;
195}
196
197#endif
198
199static int
200kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
201{
202 struct list_head *head = (struct list_head *)arg;
203 struct kcore_list *ent;
204
205 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
206 if (!ent)
207 return -ENOMEM;
208 ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
209 ent->size = nr_pages << PAGE_SHIFT;
210
211 /* Sanity check: Can happen in 32bit arch...maybe */
212 if (ent->addr < (unsigned long) __va(0))
213 goto free_out;
214
215 /* cut not-mapped area. ....from ppc-32 code. */
216 if (ULONG_MAX - ent->addr < ent->size)
217 ent->size = ULONG_MAX - ent->addr;
218
219 /* cut when vmalloc() area is higher than direct-map area */
220 if (VMALLOC_START > (unsigned long)__va(0)) {
221 if (ent->addr > VMALLOC_START)
222 goto free_out;
223 if (VMALLOC_START - ent->addr < ent->size)
224 ent->size = VMALLOC_START - ent->addr;
225 }
226
227 ent->type = KCORE_RAM;
228 list_add_tail(&ent->list, head);
229
230 if (!get_sparsemem_vmemmap_info(ent, head)) {
231 list_del(&ent->list);
232 goto free_out;
233 }
234
235 return 0;
236free_out:
237 kfree(ent);
238 return 1;
239}
240
241static int kcore_update_ram(void)
242{
243 int nid, ret;
244 unsigned long end_pfn;
245 LIST_HEAD(head);
246
247 /* Not inialized....update now */
248 /* find out "max pfn" */
249 end_pfn = 0;
250 for_each_node_state(nid, N_HIGH_MEMORY) {
251 unsigned long node_end;
252 node_end = NODE_DATA(nid)->node_start_pfn +
253 NODE_DATA(nid)->node_spanned_pages;
254 if (end_pfn < node_end)
255 end_pfn = node_end;
256 }
257 /* scan 0 to max_pfn */
258 ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
259 if (ret) {
260 free_kclist_ents(&head);
261 return -ENOMEM;
262 }
263 __kcore_update_ram(&head);
264 return ret;
265}
266#endif /* CONFIG_HIGHMEM */
100 267
101/*****************************************************************************/ 268/*****************************************************************************/
102/* 269/*
@@ -192,7 +359,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
192 nhdr->p_align = 0; 359 nhdr->p_align = 0;
193 360
194 /* setup ELF PT_LOAD program header for every area */ 361 /* setup ELF PT_LOAD program header for every area */
195 for (m=kclist; m; m=m->next) { 362 list_for_each_entry(m, &kclist_head, list) {
196 phdr = (struct elf_phdr *) bufp; 363 phdr = (struct elf_phdr *) bufp;
197 bufp += sizeof(struct elf_phdr); 364 bufp += sizeof(struct elf_phdr);
198 offset += sizeof(struct elf_phdr); 365 offset += sizeof(struct elf_phdr);
@@ -265,7 +432,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
265 unsigned long start; 432 unsigned long start;
266 433
267 read_lock(&kclist_lock); 434 read_lock(&kclist_lock);
268 proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen); 435 size = get_kcore_size(&nphdr, &elf_buflen);
436
269 if (buflen == 0 || *fpos >= size) { 437 if (buflen == 0 || *fpos >= size) {
270 read_unlock(&kclist_lock); 438 read_unlock(&kclist_lock);
271 return 0; 439 return 0;
@@ -317,7 +485,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
317 struct kcore_list *m; 485 struct kcore_list *m;
318 486
319 read_lock(&kclist_lock); 487 read_lock(&kclist_lock);
320 for (m=kclist; m; m=m->next) { 488 list_for_each_entry(m, &kclist_head, list) {
321 if (start >= m->addr && start < (m->addr+m->size)) 489 if (start >= m->addr && start < (m->addr+m->size))
322 break; 490 break;
323 } 491 }
@@ -326,45 +494,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
326 if (m == NULL) { 494 if (m == NULL) {
327 if (clear_user(buffer, tsz)) 495 if (clear_user(buffer, tsz))
328 return -EFAULT; 496 return -EFAULT;
329 } else if (is_vmalloc_addr((void *)start)) { 497 } else if (is_vmalloc_or_module_addr((void *)start)) {
330 char * elf_buf; 498 char * elf_buf;
331 struct vm_struct *m;
332 unsigned long curstart = start;
333 unsigned long cursize = tsz;
334 499
335 elf_buf = kzalloc(tsz, GFP_KERNEL); 500 elf_buf = kzalloc(tsz, GFP_KERNEL);
336 if (!elf_buf) 501 if (!elf_buf)
337 return -ENOMEM; 502 return -ENOMEM;
338 503 vread(elf_buf, (char *)start, tsz);
339 read_lock(&vmlist_lock); 504 /* we have to zero-fill user buffer even if no read */
340 for (m=vmlist; m && cursize; m=m->next) {
341 unsigned long vmstart;
342 unsigned long vmsize;
343 unsigned long msize = m->size - PAGE_SIZE;
344
345 if (((unsigned long)m->addr + msize) <
346 curstart)
347 continue;
348 if ((unsigned long)m->addr > (curstart +
349 cursize))
350 break;
351 vmstart = (curstart < (unsigned long)m->addr ?
352 (unsigned long)m->addr : curstart);
353 if (((unsigned long)m->addr + msize) >
354 (curstart + cursize))
355 vmsize = curstart + cursize - vmstart;
356 else
357 vmsize = (unsigned long)m->addr +
358 msize - vmstart;
359 curstart = vmstart + vmsize;
360 cursize -= vmsize;
361 /* don't dump ioremap'd stuff! (TA) */
362 if (m->flags & VM_IOREMAP)
363 continue;
364 memcpy(elf_buf + (vmstart - start),
365 (char *)vmstart, vmsize);
366 }
367 read_unlock(&vmlist_lock);
368 if (copy_to_user(buffer, elf_buf, tsz)) { 505 if (copy_to_user(buffer, elf_buf, tsz)) {
369 kfree(elf_buf); 506 kfree(elf_buf);
370 return -EFAULT; 507 return -EFAULT;
@@ -402,12 +539,96 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
402 return acc; 539 return acc;
403} 540}
404 541
542
543static int open_kcore(struct inode *inode, struct file *filp)
544{
545 if (!capable(CAP_SYS_RAWIO))
546 return -EPERM;
547 if (kcore_need_update)
548 kcore_update_ram();
549 if (i_size_read(inode) != proc_root_kcore->size) {
550 mutex_lock(&inode->i_mutex);
551 i_size_write(inode, proc_root_kcore->size);
552 mutex_unlock(&inode->i_mutex);
553 }
554 return 0;
555}
556
557
558static const struct file_operations proc_kcore_operations = {
559 .read = read_kcore,
560 .open = open_kcore,
561};
562
563#ifdef CONFIG_MEMORY_HOTPLUG
564/* just remember that we have to update kcore */
565static int __meminit kcore_callback(struct notifier_block *self,
566 unsigned long action, void *arg)
567{
568 switch (action) {
569 case MEM_ONLINE:
570 case MEM_OFFLINE:
571 write_lock(&kclist_lock);
572 kcore_need_update = 1;
573 write_unlock(&kclist_lock);
574 }
575 return NOTIFY_OK;
576}
577#endif
578
579
580static struct kcore_list kcore_vmalloc;
581
582#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
583static struct kcore_list kcore_text;
584/*
585 * If defined, special segment is used for mapping kernel text instead of
586 * direct-map area. We need to create special TEXT section.
587 */
588static void __init proc_kcore_text_init(void)
589{
590 kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT);
591}
592#else
593static void __init proc_kcore_text_init(void)
594{
595}
596#endif
597
598#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
599/*
600 * MODULES_VADDR has no intersection with VMALLOC_ADDR.
601 */
602struct kcore_list kcore_modules;
603static void __init add_modules_range(void)
604{
605 kclist_add(&kcore_modules, (void *)MODULES_VADDR,
606 MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
607}
608#else
609static void __init add_modules_range(void)
610{
611}
612#endif
613
405static int __init proc_kcore_init(void) 614static int __init proc_kcore_init(void)
406{ 615{
407 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); 616 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
408 if (proc_root_kcore) 617 &proc_kcore_operations);
409 proc_root_kcore->size = 618 if (!proc_root_kcore) {
410 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 619 printk(KERN_ERR "couldn't create /proc/kcore\n");
620 return 0; /* Always returns 0. */
621 }
622 /* Store text area if it's special */
623 proc_kcore_text_init();
624 /* Store vmalloc area */
625 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
626 VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
627 add_modules_range();
628 /* Store direct-map area from physical memory map */
629 kcore_update_ram();
630 hotplug_memory_notifier(kcore_callback, 0);
631
411 return 0; 632 return 0;
412} 633}
413module_init(proc_kcore_init); 634module_init(proc_kcore_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index d5c410d47fae..171e052c07b3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -81,9 +81,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
81 "Writeback: %8lu kB\n" 81 "Writeback: %8lu kB\n"
82 "AnonPages: %8lu kB\n" 82 "AnonPages: %8lu kB\n"
83 "Mapped: %8lu kB\n" 83 "Mapped: %8lu kB\n"
84 "Shmem: %8lu kB\n"
84 "Slab: %8lu kB\n" 85 "Slab: %8lu kB\n"
85 "SReclaimable: %8lu kB\n" 86 "SReclaimable: %8lu kB\n"
86 "SUnreclaim: %8lu kB\n" 87 "SUnreclaim: %8lu kB\n"
88 "KernelStack: %8lu kB\n"
87 "PageTables: %8lu kB\n" 89 "PageTables: %8lu kB\n"
88#ifdef CONFIG_QUICKLIST 90#ifdef CONFIG_QUICKLIST
89 "Quicklists: %8lu kB\n" 91 "Quicklists: %8lu kB\n"
@@ -124,10 +126,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
124 K(global_page_state(NR_WRITEBACK)), 126 K(global_page_state(NR_WRITEBACK)),
125 K(global_page_state(NR_ANON_PAGES)), 127 K(global_page_state(NR_ANON_PAGES)),
126 K(global_page_state(NR_FILE_MAPPED)), 128 K(global_page_state(NR_FILE_MAPPED)),
129 K(global_page_state(NR_SHMEM)),
127 K(global_page_state(NR_SLAB_RECLAIMABLE) + 130 K(global_page_state(NR_SLAB_RECLAIMABLE) +
128 global_page_state(NR_SLAB_UNRECLAIMABLE)), 131 global_page_state(NR_SLAB_UNRECLAIMABLE)),
129 K(global_page_state(NR_SLAB_RECLAIMABLE)), 132 K(global_page_state(NR_SLAB_RECLAIMABLE)),
130 K(global_page_state(NR_SLAB_UNRECLAIMABLE)), 133 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
134 global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
131 K(global_page_state(NR_PAGETABLE)), 135 K(global_page_state(NR_PAGETABLE)),
132#ifdef CONFIG_QUICKLIST 136#ifdef CONFIG_QUICKLIST
133 K(quicklist_total_size()), 137 K(quicklist_total_size()),
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 7e14d1a04001..9fe7d7ebe115 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -109,7 +109,7 @@ static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
109 return rb_next((struct rb_node *) v); 109 return rb_next((struct rb_node *) v);
110} 110}
111 111
112static struct seq_operations proc_nommu_region_list_seqop = { 112static const struct seq_operations proc_nommu_region_list_seqop = {
113 .start = nommu_region_list_start, 113 .start = nommu_region_list_start,
114 .next = nommu_region_list_next, 114 .next = nommu_region_list_next,
115 .stop = nommu_region_list_stop, 115 .stop = nommu_region_list_stop,
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2707c6c7a20f..2281c2cbfe2b 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -2,6 +2,7 @@
2#include <linux/compiler.h> 2#include <linux/compiler.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/ksm.h>
5#include <linux/mm.h> 6#include <linux/mm.h>
6#include <linux/mmzone.h> 7#include <linux/mmzone.h>
7#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
@@ -95,6 +96,8 @@ static const struct file_operations proc_kpagecount_operations = {
95#define KPF_UNEVICTABLE 18 96#define KPF_UNEVICTABLE 18
96#define KPF_NOPAGE 20 97#define KPF_NOPAGE 20
97 98
99#define KPF_KSM 21
100
98/* kernel hacking assistances 101/* kernel hacking assistances
99 * WARNING: subject to change, never rely on them! 102 * WARNING: subject to change, never rely on them!
100 */ 103 */
@@ -137,6 +140,8 @@ static u64 get_uflags(struct page *page)
137 u |= 1 << KPF_MMAP; 140 u |= 1 << KPF_MMAP;
138 if (PageAnon(page)) 141 if (PageAnon(page))
139 u |= 1 << KPF_ANON; 142 u |= 1 << KPF_ANON;
143 if (PageKsm(page))
144 u |= 1 << KPF_KSM;
140 145
141 /* 146 /*
142 * compound pages: export both head/tail info 147 * compound pages: export both head/tail info
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9bd8be1d235c..2a1bef9203c6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -243,6 +243,25 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
243 } else if (vma->vm_start <= mm->start_stack && 243 } else if (vma->vm_start <= mm->start_stack &&
244 vma->vm_end >= mm->start_stack) { 244 vma->vm_end >= mm->start_stack) {
245 name = "[stack]"; 245 name = "[stack]";
246 } else {
247 unsigned long stack_start;
248 struct proc_maps_private *pmp;
249
250 pmp = m->private;
251 stack_start = pmp->task->stack_start;
252
253 if (vma->vm_start <= stack_start &&
254 vma->vm_end >= stack_start) {
255 pad_len_spaces(m, len);
256 seq_printf(m,
257 "[threadstack:%08lx]",
258#ifdef CONFIG_STACK_GROWSUP
259 vma->vm_end - stack_start
260#else
261 stack_start - vma->vm_start
262#endif
263 );
264 }
246 } 265 }
247 } else { 266 } else {
248 name = "[vdso]"; 267 name = "[vdso]";
@@ -465,23 +484,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
465 return 0; 484 return 0;
466} 485}
467 486
487#define CLEAR_REFS_ALL 1
488#define CLEAR_REFS_ANON 2
489#define CLEAR_REFS_MAPPED 3
490
468static ssize_t clear_refs_write(struct file *file, const char __user *buf, 491static ssize_t clear_refs_write(struct file *file, const char __user *buf,
469 size_t count, loff_t *ppos) 492 size_t count, loff_t *ppos)
470{ 493{
471 struct task_struct *task; 494 struct task_struct *task;
472 char buffer[PROC_NUMBUF], *end; 495 char buffer[PROC_NUMBUF];
473 struct mm_struct *mm; 496 struct mm_struct *mm;
474 struct vm_area_struct *vma; 497 struct vm_area_struct *vma;
498 long type;
475 499
476 memset(buffer, 0, sizeof(buffer)); 500 memset(buffer, 0, sizeof(buffer));
477 if (count > sizeof(buffer) - 1) 501 if (count > sizeof(buffer) - 1)
478 count = sizeof(buffer) - 1; 502 count = sizeof(buffer) - 1;
479 if (copy_from_user(buffer, buf, count)) 503 if (copy_from_user(buffer, buf, count))
480 return -EFAULT; 504 return -EFAULT;
481 if (!simple_strtol(buffer, &end, 0)) 505 if (strict_strtol(strstrip(buffer), 10, &type))
506 return -EINVAL;
507 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
482 return -EINVAL; 508 return -EINVAL;
483 if (*end == '\n')
484 end++;
485 task = get_proc_task(file->f_path.dentry->d_inode); 509 task = get_proc_task(file->f_path.dentry->d_inode);
486 if (!task) 510 if (!task)
487 return -ESRCH; 511 return -ESRCH;
@@ -494,18 +518,31 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
494 down_read(&mm->mmap_sem); 518 down_read(&mm->mmap_sem);
495 for (vma = mm->mmap; vma; vma = vma->vm_next) { 519 for (vma = mm->mmap; vma; vma = vma->vm_next) {
496 clear_refs_walk.private = vma; 520 clear_refs_walk.private = vma;
497 if (!is_vm_hugetlb_page(vma)) 521 if (is_vm_hugetlb_page(vma))
498 walk_page_range(vma->vm_start, vma->vm_end, 522 continue;
499 &clear_refs_walk); 523 /*
524 * Writing 1 to /proc/pid/clear_refs affects all pages.
525 *
526 * Writing 2 to /proc/pid/clear_refs only affects
527 * Anonymous pages.
528 *
529 * Writing 3 to /proc/pid/clear_refs only affects file
530 * mapped pages.
531 */
532 if (type == CLEAR_REFS_ANON && vma->vm_file)
533 continue;
534 if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
535 continue;
536 walk_page_range(vma->vm_start, vma->vm_end,
537 &clear_refs_walk);
500 } 538 }
501 flush_tlb_mm(mm); 539 flush_tlb_mm(mm);
502 up_read(&mm->mmap_sem); 540 up_read(&mm->mmap_sem);
503 mmput(mm); 541 mmput(mm);
504 } 542 }
505 put_task_struct(task); 543 put_task_struct(task);
506 if (end - buffer == 0) 544
507 return -EIO; 545 return count;
508 return end - buffer;
509} 546}
510 547
511const struct file_operations proc_clear_refs_operations = { 548const struct file_operations proc_clear_refs_operations = {
diff --git a/fs/qnx4/Kconfig b/fs/qnx4/Kconfig
index be8e0e1445b6..5f6089994042 100644
--- a/fs/qnx4/Kconfig
+++ b/fs/qnx4/Kconfig
@@ -6,20 +6,9 @@ config QNX4FS_FS
6 QNX 4 and QNX 6 (the latter is also called QNX RTP). 6 QNX 4 and QNX 6 (the latter is also called QNX RTP).
7 Further information is available at <http://www.qnx.com/>. 7 Further information is available at <http://www.qnx.com/>.
8 Say Y if you intend to mount QNX hard disks or floppies. 8 Say Y if you intend to mount QNX hard disks or floppies.
9 Unless you say Y to "QNX4FS read-write support" below, you will
10 only be able to read these file systems.
11 9
12 To compile this file system support as a module, choose M here: the 10 To compile this file system support as a module, choose M here: the
13 module will be called qnx4. 11 module will be called qnx4.
14 12
15 If you don't know whether you need it, then you don't need it: 13 If you don't know whether you need it, then you don't need it:
16 answer N. 14 answer N.
17
18config QNX4FS_RW
19 bool "QNX4FS write support (DANGEROUS)"
20 depends on QNX4FS_FS && EXPERIMENTAL && BROKEN
21 help
22 Say Y if you want to test write support for QNX4 file systems.
23
24 It's currently broken, so for now:
25 answer N.
diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile
index e4d408cc5473..4a283b3f87f8 100644
--- a/fs/qnx4/Makefile
+++ b/fs/qnx4/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_QNX4FS_FS) += qnx4.o 5obj-$(CONFIG_QNX4FS_FS) += qnx4.o
6 6
7qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o 7qnx4-objs := inode.o dir.o namei.o bitmap.o
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index e1cd061a25f7..0afba069d567 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -78,84 +78,3 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb)
78 78
79 return total_free; 79 return total_free;
80} 80}
81
82#ifdef CONFIG_QNX4FS_RW
83
84int qnx4_is_free(struct super_block *sb, long block)
85{
86 int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1;
87 int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size);
88 struct buffer_head *bh;
89 const char *g;
90 int ret = -EIO;
91
92 start += block / (QNX4_BLOCK_SIZE * 8);
93 QNX4DEBUG(("qnx4: is_free requesting block [%lu], bitmap in block [%lu]\n",
94 (unsigned long) block, (unsigned long) start));
95 (void) size; /* CHECKME */
96 bh = sb_bread(sb, start);
97 if (bh == NULL) {
98 return -EIO;
99 }
100 g = bh->b_data + (block % QNX4_BLOCK_SIZE);
101 if (((*g) & (1 << (block % 8))) == 0) {
102 QNX4DEBUG(("qnx4: is_free -> block is free\n"));
103 ret = 1;
104 } else {
105 QNX4DEBUG(("qnx4: is_free -> block is busy\n"));
106 ret = 0;
107 }
108 brelse(bh);
109
110 return ret;
111}
112
113int qnx4_set_bitmap(struct super_block *sb, long block, int busy)
114{
115 int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1;
116 int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size);
117 struct buffer_head *bh;
118 char *g;
119
120 start += block / (QNX4_BLOCK_SIZE * 8);
121 QNX4DEBUG(("qnx4: set_bitmap requesting block [%lu], bitmap in block [%lu]\n",
122 (unsigned long) block, (unsigned long) start));
123 (void) size; /* CHECKME */
124 bh = sb_bread(sb, start);
125 if (bh == NULL) {
126 return -EIO;
127 }
128 g = bh->b_data + (block % QNX4_BLOCK_SIZE);
129 if (busy == 0) {
130 (*g) &= ~(1 << (block % 8));
131 } else {
132 (*g) |= (1 << (block % 8));
133 }
134 mark_buffer_dirty(bh);
135 brelse(bh);
136
137 return 0;
138}
139
140static void qnx4_clear_inode(struct inode *inode)
141{
142 struct qnx4_inode_entry *qnx4_ino = qnx4_raw_inode(inode);
143 /* What for? */
144 memset(qnx4_ino->di_fname, 0, sizeof qnx4_ino->di_fname);
145 qnx4_ino->di_size = 0;
146 qnx4_ino->di_num_xtnts = 0;
147 qnx4_ino->di_mode = 0;
148 qnx4_ino->di_status = 0;
149}
150
151void qnx4_free_inode(struct inode *inode)
152{
153 if (inode->i_ino < 1) {
154 printk("free_inode: inode 0 or nonexistent inode\n");
155 return;
156 }
157 qnx4_clear_inode(inode);
158 clear_inode(inode);
159}
160
161#endif
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 003c68f3238b..86cc39cb1398 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -85,9 +85,4 @@ const struct file_operations qnx4_dir_operations =
85const struct inode_operations qnx4_dir_inode_operations = 85const struct inode_operations qnx4_dir_inode_operations =
86{ 86{
87 .lookup = qnx4_lookup, 87 .lookup = qnx4_lookup,
88#ifdef CONFIG_QNX4FS_RW
89 .create = qnx4_create,
90 .unlink = qnx4_unlink,
91 .rmdir = qnx4_rmdir,
92#endif
93}; 88};
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
deleted file mode 100644
index 09b170ac936c..000000000000
--- a/fs/qnx4/file.c
+++ /dev/null
@@ -1,40 +0,0 @@
1/*
2 * QNX4 file system, Linux implementation.
3 *
4 * Version : 0.2.1
5 *
6 * Using parts of the xiafs filesystem.
7 *
8 * History :
9 *
10 * 25-05-1998 by Richard Frowijn : first release.
11 * 21-06-1998 by Frank Denis : wrote qnx4_readpage to use generic_file_read.
12 * 27-06-1998 by Frank Denis : file overwriting.
13 */
14
15#include "qnx4.h"
16
17/*
18 * We have mostly NULL's here: the current defaults are ok for
19 * the qnx4 filesystem.
20 */
21const struct file_operations qnx4_file_operations =
22{
23 .llseek = generic_file_llseek,
24 .read = do_sync_read,
25 .aio_read = generic_file_aio_read,
26 .mmap = generic_file_mmap,
27 .splice_read = generic_file_splice_read,
28#ifdef CONFIG_QNX4FS_RW
29 .write = do_sync_write,
30 .aio_write = generic_file_aio_write,
31 .fsync = simple_fsync,
32#endif
33};
34
35const struct inode_operations qnx4_file_inode_operations =
36{
37#ifdef CONFIG_QNX4FS_RW
38 .truncate = qnx4_truncate,
39#endif
40};
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 681df5fcd161..d2cd1798d8c4 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -28,73 +28,6 @@
28 28
29static const struct super_operations qnx4_sops; 29static const struct super_operations qnx4_sops;
30 30
31#ifdef CONFIG_QNX4FS_RW
32
33static void qnx4_delete_inode(struct inode *inode)
34{
35 QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
36 truncate_inode_pages(&inode->i_data, 0);
37 inode->i_size = 0;
38 qnx4_truncate(inode);
39 lock_kernel();
40 qnx4_free_inode(inode);
41 unlock_kernel();
42}
43
44static int qnx4_write_inode(struct inode *inode, int do_sync)
45{
46 struct qnx4_inode_entry *raw_inode;
47 int block, ino;
48 struct buffer_head *bh;
49 ino = inode->i_ino;
50
51 QNX4DEBUG(("qnx4: write inode 1.\n"));
52 if (inode->i_nlink == 0) {
53 return 0;
54 }
55 if (!ino) {
56 printk("qnx4: bad inode number on dev %s: %d is out of range\n",
57 inode->i_sb->s_id, ino);
58 return -EIO;
59 }
60 QNX4DEBUG(("qnx4: write inode 2.\n"));
61 block = ino / QNX4_INODES_PER_BLOCK;
62 lock_kernel();
63 if (!(bh = sb_bread(inode->i_sb, block))) {
64 printk("qnx4: major problem: unable to read inode from dev "
65 "%s\n", inode->i_sb->s_id);
66 unlock_kernel();
67 return -EIO;
68 }
69 raw_inode = ((struct qnx4_inode_entry *) bh->b_data) +
70 (ino % QNX4_INODES_PER_BLOCK);
71 raw_inode->di_mode = cpu_to_le16(inode->i_mode);
72 raw_inode->di_uid = cpu_to_le16(fs_high2lowuid(inode->i_uid));
73 raw_inode->di_gid = cpu_to_le16(fs_high2lowgid(inode->i_gid));
74 raw_inode->di_nlink = cpu_to_le16(inode->i_nlink);
75 raw_inode->di_size = cpu_to_le32(inode->i_size);
76 raw_inode->di_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
77 raw_inode->di_atime = cpu_to_le32(inode->i_atime.tv_sec);
78 raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
79 raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks);
80 mark_buffer_dirty(bh);
81 if (do_sync) {
82 sync_dirty_buffer(bh);
83 if (buffer_req(bh) && !buffer_uptodate(bh)) {
84 printk("qnx4: IO error syncing inode [%s:%08x]\n",
85 inode->i_sb->s_id, ino);
86 brelse(bh);
87 unlock_kernel();
88 return -EIO;
89 }
90 }
91 brelse(bh);
92 unlock_kernel();
93 return 0;
94}
95
96#endif
97
98static void qnx4_put_super(struct super_block *sb); 31static void qnx4_put_super(struct super_block *sb);
99static struct inode *qnx4_alloc_inode(struct super_block *sb); 32static struct inode *qnx4_alloc_inode(struct super_block *sb);
100static void qnx4_destroy_inode(struct inode *inode); 33static void qnx4_destroy_inode(struct inode *inode);
@@ -108,10 +41,6 @@ static const struct super_operations qnx4_sops =
108 .put_super = qnx4_put_super, 41 .put_super = qnx4_put_super,
109 .statfs = qnx4_statfs, 42 .statfs = qnx4_statfs,
110 .remount_fs = qnx4_remount, 43 .remount_fs = qnx4_remount,
111#ifdef CONFIG_QNX4FS_RW
112 .write_inode = qnx4_write_inode,
113 .delete_inode = qnx4_delete_inode,
114#endif
115}; 44};
116 45
117static int qnx4_remount(struct super_block *sb, int *flags, char *data) 46static int qnx4_remount(struct super_block *sb, int *flags, char *data)
@@ -120,15 +49,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
120 49
121 qs = qnx4_sb(sb); 50 qs = qnx4_sb(sb);
122 qs->Version = QNX4_VERSION; 51 qs->Version = QNX4_VERSION;
123#ifndef CONFIG_QNX4FS_RW
124 *flags |= MS_RDONLY; 52 *flags |= MS_RDONLY;
125#endif
126 if (*flags & MS_RDONLY) {
127 return 0;
128 }
129
130 mark_buffer_dirty(qs->sb_buf);
131
132 return 0; 53 return 0;
133} 54}
134 55
@@ -354,9 +275,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
354 } 275 }
355 s->s_op = &qnx4_sops; 276 s->s_op = &qnx4_sops;
356 s->s_magic = QNX4_SUPER_MAGIC; 277 s->s_magic = QNX4_SUPER_MAGIC;
357#ifndef CONFIG_QNX4FS_RW
358 s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ 278 s->s_flags |= MS_RDONLY; /* Yup, read-only yet */
359#endif
360 qnx4_sb(s)->sb_buf = bh; 279 qnx4_sb(s)->sb_buf = bh;
361 qnx4_sb(s)->sb = (struct qnx4_super_block *) bh->b_data; 280 qnx4_sb(s)->sb = (struct qnx4_super_block *) bh->b_data;
362 281
@@ -489,8 +408,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
489 408
490 memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); 409 memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);
491 if (S_ISREG(inode->i_mode)) { 410 if (S_ISREG(inode->i_mode)) {
492 inode->i_op = &qnx4_file_inode_operations; 411 inode->i_fop = &generic_ro_fops;
493 inode->i_fop = &qnx4_file_operations;
494 inode->i_mapping->a_ops = &qnx4_aops; 412 inode->i_mapping->a_ops = &qnx4_aops;
495 qnx4_i(inode)->mmu_private = inode->i_size; 413 qnx4_i(inode)->mmu_private = inode->i_size;
496 } else if (S_ISDIR(inode->i_mode)) { 414 } else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 5972ed214937..ae1e7edbacd6 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -134,108 +134,3 @@ out:
134 134
135 return NULL; 135 return NULL;
136} 136}
137
138#ifdef CONFIG_QNX4FS_RW
139int qnx4_create(struct inode *dir, struct dentry *dentry, int mode,
140 struct nameidata *nd)
141{
142 QNX4DEBUG(("qnx4: qnx4_create\n"));
143 if (dir == NULL) {
144 return -ENOENT;
145 }
146 return -ENOSPC;
147}
148
149int qnx4_rmdir(struct inode *dir, struct dentry *dentry)
150{
151 struct buffer_head *bh;
152 struct qnx4_inode_entry *de;
153 struct inode *inode;
154 int retval;
155 int ino;
156
157 QNX4DEBUG(("qnx4: qnx4_rmdir [%s]\n", dentry->d_name.name));
158 lock_kernel();
159 bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name,
160 &de, &ino);
161 if (bh == NULL) {
162 unlock_kernel();
163 return -ENOENT;
164 }
165 inode = dentry->d_inode;
166 if (inode->i_ino != ino) {
167 retval = -EIO;
168 goto end_rmdir;
169 }
170#if 0
171 if (!empty_dir(inode)) {
172 retval = -ENOTEMPTY;
173 goto end_rmdir;
174 }
175#endif
176 if (inode->i_nlink != 2) {
177 QNX4DEBUG(("empty directory has nlink!=2 (%d)\n", inode->i_nlink));
178 }
179 QNX4DEBUG(("qnx4: deleting directory\n"));
180 de->di_status = 0;
181 memset(de->di_fname, 0, sizeof de->di_fname);
182 de->di_mode = 0;
183 mark_buffer_dirty_inode(bh, dir);
184 clear_nlink(inode);
185 mark_inode_dirty(inode);
186 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
187 inode_dec_link_count(dir);
188 retval = 0;
189
190 end_rmdir:
191 brelse(bh);
192
193 unlock_kernel();
194 return retval;
195}
196
197int qnx4_unlink(struct inode *dir, struct dentry *dentry)
198{
199 struct buffer_head *bh;
200 struct qnx4_inode_entry *de;
201 struct inode *inode;
202 int retval;
203 int ino;
204
205 QNX4DEBUG(("qnx4: qnx4_unlink [%s]\n", dentry->d_name.name));
206 lock_kernel();
207 bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name,
208 &de, &ino);
209 if (bh == NULL) {
210 unlock_kernel();
211 return -ENOENT;
212 }
213 inode = dentry->d_inode;
214 if (inode->i_ino != ino) {
215 retval = -EIO;
216 goto end_unlink;
217 }
218 retval = -EPERM;
219 if (!inode->i_nlink) {
220 QNX4DEBUG(("Deleting nonexistent file (%s:%lu), %d\n",
221 inode->i_sb->s_id,
222 inode->i_ino, inode->i_nlink));
223 inode->i_nlink = 1;
224 }
225 de->di_status = 0;
226 memset(de->di_fname, 0, sizeof de->di_fname);
227 de->di_mode = 0;
228 mark_buffer_dirty_inode(bh, dir);
229 dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
230 mark_inode_dirty(dir);
231 inode->i_ctime = dir->i_ctime;
232 inode_dec_link_count(inode);
233 retval = 0;
234
235end_unlink:
236 unlock_kernel();
237 brelse(bh);
238
239 return retval;
240}
241#endif
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 9efc089454f6..33a60858203b 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -29,17 +29,9 @@ extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
29 29
30extern struct buffer_head *qnx4_bread(struct inode *, int, int); 30extern struct buffer_head *qnx4_bread(struct inode *, int, int);
31 31
32extern const struct inode_operations qnx4_file_inode_operations;
33extern const struct inode_operations qnx4_dir_inode_operations; 32extern const struct inode_operations qnx4_dir_inode_operations;
34extern const struct file_operations qnx4_file_operations;
35extern const struct file_operations qnx4_dir_operations; 33extern const struct file_operations qnx4_dir_operations;
36extern int qnx4_is_free(struct super_block *sb, long block); 34extern int qnx4_is_free(struct super_block *sb, long block);
37extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
38extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
39extern void qnx4_truncate(struct inode *inode);
40extern void qnx4_free_inode(struct inode *inode);
41extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
42extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
43 35
44static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) 36static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
45{ 37{
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c
deleted file mode 100644
index d94d9ee241fe..000000000000
--- a/fs/qnx4/truncate.c
+++ /dev/null
@@ -1,34 +0,0 @@
1/*
2 * QNX4 file system, Linux implementation.
3 *
4 * Version : 0.1
5 *
6 * Using parts of the xiafs filesystem.
7 *
8 * History :
9 *
10 * 30-06-1998 by Frank DENIS : ugly filler.
11 */
12
13#include <linux/smp_lock.h>
14#include "qnx4.h"
15
16#ifdef CONFIG_QNX4FS_RW
17
18void qnx4_truncate(struct inode *inode)
19{
20 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
21 S_ISLNK(inode->i_mode))) {
22 return;
23 }
24 lock_kernel();
25 if (!(S_ISDIR(inode->i_mode))) {
26 /* TODO */
27 }
28 QNX4DEBUG(("qnx4: qnx4_truncate called\n"));
29 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
30 mark_inode_dirty(inode);
31 unlock_kernel();
32}
33
34#endif
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 38f7bd559f35..39b49c42a7ed 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1839,7 +1839,7 @@ EXPORT_SYMBOL(dquot_commit_info);
1839/* 1839/*
1840 * Definitions of diskquota operations. 1840 * Definitions of diskquota operations.
1841 */ 1841 */
1842struct dquot_operations dquot_operations = { 1842const struct dquot_operations dquot_operations = {
1843 .initialize = dquot_initialize, 1843 .initialize = dquot_initialize,
1844 .drop = dquot_drop, 1844 .drop = dquot_drop,
1845 .alloc_space = dquot_alloc_space, 1845 .alloc_space = dquot_alloc_space,
@@ -2461,7 +2461,7 @@ out:
2461} 2461}
2462EXPORT_SYMBOL(vfs_set_dqinfo); 2462EXPORT_SYMBOL(vfs_set_dqinfo);
2463 2463
2464struct quotactl_ops vfs_quotactl_ops = { 2464const struct quotactl_ops vfs_quotactl_ops = {
2465 .quota_on = vfs_quota_on, 2465 .quota_on = vfs_quota_on,
2466 .quota_off = vfs_quota_off, 2466 .quota_off = vfs_quota_off,
2467 .quota_sync = vfs_quota_sync, 2467 .quota_sync = vfs_quota_sync,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a7f0110fca4c..a6090aa1a7c1 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -34,12 +34,10 @@
34#include <linux/ramfs.h> 34#include <linux/ramfs.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/parser.h> 36#include <linux/parser.h>
37#include <linux/magic.h>
37#include <asm/uaccess.h> 38#include <asm/uaccess.h>
38#include "internal.h" 39#include "internal.h"
39 40
40/* some random number */
41#define RAMFS_MAGIC 0x858458f6
42
43#define RAMFS_DEFAULT_MODE 0755 41#define RAMFS_DEFAULT_MODE 0755
44 42
45static const struct super_operations ramfs_ops; 43static const struct super_operations ramfs_ops;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7adea74d6a8a..f0ad05f38022 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -612,7 +612,7 @@ static int reiserfs_mark_dquot_dirty(struct dquot *);
612static int reiserfs_write_info(struct super_block *, int); 612static int reiserfs_write_info(struct super_block *, int);
613static int reiserfs_quota_on(struct super_block *, int, int, char *, int); 613static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
614 614
615static struct dquot_operations reiserfs_quota_operations = { 615static const struct dquot_operations reiserfs_quota_operations = {
616 .initialize = dquot_initialize, 616 .initialize = dquot_initialize,
617 .drop = dquot_drop, 617 .drop = dquot_drop,
618 .alloc_space = dquot_alloc_space, 618 .alloc_space = dquot_alloc_space,
@@ -629,7 +629,7 @@ static struct dquot_operations reiserfs_quota_operations = {
629 .destroy_dquot = dquot_destroy, 629 .destroy_dquot = dquot_destroy,
630}; 630};
631 631
632static struct quotactl_ops reiserfs_qctl_operations = { 632static const struct quotactl_ops reiserfs_qctl_operations = {
633 .quota_on = reiserfs_quota_on, 633 .quota_on = reiserfs_quota_on,
634 .quota_off = vfs_quota_off, 634 .quota_off = vfs_quota_off,
635 .quota_sync = vfs_quota_sync, 635 .quota_sync = vfs_quota_sync,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 4ab3c03d8f95..47f132df0c3f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -284,7 +284,7 @@ static const struct file_operations romfs_dir_operations = {
284 .readdir = romfs_readdir, 284 .readdir = romfs_readdir,
285}; 285};
286 286
287static struct inode_operations romfs_dir_inode_operations = { 287static const struct inode_operations romfs_dir_inode_operations = {
288 .lookup = romfs_lookup, 288 .lookup = romfs_lookup,
289}; 289};
290 290
diff --git a/fs/select.c b/fs/select.c
index 8084834e123e..a201fc370223 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -41,22 +41,28 @@
41 * better solutions.. 41 * better solutions..
42 */ 42 */
43 43
44#define MAX_SLACK (100 * NSEC_PER_MSEC)
45
44static long __estimate_accuracy(struct timespec *tv) 46static long __estimate_accuracy(struct timespec *tv)
45{ 47{
46 long slack; 48 long slack;
47 int divfactor = 1000; 49 int divfactor = 1000;
48 50
51 if (tv->tv_sec < 0)
52 return 0;
53
49 if (task_nice(current) > 0) 54 if (task_nice(current) > 0)
50 divfactor = divfactor / 5; 55 divfactor = divfactor / 5;
51 56
57 if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
58 return MAX_SLACK;
59
52 slack = tv->tv_nsec / divfactor; 60 slack = tv->tv_nsec / divfactor;
53 slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); 61 slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
54 62
55 if (slack > 100 * NSEC_PER_MSEC) 63 if (slack > MAX_SLACK)
56 slack = 100 * NSEC_PER_MSEC; 64 return MAX_SLACK;
57 65
58 if (slack < 0)
59 slack = 0;
60 return slack; 66 return slack;
61} 67}
62 68
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 9468168b9af5..71c29b6670b4 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -509,7 +509,7 @@ date_unix2dos(struct smb_sb_info *server,
509 month = 2; 509 month = 2;
510 } else { 510 } else {
511 nl_day = (year & 3) || day <= 59 ? day : day - 1; 511 nl_day = (year & 3) || day <= 59 ? day : day - 1;
512 for (month = 0; month < 12; month++) 512 for (month = 1; month < 12; month++)
513 if (day_n[month] > nl_day) 513 if (day_n[month] > nl_day)
514 break; 514 break;
515 } 515 }
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index cb5fc57e370b..6c197ef53add 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -44,7 +44,7 @@
44#include "squashfs.h" 44#include "squashfs.h"
45 45
46static struct file_system_type squashfs_fs_type; 46static struct file_system_type squashfs_fs_type;
47static struct super_operations squashfs_super_ops; 47static const struct super_operations squashfs_super_ops;
48 48
49static int supported_squashfs_filesystem(short major, short minor, short comp) 49static int supported_squashfs_filesystem(short major, short minor, short comp)
50{ 50{
@@ -444,7 +444,7 @@ static struct file_system_type squashfs_fs_type = {
444 .fs_flags = FS_REQUIRES_DEV 444 .fs_flags = FS_REQUIRES_DEV
445}; 445};
446 446
447static struct super_operations squashfs_super_ops = { 447static const struct super_operations squashfs_super_ops = {
448 .alloc_inode = squashfs_alloc_inode, 448 .alloc_inode = squashfs_alloc_inode,
449 .destroy_inode = squashfs_destroy_inode, 449 .destroy_inode = squashfs_destroy_inode,
450 .statfs = squashfs_statfs, 450 .statfs = squashfs_statfs,
diff --git a/fs/super.c b/fs/super.c
index b03fea8fbfb6..0e7207b9815c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,7 @@ DEFINE_SPINLOCK(sb_lock);
54static struct super_block *alloc_super(struct file_system_type *type) 54static struct super_block *alloc_super(struct file_system_type *type)
55{ 55{
56 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); 56 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
57 static struct super_operations default_op; 57 static const struct super_operations default_op;
58 58
59 if (s) { 59 if (s) {
60 if (security_sb_alloc(s)) { 60 if (security_sb_alloc(s)) {
diff --git a/fs/sync.c b/fs/sync.c
index c08467a5d7cb..d104591b066b 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -183,6 +183,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
183 ret = err; 183 ret = err;
184 return ret; 184 return ret;
185} 185}
186EXPORT_SYMBOL(file_fsync);
186 187
187/** 188/**
188 * vfs_fsync_range - helper to sync a range of data & metadata to disk 189 * vfs_fsync_range - helper to sync a range of data & metadata to disk
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index ee1ce68fd98b..076ca50e9933 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -715,7 +715,7 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
715 * ubifs_get_free_space - return amount of free space. 715 * ubifs_get_free_space - return amount of free space.
716 * @c: UBIFS file-system description object 716 * @c: UBIFS file-system description object
717 * 717 *
718 * This function calculates and retuns amount of free space to report to 718 * This function calculates and returns amount of free space to report to
719 * user-space. 719 * user-space.
720 */ 720 */
721long long ubifs_get_free_space(struct ubifs_info *c) 721long long ubifs_get_free_space(struct ubifs_info *c)
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index f3a7945527fb..4775af401167 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -510,7 +510,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
510 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; 510 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
511 int first = 1, iip; 511 int first = 1, iip;
512 struct ubifs_debug_info *d = c->dbg; 512 struct ubifs_debug_info *d = c->dbg;
513 union ubifs_key lower_key, upper_key, l_key, u_key; 513 union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
514 unsigned long long uninitialized_var(last_sqnum); 514 unsigned long long uninitialized_var(last_sqnum);
515 struct ubifs_idx_node *idx; 515 struct ubifs_idx_node *idx;
516 struct list_head list; 516 struct list_head list;
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index ce2cd8343618..dbc093afd946 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -210,6 +210,20 @@ const char *dbg_cstate(int cmt_state)
210 } 210 }
211} 211}
212 212
213const char *dbg_jhead(int jhead)
214{
215 switch (jhead) {
216 case GCHD:
217 return "0 (GC)";
218 case BASEHD:
219 return "1 (base)";
220 case DATAHD:
221 return "2 (data)";
222 default:
223 return "unknown journal head";
224 }
225}
226
213static void dump_ch(const struct ubifs_ch *ch) 227static void dump_ch(const struct ubifs_ch *ch)
214{ 228{
215 printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); 229 printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
@@ -623,8 +637,9 @@ void dbg_dump_budg(struct ubifs_info *c)
623 /* If we are in R/O mode, journal heads do not exist */ 637 /* If we are in R/O mode, journal heads do not exist */
624 if (c->jheads) 638 if (c->jheads)
625 for (i = 0; i < c->jhead_cnt; i++) 639 for (i = 0; i < c->jhead_cnt; i++)
626 printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", 640 printk(KERN_DEBUG "\tjhead %s\t LEB %d\n",
627 c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); 641 dbg_jhead(c->jheads[i].wbuf.jhead),
642 c->jheads[i].wbuf.lnum);
628 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { 643 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
629 bud = rb_entry(rb, struct ubifs_bud, rb); 644 bud = rb_entry(rb, struct ubifs_bud, rb);
630 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); 645 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
@@ -648,9 +663,90 @@ void dbg_dump_budg(struct ubifs_info *c)
648 663
649void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) 664void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
650{ 665{
651 printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " 666 int i, spc, dark = 0, dead = 0;
652 "flags %#x\n", lp->lnum, lp->free, lp->dirty, 667 struct rb_node *rb;
653 c->leb_size - lp->free - lp->dirty, lp->flags); 668 struct ubifs_bud *bud;
669
670 spc = lp->free + lp->dirty;
671 if (spc < c->dead_wm)
672 dead = spc;
673 else
674 dark = ubifs_calc_dark(c, spc);
675
676 if (lp->flags & LPROPS_INDEX)
677 printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
678 "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
679 lp->dirty, c->leb_size - spc, spc, lp->flags);
680 else
681 printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
682 "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
683 "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
684 c->leb_size - spc, spc, dark, dead,
685 (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
686
687 if (lp->flags & LPROPS_TAKEN) {
688 if (lp->flags & LPROPS_INDEX)
689 printk(KERN_CONT "index, taken");
690 else
691 printk(KERN_CONT "taken");
692 } else {
693 const char *s;
694
695 if (lp->flags & LPROPS_INDEX) {
696 switch (lp->flags & LPROPS_CAT_MASK) {
697 case LPROPS_DIRTY_IDX:
698 s = "dirty index";
699 break;
700 case LPROPS_FRDI_IDX:
701 s = "freeable index";
702 break;
703 default:
704 s = "index";
705 }
706 } else {
707 switch (lp->flags & LPROPS_CAT_MASK) {
708 case LPROPS_UNCAT:
709 s = "not categorized";
710 break;
711 case LPROPS_DIRTY:
712 s = "dirty";
713 break;
714 case LPROPS_FREE:
715 s = "free";
716 break;
717 case LPROPS_EMPTY:
718 s = "empty";
719 break;
720 case LPROPS_FREEABLE:
721 s = "freeable";
722 break;
723 default:
724 s = NULL;
725 break;
726 }
727 }
728 printk(KERN_CONT "%s", s);
729 }
730
731 for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) {
732 bud = rb_entry(rb, struct ubifs_bud, rb);
733 if (bud->lnum == lp->lnum) {
734 int head = 0;
735 for (i = 0; i < c->jhead_cnt; i++) {
736 if (lp->lnum == c->jheads[i].wbuf.lnum) {
737 printk(KERN_CONT ", jhead %s",
738 dbg_jhead(i));
739 head = 1;
740 }
741 }
742 if (!head)
743 printk(KERN_CONT ", bud of jhead %s",
744 dbg_jhead(bud->jhead));
745 }
746 }
747 if (lp->lnum == c->gc_lnum)
748 printk(KERN_CONT ", GC LEB");
749 printk(KERN_CONT ")\n");
654} 750}
655 751
656void dbg_dump_lprops(struct ubifs_info *c) 752void dbg_dump_lprops(struct ubifs_info *c)
@@ -724,7 +820,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
724 820
725 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 821 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
726 current->pid, lnum); 822 current->pid, lnum);
727 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); 823 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
728 if (IS_ERR(sleb)) { 824 if (IS_ERR(sleb)) {
729 ubifs_err("scan error %d", (int)PTR_ERR(sleb)); 825 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
730 return; 826 return;
@@ -909,8 +1005,10 @@ out:
909 ubifs_msg("saved lprops statistics dump"); 1005 ubifs_msg("saved lprops statistics dump");
910 dbg_dump_lstats(&d->saved_lst); 1006 dbg_dump_lstats(&d->saved_lst);
911 ubifs_get_lp_stats(c, &lst); 1007 ubifs_get_lp_stats(c, &lst);
1008
912 ubifs_msg("current lprops statistics dump"); 1009 ubifs_msg("current lprops statistics dump");
913 dbg_dump_lstats(&d->saved_lst); 1010 dbg_dump_lstats(&lst);
1011
914 spin_lock(&c->space_lock); 1012 spin_lock(&c->space_lock);
915 dbg_dump_budg(c); 1013 dbg_dump_budg(c);
916 spin_unlock(&c->space_lock); 1014 spin_unlock(&c->space_lock);
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index c1cd73b2e06e..29d960101ea6 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -271,6 +271,7 @@ void ubifs_debugging_exit(struct ubifs_info *c);
271/* Dump functions */ 271/* Dump functions */
272const char *dbg_ntype(int type); 272const char *dbg_ntype(int type);
273const char *dbg_cstate(int cmt_state); 273const char *dbg_cstate(int cmt_state);
274const char *dbg_jhead(int jhead);
274const char *dbg_get_key_dump(const struct ubifs_info *c, 275const char *dbg_get_key_dump(const struct ubifs_info *c,
275 const union ubifs_key *key); 276 const union ubifs_key *key);
276void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); 277void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
@@ -321,6 +322,8 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
321int dbg_check_lprops(struct ubifs_info *c); 322int dbg_check_lprops(struct ubifs_info *c);
322int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, 323int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
323 int row, int col); 324 int row, int col);
325int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
326 loff_t size);
324 327
325/* Force the use of in-the-gaps method for testing */ 328/* Force the use of in-the-gaps method for testing */
326 329
@@ -425,6 +428,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
425 428
426#define dbg_ntype(type) "" 429#define dbg_ntype(type) ""
427#define dbg_cstate(cmt_state) "" 430#define dbg_cstate(cmt_state) ""
431#define dbg_jhead(jhead) ""
428#define dbg_get_key_dump(c, key) ({}) 432#define dbg_get_key_dump(c, key) ({})
429#define dbg_dump_inode(c, inode) ({}) 433#define dbg_dump_inode(c, inode) ({})
430#define dbg_dump_node(c, node) ({}) 434#define dbg_dump_node(c, node) ({})
@@ -460,6 +464,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
460#define dbg_check_heap(c, heap, cat, add_pos) ({}) 464#define dbg_check_heap(c, heap, cat, add_pos) ({})
461#define dbg_check_lprops(c) 0 465#define dbg_check_lprops(c) 0
462#define dbg_check_lpt_nodes(c, cnode, row, col) 0 466#define dbg_check_lpt_nodes(c, cnode, row, col) 0
467#define dbg_check_inode_size(c, inode, size) 0
463#define dbg_force_in_the_gaps_enabled 0 468#define dbg_force_in_the_gaps_enabled 0
464#define dbg_force_in_the_gaps() 0 469#define dbg_force_in_the_gaps() 0
465#define dbg_failure_mode 0 470#define dbg_failure_mode 0
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6d34dc7e33e1..2e6481a7701c 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -21,34 +21,32 @@
21 */ 21 */
22 22
23/* 23/*
24 * This file implements VFS file and inode operations of regular files, device 24 * This file implements VFS file and inode operations for regular files, device
25 * nodes and symlinks as well as address space operations. 25 * nodes and symlinks as well as address space operations.
26 * 26 *
27 * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the 27 * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
28 * page is dirty and is used for budgeting purposes - dirty pages should not be 28 * the page is dirty and is used for optimization purposes - dirty pages are
29 * budgeted. The PG_checked flag is set if full budgeting is required for the 29 * not budgeted so the flag shows that 'ubifs_write_end()' should not release
30 * page e.g., when it corresponds to a file hole or it is just beyond the file 30 * the budget for this page. The @PG_checked flag is set if full budgeting is
31 * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to 31 * required for the page e.g., when it corresponds to a file hole or it is
32 * fail in this function, and the budget is released in 'ubifs_write_end()'. So 32 * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
33 * the PG_private and PG_checked flags carry the information about how the page 33 * it is OK to fail in this function, and the budget is released in
34 * was budgeted, to make it possible to release the budget properly. 34 * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
35 * information about how the page was budgeted, to make it possible to release
36 * the budget properly.
35 * 37 *
36 * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations 38 * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
37 * we implement. However, this is not true for '->writepage()', which might be 39 * implement. However, this is not true for 'ubifs_writepage()', which may be
38 * called with 'i_mutex' unlocked. For example, when pdflush is performing 40 * called with @i_mutex unlocked. For example, when pdflush is doing background
39 * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the 41 * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal"
40 * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is 42 * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
41 * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim 43 * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
42 * path'. So, in '->writepage()' we are only guaranteed that the page is 44 * we are only guaranteed that the page is locked.
43 * locked.
44 * 45 *
45 * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., 46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
46 * readahead path does not have it locked ("sys_read -> generic_file_aio_read 47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
47 * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is 48 * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
48 * not set as well. However, UBIFS disables readahead. 49 * set as well. However, UBIFS disables readahead.
49 *
50 * This, for example means that there might be 2 concurrent '->writepage()'
51 * calls for the same inode, but different inode dirty pages.
52 */ 50 */
53 51
54#include "ubifs.h" 52#include "ubifs.h"
@@ -449,9 +447,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
449 /* 447 /*
450 * We change whole page so no need to load it. But we 448 * We change whole page so no need to load it. But we
451 * have to set the @PG_checked flag to make the further 449 * have to set the @PG_checked flag to make the further
452 * code the page is new. This might be not true, but it 450 * code know that the page is new. This might be not
453 * is better to budget more that to read the page from 451 * true, but it is better to budget more than to read
454 * the media. 452 * the page from the media.
455 */ 453 */
456 SetPageChecked(page); 454 SetPageChecked(page);
457 skipped_read = 1; 455 skipped_read = 1;
@@ -497,8 +495,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
497 } 495 }
498 496
499 /* 497 /*
500 * Whee, we aquired budgeting quickly - without involving 498 * Whee, we acquired budgeting quickly - without involving
501 * garbage-collection, committing or forceing write-back. We return 499 * garbage-collection, committing or forcing write-back. We return
502 * with @ui->ui_mutex locked if we are appending pages, and unlocked 500 * with @ui->ui_mutex locked if we are appending pages, and unlocked
503 * otherwise. This is an optimization (slightly hacky though). 501 * otherwise. This is an optimization (slightly hacky though).
504 */ 502 */
@@ -562,7 +560,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
562 560
563 /* 561 /*
564 * Return 0 to force VFS to repeat the whole operation, or the 562 * Return 0 to force VFS to repeat the whole operation, or the
565 * error code if 'do_readpage()' failes. 563 * error code if 'do_readpage()' fails.
566 */ 564 */
567 copied = do_readpage(page); 565 copied = do_readpage(page);
568 goto out; 566 goto out;
@@ -1175,11 +1173,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
1175 ui->ui_size = inode->i_size; 1173 ui->ui_size = inode->i_size;
1176 /* Truncation changes inode [mc]time */ 1174 /* Truncation changes inode [mc]time */
1177 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); 1175 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1178 /* The other attributes may be changed at the same time as well */ 1176 /* Other attributes may be changed at the same time as well */
1179 do_attr_changes(inode, attr); 1177 do_attr_changes(inode, attr);
1180
1181 err = ubifs_jnl_truncate(c, inode, old_size, new_size); 1178 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
1182 mutex_unlock(&ui->ui_mutex); 1179 mutex_unlock(&ui->ui_mutex);
1180
1183out_budg: 1181out_budg:
1184 if (budgeted) 1182 if (budgeted)
1185 ubifs_release_budget(c, &req); 1183 ubifs_release_budget(c, &req);
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index f0f5f15d384e..618c2701d3a7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -529,7 +529,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
529 * We scan the entire LEB even though we only really need to scan up to 529 * We scan the entire LEB even though we only really need to scan up to
530 * (c->leb_size - lp->free). 530 * (c->leb_size - lp->free).
531 */ 531 */
532 sleb = ubifs_scan(c, lnum, 0, c->sbuf); 532 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
533 if (IS_ERR(sleb)) 533 if (IS_ERR(sleb))
534 return PTR_ERR(sleb); 534 return PTR_ERR(sleb);
535 535
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 762a7d6cec73..e589fedaf1ef 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -297,7 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
297{ 297{
298 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); 298 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
299 299
300 dbg_io("jhead %d", wbuf->jhead); 300 dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
301 wbuf->need_sync = 1; 301 wbuf->need_sync = 1;
302 wbuf->c->need_wbuf_sync = 1; 302 wbuf->c->need_wbuf_sync = 1;
303 ubifs_wake_up_bgt(wbuf->c); 303 ubifs_wake_up_bgt(wbuf->c);
@@ -314,7 +314,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
314 314
315 if (wbuf->no_timer) 315 if (wbuf->no_timer)
316 return; 316 return;
317 dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead, 317 dbg_io("set timer for jhead %s, %llu-%llu millisecs",
318 dbg_jhead(wbuf->jhead),
318 div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), 319 div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
319 div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, 320 div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
320 USEC_PER_SEC)); 321 USEC_PER_SEC));
@@ -351,8 +352,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
351 /* Write-buffer is empty or not seeked */ 352 /* Write-buffer is empty or not seeked */
352 return 0; 353 return 0;
353 354
354 dbg_io("LEB %d:%d, %d bytes, jhead %d", 355 dbg_io("LEB %d:%d, %d bytes, jhead %s",
355 wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead); 356 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
356 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); 357 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
357 ubifs_assert(!(wbuf->avail & 7)); 358 ubifs_assert(!(wbuf->avail & 7));
358 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); 359 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
@@ -401,7 +402,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
401{ 402{
402 const struct ubifs_info *c = wbuf->c; 403 const struct ubifs_info *c = wbuf->c;
403 404
404 dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead); 405 dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
405 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); 406 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
406 ubifs_assert(offs >= 0 && offs <= c->leb_size); 407 ubifs_assert(offs >= 0 && offs <= c->leb_size);
407 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 408 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
@@ -508,9 +509,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
508 struct ubifs_info *c = wbuf->c; 509 struct ubifs_info *c = wbuf->c;
509 int err, written, n, aligned_len = ALIGN(len, 8), offs; 510 int err, written, n, aligned_len = ALIGN(len, 8), offs;
510 511
511 dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len, 512 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
512 dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead, 513 dbg_ntype(((struct ubifs_ch *)buf)->node_type),
513 wbuf->lnum, wbuf->offs + wbuf->used); 514 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
514 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 515 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
515 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 516 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
516 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 517 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
@@ -535,8 +536,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
535 memcpy(wbuf->buf + wbuf->used, buf, len); 536 memcpy(wbuf->buf + wbuf->used, buf, len);
536 537
537 if (aligned_len == wbuf->avail) { 538 if (aligned_len == wbuf->avail) {
538 dbg_io("flush jhead %d wbuf to LEB %d:%d", 539 dbg_io("flush jhead %s wbuf to LEB %d:%d",
539 wbuf->jhead, wbuf->lnum, wbuf->offs); 540 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
540 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 541 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
541 wbuf->offs, c->min_io_size, 542 wbuf->offs, c->min_io_size,
542 wbuf->dtype); 543 wbuf->dtype);
@@ -564,8 +565,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
564 * minimal I/O unit. We have to fill and flush write-buffer and switch 565 * minimal I/O unit. We have to fill and flush write-buffer and switch
565 * to the next min. I/O unit. 566 * to the next min. I/O unit.
566 */ 567 */
567 dbg_io("flush jhead %d wbuf to LEB %d:%d", 568 dbg_io("flush jhead %s wbuf to LEB %d:%d",
568 wbuf->jhead, wbuf->lnum, wbuf->offs); 569 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
569 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 570 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
570 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 571 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
571 c->min_io_size, wbuf->dtype); 572 c->min_io_size, wbuf->dtype);
@@ -698,8 +699,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
698 int err, rlen, overlap; 699 int err, rlen, overlap;
699 struct ubifs_ch *ch = buf; 700 struct ubifs_ch *ch = buf;
700 701
701 dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs, 702 dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
702 dbg_ntype(type), len, wbuf->jhead); 703 dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
703 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 704 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
704 ubifs_assert(!(offs & 7) && offs < c->leb_size); 705 ubifs_assert(!(offs & 7) && offs < c->leb_size);
705 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); 706 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 64b5f3a309f5..d321baeca68d 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -158,7 +158,7 @@ again:
158 * some. But the write-buffer mutex has to be unlocked because 158 * some. But the write-buffer mutex has to be unlocked because
159 * GC also takes it. 159 * GC also takes it.
160 */ 160 */
161 dbg_jnl("no free space jhead %d, run GC", jhead); 161 dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead));
162 mutex_unlock(&wbuf->io_mutex); 162 mutex_unlock(&wbuf->io_mutex);
163 163
164 lnum = ubifs_garbage_collect(c, 0); 164 lnum = ubifs_garbage_collect(c, 0);
@@ -173,7 +173,8 @@ again:
173 * because we dropped @wbuf->io_mutex, so try once 173 * because we dropped @wbuf->io_mutex, so try once
174 * again. 174 * again.
175 */ 175 */
176 dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); 176 dbg_jnl("GC couldn't make a free LEB for jhead %s",
177 dbg_jhead(jhead));
177 if (retries++ < 2) { 178 if (retries++ < 2) {
178 dbg_jnl("retry (%d)", retries); 179 dbg_jnl("retry (%d)", retries);
179 goto again; 180 goto again;
@@ -184,7 +185,7 @@ again:
184 } 185 }
185 186
186 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 187 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
187 dbg_jnl("got LEB %d for jhead %d", lnum, jhead); 188 dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead));
188 avail = c->leb_size - wbuf->offs - wbuf->used; 189 avail = c->leb_size - wbuf->offs - wbuf->used;
189 190
190 if (wbuf->lnum != -1 && avail >= len) { 191 if (wbuf->lnum != -1 && avail >= len) {
@@ -255,7 +256,8 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
255 *lnum = c->jheads[jhead].wbuf.lnum; 256 *lnum = c->jheads[jhead].wbuf.lnum;
256 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; 257 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
257 258
258 dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); 259 dbg_jnl("jhead %s, LEB %d:%d, len %d",
260 dbg_jhead(jhead), *lnum, *offs, len);
259 ubifs_prepare_node(c, node, len, 0); 261 ubifs_prepare_node(c, node, len, 0);
260 262
261 return ubifs_wbuf_write_nolock(wbuf, node, len); 263 return ubifs_wbuf_write_nolock(wbuf, node, len);
@@ -285,7 +287,8 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
285 287
286 *lnum = c->jheads[jhead].wbuf.lnum; 288 *lnum = c->jheads[jhead].wbuf.lnum;
287 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; 289 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
288 dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); 290 dbg_jnl("jhead %s, LEB %d:%d, len %d",
291 dbg_jhead(jhead), *lnum, *offs, len);
289 292
290 err = ubifs_wbuf_write_nolock(wbuf, buf, len); 293 err = ubifs_wbuf_write_nolock(wbuf, buf, len);
291 if (err) 294 if (err)
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 5fa27ea031ba..0f530c684f0b 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -229,23 +229,6 @@ static inline void xent_key_init(const struct ubifs_info *c,
229} 229}
230 230
231/** 231/**
232 * xent_key_init_hash - initialize extended attribute entry key without
233 * re-calculating hash function.
234 * @c: UBIFS file-system description object
235 * @key: key to initialize
236 * @inum: host inode number
237 * @hash: extended attribute entry name hash
238 */
239static inline void xent_key_init_hash(const struct ubifs_info *c,
240 union ubifs_key *key, ino_t inum,
241 uint32_t hash)
242{
243 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
244 key->u32[0] = inum;
245 key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
246}
247
248/**
249 * xent_key_init_flash - initialize on-flash extended attribute entry key. 232 * xent_key_init_flash - initialize on-flash extended attribute entry key.
250 * @c: UBIFS file-system description object 233 * @c: UBIFS file-system description object
251 * @k: key to initialize 234 * @k: key to initialize
@@ -295,22 +278,15 @@ static inline void data_key_init(const struct ubifs_info *c,
295} 278}
296 279
297/** 280/**
298 * data_key_init_flash - initialize on-flash data key. 281 * highest_data_key - get the highest possible data key for an inode.
299 * @c: UBIFS file-system description object 282 * @c: UBIFS file-system description object
300 * @k: key to initialize 283 * @key: key to initialize
301 * @inum: inode number 284 * @inum: inode number
302 * @block: block number
303 */ 285 */
304static inline void data_key_init_flash(const struct ubifs_info *c, void *k, 286static inline void highest_data_key(const struct ubifs_info *c,
305 ino_t inum, unsigned int block) 287 union ubifs_key *key, ino_t inum)
306{ 288{
307 union ubifs_key *key = k; 289 data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK);
308
309 ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
310 key->j32[0] = cpu_to_le32(inum);
311 key->j32[1] = cpu_to_le32(block |
312 (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS));
313 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
314} 290}
315 291
316/** 292/**
@@ -554,4 +530,5 @@ static inline unsigned long long key_max_inode_size(const struct ubifs_info *c)
554 return 0; 530 return 0;
555 } 531 }
556} 532}
533
557#endif /* !__UBIFS_KEY_H__ */ 534#endif /* !__UBIFS_KEY_H__ */
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 56e33772a1ee..c345e125f42c 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -169,8 +169,8 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
169 */ 169 */
170 c->bud_bytes += c->leb_size - bud->start; 170 c->bud_bytes += c->leb_size - bud->start;
171 171
172 dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, 172 dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
173 bud->start, bud->jhead, c->bud_bytes); 173 bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
174 spin_unlock(&c->buds_lock); 174 spin_unlock(&c->buds_lock);
175} 175}
176 176
@@ -355,16 +355,16 @@ static void remove_buds(struct ubifs_info *c)
355 * heads (non-closed buds). 355 * heads (non-closed buds).
356 */ 356 */
357 c->cmt_bud_bytes += wbuf->offs - bud->start; 357 c->cmt_bud_bytes += wbuf->offs - bud->start;
358 dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " 358 dbg_log("preserve %d:%d, jhead %s, bud bytes %d, "
359 "cmt_bud_bytes %lld", bud->lnum, bud->start, 359 "cmt_bud_bytes %lld", bud->lnum, bud->start,
360 bud->jhead, wbuf->offs - bud->start, 360 dbg_jhead(bud->jhead), wbuf->offs - bud->start,
361 c->cmt_bud_bytes); 361 c->cmt_bud_bytes);
362 bud->start = wbuf->offs; 362 bud->start = wbuf->offs;
363 } else { 363 } else {
364 c->cmt_bud_bytes += c->leb_size - bud->start; 364 c->cmt_bud_bytes += c->leb_size - bud->start;
365 dbg_log("remove %d:%d, jhead %d, bud bytes %d, " 365 dbg_log("remove %d:%d, jhead %s, bud bytes %d, "
366 "cmt_bud_bytes %lld", bud->lnum, bud->start, 366 "cmt_bud_bytes %lld", bud->lnum, bud->start,
367 bud->jhead, c->leb_size - bud->start, 367 dbg_jhead(bud->jhead), c->leb_size - bud->start,
368 c->cmt_bud_bytes); 368 c->cmt_bud_bytes);
369 rb_erase(p1, &c->buds); 369 rb_erase(p1, &c->buds);
370 /* 370 /*
@@ -429,7 +429,8 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
429 if (lnum == -1 || offs == c->leb_size) 429 if (lnum == -1 || offs == c->leb_size)
430 continue; 430 continue;
431 431
432 dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); 432 dbg_log("add ref to LEB %d:%d for jhead %s",
433 lnum, offs, dbg_jhead(i));
433 ref = buf + len; 434 ref = buf + len;
434 ref->ch.node_type = UBIFS_REF_NODE; 435 ref->ch.node_type = UBIFS_REF_NODE;
435 ref->lnum = cpu_to_le32(lnum); 436 ref->lnum = cpu_to_le32(lnum);
@@ -695,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
695 lnum = c->ltail_lnum; 696 lnum = c->ltail_lnum;
696 write_lnum = lnum; 697 write_lnum = lnum;
697 while (1) { 698 while (1) {
698 sleb = ubifs_scan(c, lnum, 0, c->sbuf); 699 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
699 if (IS_ERR(sleb)) { 700 if (IS_ERR(sleb)) {
700 err = PTR_ERR(sleb); 701 err = PTR_ERR(sleb);
701 goto out_free; 702 goto out_free;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 4cdd284dea56..4d4ca388889b 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
281 case LPROPS_FREE: 281 case LPROPS_FREE:
282 if (add_to_lpt_heap(c, lprops, cat)) 282 if (add_to_lpt_heap(c, lprops, cat))
283 break; 283 break;
284 /* No more room on heap so make it uncategorized */ 284 /* No more room on heap so make it un-categorized */
285 cat = LPROPS_UNCAT; 285 cat = LPROPS_UNCAT;
286 /* Fall through */ 286 /* Fall through */
287 case LPROPS_UNCAT: 287 case LPROPS_UNCAT:
@@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
375 * @lprops: LEB properties 375 * @lprops: LEB properties
376 * 376 *
377 * A LEB may have fallen off of the bottom of a heap, and ended up as 377 * A LEB may have fallen off of the bottom of a heap, and ended up as
378 * uncategorized even though it has enough space for us now. If that is the case 378 * un-categorized even though it has enough space for us now. If that is the
379 * this function will put the LEB back onto a heap. 379 * case this function will put the LEB back onto a heap.
380 */ 380 */
381void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) 381void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
382{ 382{
@@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct ubifs_info *c,
436/** 436/**
437 * change_category - change LEB properties category. 437 * change_category - change LEB properties category.
438 * @c: UBIFS file-system description object 438 * @c: UBIFS file-system description object
439 * @lprops: LEB properties to recategorize 439 * @lprops: LEB properties to re-categorize
440 * 440 *
441 * LEB properties are categorized to enable fast find operations. When the LEB 441 * LEB properties are categorized to enable fast find operations. When the LEB
442 * properties change they must be recategorized. 442 * properties change they must be re-categorized.
443 */ 443 */
444static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) 444static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
445{ 445{
@@ -461,21 +461,18 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
461} 461}
462 462
463/** 463/**
464 * calc_dark - calculate LEB dark space size. 464 * ubifs_calc_dark - calculate LEB dark space size.
465 * @c: the UBIFS file-system description object 465 * @c: the UBIFS file-system description object
466 * @spc: amount of free and dirty space in the LEB 466 * @spc: amount of free and dirty space in the LEB
467 * 467 *
468 * This function calculates amount of dark space in an LEB which has @spc bytes 468 * This function calculates and returns amount of dark space in an LEB which
469 * of free and dirty space. Returns the calculations result. 469 * has @spc bytes of free and dirty space.
470 * 470 *
471 * Dark space is the space which is not always usable - it depends on which 471 * UBIFS is trying to account the space which might not be usable, and this
472 * nodes are written in which order. E.g., if an LEB has only 512 free bytes, 472 * space is called "dark space". For example, if an LEB has only %512 free
473 * it is dark space, because it cannot fit a large data node. So UBIFS cannot 473 * bytes, it is dark space, because it cannot fit a large data node.
474 * count on this LEB and treat these 512 bytes as usable because it is not true
475 * if, for example, only big chunks of uncompressible data will be written to
476 * the FS.
477 */ 474 */
478static int calc_dark(struct ubifs_info *c, int spc) 475int ubifs_calc_dark(const struct ubifs_info *c, int spc)
479{ 476{
480 ubifs_assert(!(spc & 7)); 477 ubifs_assert(!(spc & 7));
481 478
@@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
518 * @free: new free space amount 515 * @free: new free space amount
519 * @dirty: new dirty space amount 516 * @dirty: new dirty space amount
520 * @flags: new flags 517 * @flags: new flags
521 * @idx_gc_cnt: change to the count of idx_gc list 518 * @idx_gc_cnt: change to the count of @idx_gc list
522 * 519 *
523 * This function changes LEB properties (@free, @dirty or @flag). However, the 520 * This function changes LEB properties (@free, @dirty or @flag). However, the
524 * property which has the %LPROPS_NC value is not changed. Returns a pointer to 521 * property which has the %LPROPS_NC value is not changed. Returns a pointer to
@@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
535{ 532{
536 /* 533 /*
537 * This is the only function that is allowed to change lprops, so we 534 * This is the only function that is allowed to change lprops, so we
538 * discard the const qualifier. 535 * discard the "const" qualifier.
539 */ 536 */
540 struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; 537 struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
541 538
@@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
575 if (old_spc < c->dead_wm) 572 if (old_spc < c->dead_wm)
576 c->lst.total_dead -= old_spc; 573 c->lst.total_dead -= old_spc;
577 else 574 else
578 c->lst.total_dark -= calc_dark(c, old_spc); 575 c->lst.total_dark -= ubifs_calc_dark(c, old_spc);
579 576
580 c->lst.total_used -= c->leb_size - old_spc; 577 c->lst.total_used -= c->leb_size - old_spc;
581 } 578 }
@@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
616 if (new_spc < c->dead_wm) 613 if (new_spc < c->dead_wm)
617 c->lst.total_dead += new_spc; 614 c->lst.total_dead += new_spc;
618 else 615 else
619 c->lst.total_dark += calc_dark(c, new_spc); 616 c->lst.total_dark += ubifs_calc_dark(c, new_spc);
620 617
621 c->lst.total_used += c->leb_size - new_spc; 618 c->lst.total_used += c->leb_size - new_spc;
622 } 619 }
@@ -1096,7 +1093,7 @@ static int scan_check_cb(struct ubifs_info *c,
1096 } 1093 }
1097 } 1094 }
1098 1095
1099 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); 1096 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
1100 if (IS_ERR(sleb)) { 1097 if (IS_ERR(sleb)) {
1101 /* 1098 /*
1102 * After an unclean unmount, empty and freeable LEBs 1099 * After an unclean unmount, empty and freeable LEBs
@@ -1107,7 +1104,7 @@ static int scan_check_cb(struct ubifs_info *c,
1107 "- continuing checking"); 1104 "- continuing checking");
1108 lst->empty_lebs += 1; 1105 lst->empty_lebs += 1;
1109 lst->total_free += c->leb_size; 1106 lst->total_free += c->leb_size;
1110 lst->total_dark += calc_dark(c, c->leb_size); 1107 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1111 return LPT_SCAN_CONTINUE; 1108 return LPT_SCAN_CONTINUE;
1112 } 1109 }
1113 1110
@@ -1117,7 +1114,7 @@ static int scan_check_cb(struct ubifs_info *c,
1117 "- continuing checking"); 1114 "- continuing checking");
1118 lst->total_free += lp->free; 1115 lst->total_free += lp->free;
1119 lst->total_dirty += lp->dirty; 1116 lst->total_dirty += lp->dirty;
1120 lst->total_dark += calc_dark(c, c->leb_size); 1117 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1121 return LPT_SCAN_CONTINUE; 1118 return LPT_SCAN_CONTINUE;
1122 } 1119 }
1123 data->err = PTR_ERR(sleb); 1120 data->err = PTR_ERR(sleb);
@@ -1235,7 +1232,7 @@ static int scan_check_cb(struct ubifs_info *c,
1235 if (spc < c->dead_wm) 1232 if (spc < c->dead_wm)
1236 lst->total_dead += spc; 1233 lst->total_dead += spc;
1237 else 1234 else
1238 lst->total_dark += calc_dark(c, spc); 1235 lst->total_dark += ubifs_calc_dark(c, spc);
1239 } 1236 }
1240 1237
1241 ubifs_scan_destroy(sleb); 1238 ubifs_scan_destroy(sleb);
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index a88f33801b98..28beaeedadc0 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -29,7 +29,8 @@
29 * @c: UBIFS file-system description object 29 * @c: UBIFS file-system description object
30 * 30 *
31 * This function scans the master node LEBs and search for the latest master 31 * This function scans the master node LEBs and search for the latest master
32 * node. Returns zero in case of success and a negative error code in case of 32 * node. Returns zero in case of success, %-EUCLEAN if there master area is
33 * corrupted and requires recovery, and a negative error code in case of
33 * failure. 34 * failure.
34 */ 35 */
35static int scan_for_master(struct ubifs_info *c) 36static int scan_for_master(struct ubifs_info *c)
@@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_info *c)
40 41
41 lnum = UBIFS_MST_LNUM; 42 lnum = UBIFS_MST_LNUM;
42 43
43 sleb = ubifs_scan(c, lnum, 0, c->sbuf); 44 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
44 if (IS_ERR(sleb)) 45 if (IS_ERR(sleb))
45 return PTR_ERR(sleb); 46 return PTR_ERR(sleb);
46 nodes_cnt = sleb->nodes_cnt; 47 nodes_cnt = sleb->nodes_cnt;
@@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_info *c)
48 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, 49 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
49 list); 50 list);
50 if (snod->type != UBIFS_MST_NODE) 51 if (snod->type != UBIFS_MST_NODE)
51 goto out; 52 goto out_dump;
52 memcpy(c->mst_node, snod->node, snod->len); 53 memcpy(c->mst_node, snod->node, snod->len);
53 offs = snod->offs; 54 offs = snod->offs;
54 } 55 }
@@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_info *c)
56 57
57 lnum += 1; 58 lnum += 1;
58 59
59 sleb = ubifs_scan(c, lnum, 0, c->sbuf); 60 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
60 if (IS_ERR(sleb)) 61 if (IS_ERR(sleb))
61 return PTR_ERR(sleb); 62 return PTR_ERR(sleb);
62 if (sleb->nodes_cnt != nodes_cnt) 63 if (sleb->nodes_cnt != nodes_cnt)
@@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_info *c)
65 goto out; 66 goto out;
66 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); 67 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
67 if (snod->type != UBIFS_MST_NODE) 68 if (snod->type != UBIFS_MST_NODE)
68 goto out; 69 goto out_dump;
69 if (snod->offs != offs) 70 if (snod->offs != offs)
70 goto out; 71 goto out;
71 if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, 72 if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
@@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_info *c)
78 79
79out: 80out:
80 ubifs_scan_destroy(sleb); 81 ubifs_scan_destroy(sleb);
82 return -EUCLEAN;
83
84out_dump:
85 ubifs_err("unexpected node type %d master LEB %d:%d",
86 snod->type, lnum, snod->offs);
87 ubifs_scan_destroy(sleb);
81 return -EINVAL; 88 return -EINVAL;
82} 89}
83 90
@@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info *c)
256 263
257 err = scan_for_master(c); 264 err = scan_for_master(c);
258 if (err) { 265 if (err) {
259 err = ubifs_recover_master_node(c); 266 if (err == -EUCLEAN)
267 err = ubifs_recover_master_node(c);
260 if (err) 268 if (err)
261 /* 269 /*
262 * Note, we do not free 'c->mst_node' here because the 270 * Note, we do not free 'c->mst_node' here because the
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 152a7b34a141..82009c74b6a3 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -670,9 +670,10 @@ static int kill_orphans(struct ubifs_info *c)
670 struct ubifs_scan_leb *sleb; 670 struct ubifs_scan_leb *sleb;
671 671
672 dbg_rcvry("LEB %d", lnum); 672 dbg_rcvry("LEB %d", lnum);
673 sleb = ubifs_scan(c, lnum, 0, c->sbuf); 673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
676 if (IS_ERR(sleb)) { 677 if (IS_ERR(sleb)) {
677 err = PTR_ERR(sleb); 678 err = PTR_ERR(sleb);
678 break; 679 break;
@@ -899,7 +900,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
899 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { 900 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
900 struct ubifs_scan_leb *sleb; 901 struct ubifs_scan_leb *sleb;
901 902
902 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); 903 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
903 if (IS_ERR(sleb)) { 904 if (IS_ERR(sleb)) {
904 err = PTR_ERR(sleb); 905 err = PTR_ERR(sleb);
905 break; 906 break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index e5f6cf8a1155..f94ddf7efba0 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -286,7 +286,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
286 mst = mst2; 286 mst = mst2;
287 } 287 }
288 288
289 dbg_rcvry("recovered master node from LEB %d", 289 ubifs_msg("recovered master node from LEB %d",
290 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); 290 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
291 291
292 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); 292 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
@@ -790,7 +790,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
790 * We can only recover at the end of the log, so check that the 790 * We can only recover at the end of the log, so check that the
791 * next log LEB is empty or out of date. 791 * next log LEB is empty or out of date.
792 */ 792 */
793 sleb = ubifs_scan(c, next_lnum, 0, sbuf); 793 sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
794 if (IS_ERR(sleb)) 794 if (IS_ERR(sleb))
795 return sleb; 795 return sleb;
796 if (sleb->nodes_cnt) { 796 if (sleb->nodes_cnt) {
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 2970500f32df..5c2d6d759a3e 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -506,7 +506,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
506 if (c->need_recovery) 506 if (c->need_recovery)
507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
508 else 508 else
509 sleb = ubifs_scan(c, lnum, offs, c->sbuf); 509 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
510 if (IS_ERR(sleb)) 510 if (IS_ERR(sleb))
511 return PTR_ERR(sleb); 511 return PTR_ERR(sleb);
512 512
@@ -836,8 +836,8 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
836 const struct ubifs_cs_node *node; 836 const struct ubifs_cs_node *node;
837 837
838 dbg_mnt("replay log LEB %d:%d", lnum, offs); 838 dbg_mnt("replay log LEB %d:%d", lnum, offs);
839 sleb = ubifs_scan(c, lnum, offs, sbuf); 839 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
840 if (IS_ERR(sleb) ) { 840 if (IS_ERR(sleb)) {
841 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 841 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
842 return PTR_ERR(sleb); 842 return PTR_ERR(sleb);
843 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 843 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 892ebfee4fe5..96c525384191 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
108 108
109 /* Make the node pads to 8-byte boundary */ 109 /* Make the node pads to 8-byte boundary */
110 if ((node_len + pad_len) & 7) { 110 if ((node_len + pad_len) & 7) {
111 if (!quiet) { 111 if (!quiet)
112 dbg_err("bad padding length %d - %d", 112 dbg_err("bad padding length %d - %d",
113 offs, offs + node_len + pad_len); 113 offs, offs + node_len + pad_len);
114 }
115 return SCANNED_A_BAD_PAD_NODE; 114 return SCANNED_A_BAD_PAD_NODE;
116 } 115 }
117 116
@@ -253,15 +252,19 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
253 * @c: UBIFS file-system description object 252 * @c: UBIFS file-system description object
254 * @lnum: logical eraseblock number 253 * @lnum: logical eraseblock number
255 * @offs: offset to start at (usually zero) 254 * @offs: offset to start at (usually zero)
256 * @sbuf: scan buffer (must be c->leb_size) 255 * @sbuf: scan buffer (must be of @c->leb_size bytes in size)
256 * @quiet: print no messages
257 * 257 *
258 * This function scans LEB number @lnum and returns complete information about 258 * This function scans LEB number @lnum and returns complete information about
259 * its contents. Returns the scaned information in case of success and, 259 * its contents. Returns the scaned information in case of success and,
260 * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case 260 * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case
261 * of failure. 261 * of failure.
262 *
263 * If @quiet is non-zero, this function does not print large and scary
264 * error messages and flash dumps in case of errors.
262 */ 265 */
263struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, 266struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
264 int offs, void *sbuf) 267 int offs, void *sbuf, int quiet)
265{ 268{
266 void *buf = sbuf + offs; 269 void *buf = sbuf + offs;
267 int err, len = c->leb_size - offs; 270 int err, len = c->leb_size - offs;
@@ -280,7 +283,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
280 283
281 cond_resched(); 284 cond_resched();
282 285
283 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 286 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
284 if (ret > 0) { 287 if (ret > 0) {
285 /* Padding bytes or a valid padding node */ 288 /* Padding bytes or a valid padding node */
286 offs += ret; 289 offs += ret;
@@ -320,7 +323,9 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
320 } 323 }
321 324
322 if (offs % c->min_io_size) { 325 if (offs % c->min_io_size) {
323 ubifs_err("empty space starts at non-aligned offset %d", offs); 326 if (!quiet)
327 ubifs_err("empty space starts at non-aligned offset %d",
328 offs);
324 goto corrupted;; 329 goto corrupted;;
325 } 330 }
326 331
@@ -331,18 +336,25 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
331 break; 336 break;
332 for (; len; offs++, buf++, len--) 337 for (; len; offs++, buf++, len--)
333 if (*(uint8_t *)buf != 0xff) { 338 if (*(uint8_t *)buf != 0xff) {
334 ubifs_err("corrupt empty space at LEB %d:%d", 339 if (!quiet)
335 lnum, offs); 340 ubifs_err("corrupt empty space at LEB %d:%d",
341 lnum, offs);
336 goto corrupted; 342 goto corrupted;
337 } 343 }
338 344
339 return sleb; 345 return sleb;
340 346
341corrupted: 347corrupted:
342 ubifs_scanned_corruption(c, lnum, offs, buf); 348 if (!quiet) {
349 ubifs_scanned_corruption(c, lnum, offs, buf);
350 ubifs_err("LEB %d scanning failed", lnum);
351 }
343 err = -EUCLEAN; 352 err = -EUCLEAN;
353 ubifs_scan_destroy(sleb);
354 return ERR_PTR(err);
355
344error: 356error:
345 ubifs_err("LEB %d scanning failed", lnum); 357 ubifs_err("LEB %d scanning failed, error %d", lnum, err);
346 ubifs_scan_destroy(sleb); 358 ubifs_scan_destroy(sleb);
347 return ERR_PTR(err); 359 return ERR_PTR(err);
348} 360}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c4af069df1ad..333e181ee987 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -36,7 +36,6 @@
36#include <linux/mount.h> 36#include <linux/mount.h>
37#include <linux/math64.h> 37#include <linux/math64.h>
38#include <linux/writeback.h> 38#include <linux/writeback.h>
39#include <linux/smp_lock.h>
40#include "ubifs.h" 39#include "ubifs.h"
41 40
42/* 41/*
@@ -318,6 +317,8 @@ static int ubifs_write_inode(struct inode *inode, int wait)
318 if (err) 317 if (err)
319 ubifs_err("can't write inode %lu, error %d", 318 ubifs_err("can't write inode %lu, error %d",
320 inode->i_ino, err); 319 inode->i_ino, err);
320 else
321 err = dbg_check_inode_size(c, inode, ui->ui_size);
321 } 322 }
322 323
323 ui->dirty = 0; 324 ui->dirty = 0;
@@ -448,17 +449,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
448 return 0; 449 return 0;
449 450
450 /* 451 /*
451 * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
452 * pages, so synchronize them first, then commit the journal. Strictly
453 * speaking, it is not necessary to commit the journal here,
454 * synchronizing write-buffers would be enough. But committing makes
455 * UBIFS free space predictions much more accurate, so we want to let
456 * the user be able to get more accurate results of 'statfs()' after
457 * they synchronize the file system.
458 */
459 sync_inodes_sb(sb);
460
461 /*
462 * Synchronize write buffers, because 'ubifs_run_commit()' does not 452 * Synchronize write buffers, because 'ubifs_run_commit()' does not
463 * do this if it waits for an already running commit. 453 * do this if it waits for an already running commit.
464 */ 454 */
@@ -468,6 +458,13 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
468 return err; 458 return err;
469 } 459 }
470 460
461 /*
462 * Strictly speaking, it is not necessary to commit the journal here,
463 * synchronizing write-buffers would be enough. But committing makes
464 * UBIFS free space predictions much more accurate, so we want to let
465 * the user be able to get more accurate results of 'statfs()' after
466 * they synchronize the file system.
467 */
471 err = ubifs_run_commit(c); 468 err = ubifs_run_commit(c);
472 if (err) 469 if (err)
473 return err; 470 return err;
@@ -1720,8 +1717,6 @@ static void ubifs_put_super(struct super_block *sb)
1720 ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, 1717 ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
1721 c->vi.vol_id); 1718 c->vi.vol_id);
1722 1719
1723 lock_kernel();
1724
1725 /* 1720 /*
1726 * The following asserts are only valid if there has not been a failure 1721 * The following asserts are only valid if there has not been a failure
1727 * of the media. For example, there will be dirty inodes if we failed 1722 * of the media. For example, there will be dirty inodes if we failed
@@ -1786,8 +1781,6 @@ static void ubifs_put_super(struct super_block *sb)
1786 ubi_close_volume(c->ubi); 1781 ubi_close_volume(c->ubi);
1787 mutex_unlock(&c->umount_mutex); 1782 mutex_unlock(&c->umount_mutex);
1788 kfree(c); 1783 kfree(c);
1789
1790 unlock_kernel();
1791} 1784}
1792 1785
1793static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) 1786static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1803,22 +1796,17 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1803 return err; 1796 return err;
1804 } 1797 }
1805 1798
1806 lock_kernel();
1807 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { 1799 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
1808 if (c->ro_media) { 1800 if (c->ro_media) {
1809 ubifs_msg("cannot re-mount due to prior errors"); 1801 ubifs_msg("cannot re-mount due to prior errors");
1810 unlock_kernel();
1811 return -EROFS; 1802 return -EROFS;
1812 } 1803 }
1813 err = ubifs_remount_rw(c); 1804 err = ubifs_remount_rw(c);
1814 if (err) { 1805 if (err)
1815 unlock_kernel();
1816 return err; 1806 return err;
1817 }
1818 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { 1807 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
1819 if (c->ro_media) { 1808 if (c->ro_media) {
1820 ubifs_msg("cannot re-mount due to prior errors"); 1809 ubifs_msg("cannot re-mount due to prior errors");
1821 unlock_kernel();
1822 return -EROFS; 1810 return -EROFS;
1823 } 1811 }
1824 ubifs_remount_ro(c); 1812 ubifs_remount_ro(c);
@@ -1833,7 +1821,6 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1833 } 1821 }
1834 1822
1835 ubifs_assert(c->lst.taken_empty_lebs > 0); 1823 ubifs_assert(c->lst.taken_empty_lebs > 0);
1836 unlock_kernel();
1837 return 0; 1824 return 0;
1838} 1825}
1839 1826
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f249f7b0d656..e5b1a7d00fa0 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1159,8 +1159,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
1159 * o exact match, i.e. the found zero-level znode contains key @key, then %1 1159 * o exact match, i.e. the found zero-level znode contains key @key, then %1
1160 * is returned and slot number of the matched branch is stored in @n; 1160 * is returned and slot number of the matched branch is stored in @n;
1161 * o not exact match, which means that zero-level znode does not contain 1161 * o not exact match, which means that zero-level znode does not contain
1162 * @key, then %0 is returned and slot number of the closed branch is stored 1162 * @key, then %0 is returned and slot number of the closest branch is stored
1163 * in @n; 1163 * in @n;
1164 * o @key is so small that it is even less than the lowest key of the 1164 * o @key is so small that it is even less than the lowest key of the
1165 * leftmost zero-level node, then %0 is returned and %0 is stored in @n. 1165 * leftmost zero-level node, then %0 is returned and %0 is stored in @n.
1166 * 1166 *
@@ -1433,7 +1433,7 @@ static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1433 * @lnum: LEB number is returned here 1433 * @lnum: LEB number is returned here
1434 * @offs: offset is returned here 1434 * @offs: offset is returned here
1435 * 1435 *
1436 * This function look up and reads node with key @key. The caller has to make 1436 * This function looks up and reads node with key @key. The caller has to make
1437 * sure the @node buffer is large enough to fit the node. Returns zero in case 1437 * sure the @node buffer is large enough to fit the node. Returns zero in case
1438 * of success, %-ENOENT if the node was not found, and a negative error code in 1438 * of success, %-ENOENT if the node was not found, and a negative error code in
1439 * case of failure. The node location can be returned in @lnum and @offs. 1439 * case of failure. The node location can be returned in @lnum and @offs.
@@ -3268,3 +3268,73 @@ out_unlock:
3268 mutex_unlock(&c->tnc_mutex); 3268 mutex_unlock(&c->tnc_mutex);
3269 return err; 3269 return err;
3270} 3270}
3271
3272#ifdef CONFIG_UBIFS_FS_DEBUG
3273
3274/**
3275 * dbg_check_inode_size - check if inode size is correct.
3276 * @c: UBIFS file-system description object
3277 * @inum: inode number
3278 * @size: inode size
3279 *
3280 * This function makes sure that the inode size (@size) is correct and it does
3281 * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL
3282 * if it has a data page beyond @size, and other negative error code in case of
3283 * other errors.
3284 */
3285int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3286 loff_t size)
3287{
3288 int err, n;
3289 union ubifs_key from_key, to_key, *key;
3290 struct ubifs_znode *znode;
3291 unsigned int block;
3292
3293 if (!S_ISREG(inode->i_mode))
3294 return 0;
3295 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
3296 return 0;
3297
3298 block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
3299 data_key_init(c, &from_key, inode->i_ino, block);
3300 highest_data_key(c, &to_key, inode->i_ino);
3301
3302 mutex_lock(&c->tnc_mutex);
3303 err = ubifs_lookup_level0(c, &from_key, &znode, &n);
3304 if (err < 0)
3305 goto out_unlock;
3306
3307 if (err) {
3308 err = -EINVAL;
3309 key = &from_key;
3310 goto out_dump;
3311 }
3312
3313 err = tnc_next(c, &znode, &n);
3314 if (err == -ENOENT) {
3315 err = 0;
3316 goto out_unlock;
3317 }
3318 if (err < 0)
3319 goto out_unlock;
3320
3321 ubifs_assert(err == 0);
3322 key = &znode->zbranch[n].key;
3323 if (!key_in_range(c, key, &from_key, &to_key))
3324 goto out_unlock;
3325
3326out_dump:
3327 block = key_block(c, key);
3328 ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
3329 "(data key %s)", (unsigned long)inode->i_ino, size,
3330 ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
3331 dbg_dump_inode(c, inode);
3332 dbg_dump_stack();
3333 err = -EINVAL;
3334
3335out_unlock:
3336 mutex_unlock(&c->tnc_mutex);
3337 return err;
3338}
3339
3340#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index fde8d127c768..53288e5d604e 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
245 * it is more comprehensive and less efficient than is needed for this 245 * it is more comprehensive and less efficient than is needed for this
246 * purpose. 246 * purpose.
247 */ 247 */
248 sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); 248 sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0);
249 c->ileb_len = 0; 249 c->ileb_len = 0;
250 if (IS_ERR(sleb)) 250 if (IS_ERR(sleb))
251 return PTR_ERR(sleb); 251 return PTR_ERR(sleb);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 3eee07e0c495..191ca7863fe7 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -135,6 +135,13 @@
135/* The key is always at the same position in all keyed nodes */ 135/* The key is always at the same position in all keyed nodes */
136#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) 136#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
137 137
138/* Garbage collector journal head number */
139#define UBIFS_GC_HEAD 0
140/* Base journal head number */
141#define UBIFS_BASE_HEAD 1
142/* Data journal head number */
143#define UBIFS_DATA_HEAD 2
144
138/* 145/*
139 * LEB Properties Tree node types. 146 * LEB Properties Tree node types.
140 * 147 *
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a29349094422..b2d976366a46 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -105,12 +105,10 @@
105/* Number of non-data journal heads */ 105/* Number of non-data journal heads */
106#define NONDATA_JHEADS_CNT 2 106#define NONDATA_JHEADS_CNT 2
107 107
108/* Garbage collector head */ 108/* Shorter names for journal head numbers for internal usage */
109#define GCHD 0 109#define GCHD UBIFS_GC_HEAD
110/* Base journal head number */ 110#define BASEHD UBIFS_BASE_HEAD
111#define BASEHD 1 111#define DATAHD UBIFS_DATA_HEAD
112/* First "general purpose" journal head */
113#define DATAHD 2
114 112
115/* 'No change' value for 'ubifs_change_lp()' */ 113/* 'No change' value for 'ubifs_change_lp()' */
116#define LPROPS_NC 0x80000001 114#define LPROPS_NC 0x80000001
@@ -1451,7 +1449,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode);
1451 1449
1452/* scan.c */ 1450/* scan.c */
1453struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, 1451struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
1454 int offs, void *sbuf); 1452 int offs, void *sbuf, int quiet);
1455void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); 1453void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
1456int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, 1454int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
1457 int offs, int quiet); 1455 int offs, int quiet);
@@ -1676,6 +1674,7 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c);
1676const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); 1674const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
1677const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); 1675const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
1678const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); 1676const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
1677int ubifs_calc_dark(const struct ubifs_info *c, int spc);
1679 1678
1680/* file.c */ 1679/* file.c */
1681int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); 1680int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index adafcf556531..195830f47569 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -78,9 +78,9 @@ enum {
78 SECURITY_XATTR, 78 SECURITY_XATTR,
79}; 79};
80 80
81static struct inode_operations none_inode_operations; 81static const struct inode_operations none_inode_operations;
82static struct address_space_operations none_address_operations; 82static const struct address_space_operations none_address_operations;
83static struct file_operations none_file_operations; 83static const struct file_operations none_file_operations;
84 84
85/** 85/**
86 * create_xattr - create an extended attribute. 86 * create_xattr - create an extended attribute.
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index cb6e2cca214f..9e41f91aa269 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -150,7 +150,7 @@ xfs_fs_set_xquota(
150 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); 150 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
151} 151}
152 152
153struct quotactl_ops xfs_quotactl_operations = { 153const struct quotactl_ops xfs_quotactl_operations = {
154 .quota_sync = xfs_fs_quota_sync, 154 .quota_sync = xfs_fs_quota_sync,
155 .get_xstate = xfs_fs_get_xstate, 155 .get_xstate = xfs_fs_get_xstate,
156 .set_xstate = xfs_fs_set_xstate, 156 .set_xstate = xfs_fs_set_xstate,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 5d7c60ac77b4..bdd41c8c342f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -67,7 +67,7 @@
67#include <linux/freezer.h> 67#include <linux/freezer.h>
68#include <linux/parser.h> 68#include <linux/parser.h>
69 69
70static struct super_operations xfs_super_operations; 70static const struct super_operations xfs_super_operations;
71static kmem_zone_t *xfs_ioend_zone; 71static kmem_zone_t *xfs_ioend_zone;
72mempool_t *xfs_ioend_pool; 72mempool_t *xfs_ioend_pool;
73 73
@@ -1536,7 +1536,7 @@ xfs_fs_get_sb(
1536 mnt); 1536 mnt);
1537} 1537}
1538 1538
1539static struct super_operations xfs_super_operations = { 1539static const struct super_operations xfs_super_operations = {
1540 .alloc_inode = xfs_fs_alloc_inode, 1540 .alloc_inode = xfs_fs_alloc_inode,
1541 .destroy_inode = xfs_fs_destroy_inode, 1541 .destroy_inode = xfs_fs_destroy_inode,
1542 .write_inode = xfs_fs_write_inode, 1542 .write_inode = xfs_fs_write_inode,
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 5a2ea3a21781..18175ebd58ed 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -93,7 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
93 93
94extern const struct export_operations xfs_export_operations; 94extern const struct export_operations xfs_export_operations;
95extern struct xattr_handler *xfs_xattr_handlers[]; 95extern struct xattr_handler *xfs_xattr_handlers[];
96extern struct quotactl_ops xfs_quotactl_operations; 96extern const struct quotactl_ops xfs_quotactl_operations;
97 97
98#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 98#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
99 99
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c4ea51b55dce..f52ac276277e 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -117,7 +117,7 @@ struct getbmapx {
117#define BMV_IF_VALID \ 117#define BMV_IF_VALID \
118 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) 118 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
119 119
120/* bmv_oflags values - returned for for each non-header segment */ 120/* bmv_oflags values - returned for each non-header segment */
121#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ 121#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
122#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ 122#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */
123#define BMV_OF_LAST 0x4 /* segment is the last in the file */ 123#define BMV_OF_LAST 0x4 /* segment is the last in the file */