51 files changed, 915 insertions, 997 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 618a60f03886..240cef14fe58 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -106,6 +106,7 @@ struct connection {
 #define CF_CONNECT_PENDING 3
 #define CF_INIT_PENDING 4
 #define CF_IS_OTHERCON 5
+#define CF_CLOSE 6
        struct list_head writequeue;  /* List of outgoing writequeue_entries */
        spinlock_t writequeue_lock;
        int (*rx_action) (struct connection *); /* What to do when active */
@@ -299,6 +300,8 @@ static void lowcomms_write_space(struct sock *sk)
 static inline void lowcomms_connect_sock(struct connection *con)
 {
+        if (test_bit(CF_CLOSE, &con->flags))
+                return; /* connection is being closed: queue no connect work */
        if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
                queue_work(send_workqueue, &con->swork);
 }
@@ -926,10 +929,8 @@ static void tcp_connect_to_sock(struct connection *con)
                goto out_err;
        memset(&saddr, 0, sizeof(saddr));
-        if (dlm_nodeid_to_addr(con->nodeid, &saddr)) {
+        if (dlm_nodeid_to_addr(con->nodeid, &saddr))
-                sock_release(sock);
                goto out_err;
-        }
        sock->sk->sk_user_data = con;
        con->rx_action = receive_from_sock;
@@ -1284,7 +1285,6 @@ out:
 static void send_to_sock(struct connection *con)
 {
        int ret = 0;
-        ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
        const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
        struct writequeue_entry *e;
        int len, offset;
@@ -1293,8 +1293,6 @@ static void send_to_sock(struct connection *con)
        if (con->sock == NULL)
                goto out_connect;
-        sendpage = con->sock->ops->sendpage;
        spin_lock(&con->writequeue_lock);
        for (;;) {
                e = list_entry(con->writequeue.next, struct writequeue_entry,
@@ -1309,8 +1307,8 @@ static void send_to_sock(struct connection *con)
                ret = 0;
                if (len) {
-                        ret = sendpage(con->sock, e->page, offset, len,
+                        ret = kernel_sendpage(con->sock, e->page, offset, len,
-                                       msg_flags);
+                                              msg_flags);
                        if (ret == -EAGAIN || ret == 0) {
                                cond_resched();
                                goto out;
@@ -1370,6 +1368,13 @@ int dlm_lowcomms_close(int nodeid)
        log_print("closing connection to node %d", nodeid);
        con = nodeid2con(nodeid, 0);
        if (con) {
+                clear_bit(CF_CONNECT_PENDING, &con->flags);
+                clear_bit(CF_WRITE_PENDING, &con->flags);
+                set_bit(CF_CLOSE, &con->flags);
+                if (cancel_work_sync(&con->swork))
+                        log_print("canceled swork for node %d", nodeid);
+                if (cancel_work_sync(&con->rwork))
+                        log_print("canceled rwork for node %d", nodeid);
                clean_one_writequeue(con);
                close_connection(con, true);
        }
@@ -1395,9 +1400,10 @@ static void process_send_sockets(struct work_struct *work)
        if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
                con->connect_action(con);
+                set_bit(CF_WRITE_PENDING, &con->flags);
        }
-        clear_bit(CF_WRITE_PENDING, &con->flags);
+        if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
-        send_to_sock(con);
+                send_to_sock(con);
 }
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d33634119e17..451d166bbe93 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -23,6 +23,7 @@
 */
 #include <linux/time.h>
+#include <linux/blkdev.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/writeback.h>
@@ -73,7 +74,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
        }
        if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-                goto out;
+                goto flush;
        /*
         * The VFS has written the file data.  If the inode is unaltered
@@ -85,7 +86,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
                        .nr_to_write = 0, /* sys_fsync did this */
                };
                ret = sync_inode(inode, &wbc);
+                goto out;
        }
+flush:
+        /*
+         * In case we didn't commit a transaction, we have to flush
+         * disk caches manually so that data really is on persistent
+         * storage
+         */
+        if (test_opt(inode->i_sb, BARRIER))
+                blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 out:
        return ret;
 }
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b49908a167ae..cd098a7b77fc 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -172,10 +172,21 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
 * so before we call here everything must be consistently dirtied against
 * this transaction.
 */
-static int ext3_journal_test_restart(handle_t *handle, struct inode *inode)
+static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
 {
+        int ret;
        jbd_debug(2, "restarting handle %p\n", handle);
-        return ext3_journal_restart(handle, blocks_for_truncate(inode));
+        /*
+         * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle.
+         * At this moment, get_block can be called only for blocks inside
+         * i_size since page cache has been already dropped and writes are
+         * blocked by i_mutex. So we can safely drop the truncate_mutex.
+         *
+         * The mutex is re-taken before returning, whatever
+         * ext3_journal_restart() returned; that result is passed back
+         * to the caller unchanged.
+         */
+        mutex_unlock(&EXT3_I(inode)->truncate_mutex);
+        ret = ext3_journal_restart(handle, blocks_for_truncate(inode));
+        mutex_lock(&EXT3_I(inode)->truncate_mutex);
+        return ret;
 }
 /*
@@ -2072,7 +2083,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
                        ext3_journal_dirty_metadata(handle, bh);
                }
                ext3_mark_inode_dirty(handle, inode);
-                ext3_journal_test_restart(handle, inode);
+                truncate_restart_transaction(handle, inode);
                if (bh) {
                        BUFFER_TRACE(bh, "retaking write access");
                        ext3_journal_get_write_access(handle, bh);
@@ -2282,7 +2293,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
                                return;
                        if (try_to_extend_transaction(handle, inode)) {
                                ext3_mark_inode_dirty(handle, inode);
-                                ext3_journal_test_restart(handle, inode);
+                                truncate_restart_transaction(handle, inode);
                        }
                        ext3_free_blocks(handle, inode, nr, 1);
@@ -2892,6 +2903,10 @@ static int ext3_do_update_inode(handle_t *handle,
        struct buffer_head *bh = iloc->bh;
        int err = 0, rc, block;
+again:
+        /* we can't allow multiple procs in here at once, it's a bit racy */
+        lock_buffer(bh);
        /* For fields not not tracking in the in-memory inode,
         * initialise them to zero for new inodes. */
        if (ei->i_state & EXT3_STATE_NEW)
@@ -2951,16 +2966,20 @@ static int ext3_do_update_inode(handle_t *handle,
                               /* If this is the first large file
                                * created, add a flag to the superblock.
                                */
+                                unlock_buffer(bh);
                                err = ext3_journal_get_write_access(handle,
                                                EXT3_SB(sb)->s_sbh);
                                if (err)
                                        goto out_brelse;
                                ext3_update_dynamic_rev(sb);
                                EXT3_SET_RO_COMPAT_FEATURE(sb,
                                        EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
                                handle->h_sync = 1;
                                err = ext3_journal_dirty_metadata(handle,
                                                EXT3_SB(sb)->s_sbh);
+                                /* get our lock and start over */
+                                goto again;
                        }
                }
        }
@@ -2983,6 +3002,7 @@ static int ext3_do_update_inode(handle_t *handle,
                raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
        BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+        unlock_buffer(bh);
        rc = ext3_journal_dirty_metadata(handle, bh);
        if (!err)
                err = rc;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 99c99dfb0373..3773fd63d2f9 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -61,6 +61,121 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf,
        return simple_read_from_buffer(buf, len, ppos, tmp, size);
 }
+/*
+ * Shared read helper for the per-connection limit files: format @val as an
+ * unsigned decimal followed by a newline and hand the text to
+ * simple_read_from_buffer(), whose result is returned.
+ */
+static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf,
+                                    size_t len, loff_t *ppos, unsigned val)
+{
+        char text[32];
+        size_t text_len;
+        /* sprintf() reports the number of characters it produced. */
+        text_len = sprintf(text, "%u\n", val);
+        return simple_read_from_buffer(buf, len, ppos, text, text_len);
+}
+/*
+ * Shared write helper for the per-connection limit files.
+ *
+ * Copies at most sizeof(tmp) - 2 bytes from @buf, parses them with
+ * strict_strtoul() (base auto-detected) and stores the result in *@val.
+ * The value may not exceed 65535; callers without CAP_SYS_ADMIN are further
+ * restricted to @global_limit.
+ *
+ * Returns @count on success, -EINVAL for a non-zero *@ppos, an over-long
+ * write or an out-of-range value, -EFAULT if the user buffer cannot be
+ * read, or the error reported by strict_strtoul().
+ */
+static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf,
+                                     size_t count, loff_t *ppos, unsigned *val,
+                                     unsigned global_limit)
+{
+        unsigned long t;
+        char tmp[32];
+        unsigned limit = (1 << 16) - 1;
+        int err;
+        if (*ppos || count >= sizeof(tmp) - 1)
+                return -EINVAL;
+        /* A faulting user pointer is an addressing error, not a bad value. */
+        if (copy_from_user(tmp, buf, count))
+                return -EFAULT;
+        tmp[count] = '\0';
+        err = strict_strtoul(tmp, 0, &t);
+        if (err)
+                return err;
+        if (!capable(CAP_SYS_ADMIN))
+                limit = min(limit, global_limit);
+        if (t > limit)
+                return -EINVAL;
+        *val = t;
+        return count;
+}
+/*
+ * Read handler that reports the connection's max_background value.
+ * Returns 0 (nothing read) when no connection is attached to the file.
+ */
+static ssize_t fuse_conn_max_background_read(struct file *file,
+                                             char __user *buf, size_t len,
+                                             loff_t *ppos)
+{
+        unsigned snapshot;
+        struct fuse_conn *conn = fuse_ctl_file_conn_get(file);
+        if (conn == NULL)
+                return 0;
+        /* Copy the value out so the reference can be dropped right away. */
+        snapshot = conn->max_background;
+        fuse_conn_put(conn);
+        return fuse_conn_limit_read(file, buf, len, ppos, snapshot);
+}
+/*
+ * Write handler that updates the connection's max_background value.
+ * The input is parsed and bounded by fuse_conn_limit_write() using
+ * max_user_bgreq as the unprivileged ceiling; its result is returned
+ * whether or not a connection was found to update.
+ */
+static ssize_t fuse_conn_max_background_write(struct file *file,
+                                              const char __user *buf,
+                                              size_t count, loff_t *ppos)
+{
+        struct fuse_conn *conn;
+        unsigned parsed;
+        ssize_t rc = fuse_conn_limit_write(file, buf, count, ppos, &parsed,
+                                           max_user_bgreq);
+        /* Nothing was parsed: leave the connection untouched. */
+        if (rc <= 0)
+                return rc;
+        conn = fuse_ctl_file_conn_get(file);
+        if (conn != NULL) {
+                conn->max_background = parsed;
+                fuse_conn_put(conn);
+        }
+        return rc;
+}
+/*
+ * Read handler that reports the connection's congestion_threshold value.
+ * Returns 0 (nothing read) when no connection is attached to the file.
+ */
+static ssize_t fuse_conn_congestion_threshold_read(struct file *file,
+                                                   char __user *buf, size_t len,
+                                                   loff_t *ppos)
+{
+        unsigned snapshot;
+        struct fuse_conn *conn = fuse_ctl_file_conn_get(file);
+        if (conn == NULL)
+                return 0;
+        /* Copy the value out so the reference can be dropped right away. */
+        snapshot = conn->congestion_threshold;
+        fuse_conn_put(conn);
+        return fuse_conn_limit_read(file, buf, len, ppos, snapshot);
+}
+/*
+ * Write handler that updates the connection's congestion_threshold value.
+ * The input is parsed and bounded by fuse_conn_limit_write() using
+ * max_user_congthresh as the unprivileged ceiling; its result is returned
+ * whether or not a connection was found to update.
+ */
+static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
+                                                    const char __user *buf,
+                                                    size_t count, loff_t *ppos)
+{
+        struct fuse_conn *conn;
+        unsigned parsed;
+        ssize_t rc = fuse_conn_limit_write(file, buf, count, ppos, &parsed,
+                                           max_user_congthresh);
+        /* Nothing was parsed: leave the connection untouched. */
+        if (rc <= 0)
+                return rc;
+        conn = fuse_ctl_file_conn_get(file);
+        if (conn != NULL) {
+                conn->congestion_threshold = parsed;
+                fuse_conn_put(conn);
+        }
+        return rc;
+}
 static const struct file_operations fuse_ctl_abort_ops = {
        .open = nonseekable_open,
        .write = fuse_conn_abort_write,
@@ -71,6 +186,18 @@ static const struct file_operations fuse_ctl_waiting_ops = {
        .read = fuse_conn_waiting_read,
 };
+/* File operations behind the per-connection "max_background" control file. */
+static const struct file_operations fuse_conn_max_background_ops = {
+        .read = fuse_conn_max_background_read,
+        .write = fuse_conn_max_background_write,
+        .open = nonseekable_open,
+};
+/* File operations behind the per-connection "congestion_threshold" file. */
+static const struct file_operations fuse_conn_congestion_threshold_ops = {
+        .read = fuse_conn_congestion_threshold_read,
+        .write = fuse_conn_congestion_threshold_write,
+        .open = nonseekable_open,
+};
 static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
                                          struct fuse_conn *fc,
                                          const char *name,
@@ -127,9 +254,14 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
                goto err;
        if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1,
-                                NULL, &fuse_ctl_waiting_ops) ||
+                                 NULL, &fuse_ctl_waiting_ops) ||
            !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1,
-                                 NULL, &fuse_ctl_abort_ops))
+                                 NULL, &fuse_ctl_abort_ops) ||
+            !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600,
+                                 1, NULL, &fuse_conn_max_background_ops) ||
+            !fuse_ctl_add_dentry(parent, fc, "congestion_threshold",
+                                 S_IFREG | 0600, 1, NULL,
+                                 &fuse_conn_congestion_threshold_ops))
                goto err;
        return 0;
@@ -156,7 +288,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
                d_drop(dentry);
                dput(dentry);
        }
-        fuse_control_sb->s_root->d_inode->i_nlink--;
+        drop_nlink(fuse_control_sb->s_root->d_inode);
 }
 static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6484eb75acd6..51d9e33d634f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
 static void flush_bg_queue(struct fuse_conn *fc)
 {
-        while (fc->active_background < FUSE_MAX_BACKGROUND &&
+        while (fc->active_background < fc->max_background &&
               !list_empty(&fc->bg_queue)) {
                struct fuse_req *req;
@@ -280,11 +280,11 @@ __releases(&fc->lock)
        list_del(&req->intr_entry);
        req->state = FUSE_REQ_FINISHED;
        if (req->background) {
-                if (fc->num_background == FUSE_MAX_BACKGROUND) {
+                if (fc->num_background == fc->max_background) {
                        fc->blocked = 0;
                        wake_up_all(&fc->blocked_waitq);
                }
-                if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
+                if (fc->num_background == fc->congestion_threshold &&
                    fc->connected && fc->bdi_initialized) {
                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
@@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 {
        req->background = 1;
        fc->num_background++;
-        if (fc->num_background == FUSE_MAX_BACKGROUND)
+        if (fc->num_background == fc->max_background)
                fc->blocked = 1;
-        if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
+        if (fc->num_background == fc->congestion_threshold &&
            fc->bdi_initialized) {
                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 52b641fc0faf..fc9c79feb5f7 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -25,12 +25,6 @@
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
-/** Maximum number of outstanding background requests */
-#define FUSE_MAX_BACKGROUND 12
-/** Congestion starts at 75% of maximum */
-#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
 /** Bias for fi->writectr, meaning new writepages must not be sent */
 #define FUSE_NOWRITE INT_MIN
@@ -38,7 +32,7 @@
 #define FUSE_NAME_MAX 1024
 /** Number of dentries for each connection in the control filesystem */
-#define FUSE_CTL_NUM_DENTRIES 3
+#define FUSE_CTL_NUM_DENTRIES 5
 /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
    module will check permissions based on the file mode.  Otherwise no
@@ -55,6 +49,10 @@ extern struct list_head fuse_conn_list;
 /** Global mutex protecting fuse_conn_list and the control filesystem */
 extern struct mutex fuse_mutex;
+/** Module parameters */
+extern unsigned max_user_bgreq;
+extern unsigned max_user_congthresh;
 /** FUSE inode */
 struct fuse_inode {
        /** Inode data */
@@ -349,6 +347,12 @@ struct fuse_conn {
        /** rbtree of fuse_files waiting for poll events indexed by ph */
        struct rb_root polled_files;
+        /** Maximum number of outstanding background requests */
+        unsigned max_background;
+        /** Number of background requests at which congestion starts */
+        unsigned congestion_threshold;
        /** Number of requests currently in the background */
        unsigned num_background;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e5dbecd87b0f..6da947daabda 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -14,6 +14,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/parser.h>
 #include <linux/statfs.h>
 #include <linux/random.h>
@@ -28,10 +29,34 @@ static struct kmem_cache *fuse_inode_cachep;
 struct list_head fuse_conn_list;
 DEFINE_MUTEX(fuse_mutex);
+static int set_global_limit(const char *val, struct kernel_param *kp);
+unsigned max_user_bgreq; /* 0: default computed by sanitize_global_limit() */
+module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
+                  &max_user_bgreq, 0644);
+__MODULE_PARM_TYPE(max_user_bgreq, "uint");
+MODULE_PARM_DESC(max_user_bgreq,
+ "Global limit for the maximum number of backgrounded requests an "
+ "unprivileged user can set");
+unsigned max_user_congthresh; /* 0: default computed by sanitize_global_limit() */
+module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
+                  &max_user_congthresh, 0644);
+__MODULE_PARM_TYPE(max_user_congthresh, "uint");
+MODULE_PARM_DESC(max_user_congthresh,
+ "Global limit for the maximum congestion threshold an "
+ "unprivileged user can set");
 #define FUSE_SUPER_MAGIC 0x65735546
 #define FUSE_DEFAULT_BLKSIZE 512
+/** Maximum number of outstanding background requests */
+#define FUSE_DEFAULT_MAX_BACKGROUND 12
+/** Congestion starts at 75% of maximum */
+#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
 struct fuse_mount_data {
        int fd;
        unsigned rootmode;
@@ -517,6 +542,8 @@ void fuse_conn_init(struct fuse_conn *fc)
        INIT_LIST_HEAD(&fc->bg_queue);
        INIT_LIST_HEAD(&fc->entry);
        atomic_set(&fc->num_waiting, 0);
+        fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
+        fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
        fc->khctr = 0;
        fc->polled_files = RB_ROOT;
        fc->reqctr = 0;
@@ -727,6 +754,54 @@ static const struct super_operations fuse_super_operations = {
        .show_options   = fuse_show_options,
 };
+/*
+ * Bring a global limit into its usable range, in place.
+ *
+ * A value of 0 is replaced by a default derived from memory size: the
+ * number of struct fuse_req objects that fit into 1/8192 of the bytes
+ * described by num_physpages.  The result is then capped at 65535.
+ */
+static void sanitize_global_limit(unsigned *limit)
+{
+        const unsigned ceiling = (1 << 16) - 1;
+        if (*limit == 0)
+                *limit = ((num_physpages << PAGE_SHIFT) >> 13) /
+                         sizeof(struct fuse_req);
+        if (*limit > ceiling)
+                *limit = ceiling;
+}
+/*
+ * Module-parameter setter: store the value with param_set_uint() and, if
+ * that succeeded, pass the stored value through sanitize_global_limit().
+ * Returns 0 on success or the error from param_set_uint().
+ */
+static int set_global_limit(const char *val, struct kernel_param *kp)
+{
+        int err = param_set_uint(val, kp);
+        if (!err)
+                sanitize_global_limit((unsigned *)kp->arg);
+        return err;
+}
+/*
+ * Apply the background-request limits carried in an INIT reply to @fc.
+ *
+ * Replies with a minor version below 13 are ignored.  A zero field leaves
+ * the connection's current value alone.  Without CAP_SYS_ADMIN a non-zero
+ * field is clamped to the matching global limit (max_user_bgreq or
+ * max_user_congthresh), both of which are sanitized first.
+ */
+static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
+{
+        /* Queried before the protocol-version check. */
+        int privileged = capable(CAP_SYS_ADMIN);
+        unsigned wanted;
+        if (arg->minor < 13)
+                return;
+        sanitize_global_limit(&max_user_bgreq);
+        sanitize_global_limit(&max_user_congthresh);
+        if (arg->max_background) {
+                wanted = arg->max_background;
+                if (!privileged && wanted > max_user_bgreq)
+                        wanted = max_user_bgreq;
+                fc->max_background = wanted;
+        }
+        if (arg->congestion_threshold) {
+                wanted = arg->congestion_threshold;
+                if (!privileged && wanted > max_user_congthresh)
+                        wanted = max_user_congthresh;
+                fc->congestion_threshold = wanted;
+        }
+}
 static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 {
        struct fuse_init_out *arg = &req->misc.init_out;
@@ -736,6 +811,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
        else {
                unsigned long ra_pages;
+                process_init_limits(fc, arg);
                if (arg->minor >= 6) {
                        ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
                        if (arg->flags & FUSE_ASYNC_READ)
@@ -1150,6 +1227,9 @@ static int __init fuse_init(void)
        if (res)
                goto err_sysfs_cleanup;
+        sanitize_global_limit(&max_user_bgreq);
+        sanitize_global_limit(&max_user_congthresh);
        return 0;
 err_sysfs_cleanup:
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 61f32f3868cd..b0435dd0654d 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -456,7 +456,7 @@ int cleanup_journal_tail(journal_t *journal)
 {
        transaction_t * transaction;
        tid_t           first_tid;
-        unsigned long   blocknr, freed;
+        unsigned int    blocknr, freed;
        if (is_journal_aborted(journal))
                return 1;
@@ -502,8 +502,8 @@ int cleanup_journal_tail(journal_t *journal)
                freed = freed + journal->j_last - journal->j_first;
        jbd_debug(1,
-                  "Cleaning journal tail from %d to %d (offset %lu), "
+                  "Cleaning journal tail from %d to %d (offset %u), "
-                  "freeing %lu\n",
+                  "freeing %u\n",
                  journal->j_tail_sequence, first_tid, blocknr, freed);
        journal->j_free += freed;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 618e21c0b7a3..4bd882548c45 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -308,7 +308,7 @@ void journal_commit_transaction(journal_t *journal)
        int bufs;
        int flags;
        int err;
-        unsigned long blocknr;
+        unsigned int blocknr;
        ktime_t start_time;
        u64 commit_time;
        char *tagp = NULL;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f96f85092d1c..bd3c073b485d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -276,7 +276,7 @@ static void journal_kill_thread(journal_t *journal)
 int journal_write_metadata_buffer(transaction_t *transaction,
                                  struct journal_head  *jh_in,
                                  struct journal_head **jh_out,
-                                  unsigned long blocknr)
+                                  unsigned int blocknr)
 {
        int need_copy_out = 0;
        int done_copy_out = 0;
@@ -567,9 +567,9 @@ int log_wait_commit(journal_t *journal, tid_t tid)
 * Log buffer allocation routines:
 */
-int journal_next_log_block(journal_t *journal, unsigned long *retp)
+int journal_next_log_block(journal_t *journal, unsigned int *retp)
 {
-        unsigned long blocknr;
+        unsigned int blocknr;
        spin_lock(&journal->j_state_lock);
        J_ASSERT(journal->j_free > 1);
@@ -590,11 +590,11 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
 * this is a no-op.  If needed, we can use j_blk_offset - everything is
 * ready.
 */
-int journal_bmap(journal_t *journal, unsigned long blocknr,
+int journal_bmap(journal_t *journal, unsigned int blocknr,
-                 unsigned long *retp)
+                 unsigned int *retp)
 {
        int err = 0;
-        unsigned long ret;
+        unsigned int ret;
        if (journal->j_inode) {
                ret = bmap(journal->j_inode, blocknr);
@@ -604,7 +604,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr,
                        char b[BDEVNAME_SIZE];
                        printk(KERN_ALERT "%s: journal block not found "
-                                        "at offset %lu on %s\n",
+                                        "at offset %u on %s\n",
                                __func__,
                                blocknr,
                                bdevname(journal->j_dev, b));
@@ -630,7 +630,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr,
 struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
 {
        struct buffer_head *bh;
-        unsigned long blocknr;
+        unsigned int blocknr;
        int err;
        err = journal_next_log_block(journal, &blocknr);
@@ -774,7 +774,7 @@ journal_t * journal_init_inode (struct inode *inode)
        journal_t *journal = journal_init_common();
        int err;
        int n;
-        unsigned long blocknr;
+        unsigned int blocknr;
        if (!journal)
                return NULL;
@@ -846,12 +846,12 @@ static void journal_fail_superblock (journal_t *journal)
 static int journal_reset(journal_t *journal)
 {
        journal_superblock_t *sb = journal->j_superblock;
-        unsigned long first, last;
+        unsigned int first, last;
        first = be32_to_cpu(sb->s_first);
        last = be32_to_cpu(sb->s_maxlen);
        if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
-                printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
+                printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n",
                       first, last);
                journal_fail_superblock(journal);
                return -EINVAL;
@@ -885,7 +885,7 @@ static int journal_reset(journal_t *journal)
 **/
 int journal_create(journal_t *journal)
 {
-        unsigned long blocknr;
+        unsigned int blocknr;
        struct buffer_head *bh;
        journal_superblock_t *sb;
        int i, err;
@@ -969,14 +969,14 @@ void journal_update_superblock(journal_t *journal, int wait)
        if (sb->s_start == 0 && journal->j_tail_sequence ==
                                journal->j_transaction_sequence) {
                jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
-                        "(start %ld, seq %d, errno %d)\n",
+                        "(start %u, seq %d, errno %d)\n",
                        journal->j_tail, journal->j_tail_sequence,
                        journal->j_errno);
                goto out;
        }
        spin_lock(&journal->j_state_lock);
-        jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
+        jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
                  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
        sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1371,7 +1371,7 @@ int journal_flush(journal_t *journal)
 {
        int err = 0;
        transaction_t *transaction = NULL;
-        unsigned long old_tail;
+        unsigned int old_tail;
        spin_lock(&journal->j_state_lock);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index db5e982c5ddf..cb1a49ae605e 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 {
        int err;
        unsigned int max, nbufs, next;
-        unsigned long blocknr;
+        unsigned int blocknr;
        struct buffer_head *bh;
        struct buffer_head * bufs[MAXBUF];
@@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
                 unsigned int offset)
 {
        int err;
-        unsigned long blocknr;
+        unsigned int blocknr;
        struct buffer_head *bh;
        *bhp = NULL;
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal,
                        struct recovery_info *info, enum passtype pass)
 {
        unsigned int            first_commit_ID, next_commit_ID;
-        unsigned long           next_log_block;
+        unsigned int            next_log_block;
        int                     err, success = 0;
        journal_superblock_t *  sb;
        journal_header_t *      tmp;
@@ -367,14 +367,14 @@ static int do_one_pass(journal_t *journal,
                        if (tid_geq(next_commit_ID, info->end_transaction))
                                break;
-                jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
+                jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n",
                          next_commit_ID, next_log_block, journal->j_last);
                /* Skip over each chunk of the transaction looking
                 * either the next descriptor block or the final commit
                 * record. */
-                jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
+                jbd_debug(3, "JBD: checking block %u\n", next_log_block);
                err = jread(&bh, journal, next_log_block);
                if (err)
                        goto failed;
@@ -429,7 +429,7 @@ static int do_one_pass(journal_t *journal,
                        tagp = &bh->b_data[sizeof(journal_header_t)];
                        while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
                               <= journal->j_blocksize) {
-                                unsigned long io_block;
+                                unsigned int io_block;
                                tag = (journal_block_tag_t *) tagp;
                                flags = be32_to_cpu(tag->t_flags);
@@ -443,10 +443,10 @@ static int do_one_pass(journal_t *journal,
                                        success = err;
                                        printk (KERN_ERR
                                                "JBD: IO error %d recovering "
-                                                "block %ld in log\n",
+                                                "block %u in log\n",
                                                err, io_block);
                                } else {
-                                        unsigned long blocknr;
+                                        unsigned int blocknr;
                                        J_ASSERT(obh != NULL);
                                        blocknr = be32_to_cpu(tag->t_blocknr);
@@ -581,7 +581,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
        max = be32_to_cpu(header->r_count);
        while (offset < max) {
-                unsigned long blocknr;
+                unsigned int blocknr;
                int err;
                blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index da6cd9bdaabc..ad717328343a 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -101,7 +101,7 @@ struct jbd_revoke_record_s
 {
        struct list_head  hash;
        tid_t             sequence;     /* Used for recovery only */
-        unsigned long     blocknr;
+        unsigned int      blocknr;
 };
@@ -126,7 +126,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int);
 /* Utility functions to maintain the revoke table */
 /* Borrowed from buffer.c: this is a tried and tested block hash function */
-static inline int hash(journal_t *journal, unsigned long block)
+static inline int hash(journal_t *journal, unsigned int block)
 {
        struct jbd_revoke_table_s *table = journal->j_revoke;
        int hash_shift = table->hash_shift;
@@ -136,7 +136,7 @@ static inline int hash(journal_t *journal, unsigned long block)
                (block << (hash_shift - 12))) & (table->hash_size - 1);
 }
-static int insert_revoke_hash(journal_t *journal, unsigned long blocknr,
+static int insert_revoke_hash(journal_t *journal, unsigned int blocknr,
                              tid_t seq)
 {
        struct list_head *hash_list;
@@ -166,7 +166,7 @@ oom:
 /* Find a revoke record in the journal's hash table. */
 static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
-                                                      unsigned long blocknr)
+                                                      unsigned int blocknr)
 {
        struct list_head *hash_list;
        struct jbd_revoke_record_s *record;
@@ -332,7 +332,7 @@ void journal_destroy_revoke(journal_t *journal)
 * by one.
 */
-int journal_revoke(handle_t *handle, unsigned long blocknr,
+int journal_revoke(handle_t *handle, unsigned int blocknr,
                   struct buffer_head *bh_in)
 {
        struct buffer_head *bh = NULL;
@@ -401,7 +401,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
                }
        }
-        jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in);
+        jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in);
        err = insert_revoke_hash(journal, blocknr,
                                handle->h_transaction->t_tid);
        BUFFER_TRACE(bh_in, "exit");
@@ -644,7 +644,7 @@ static void flush_descriptor(journal_t *journal,
 */
 int journal_set_revoke(journal_t *journal,
-                       unsigned long blocknr,
+                       unsigned int blocknr,
                       tid_t sequence)
 {
        struct jbd_revoke_record_s *record;
@@ -668,7 +668,7 @@ int journal_set_revoke(journal_t *journal,
 */
 int journal_test_revoke(journal_t *journal,
-                        unsigned long blocknr,
+                        unsigned int blocknr,
                        tid_t sequence)
 {
        struct jbd_revoke_record_s *record;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index c03ac11f74be..006f9ad838a2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -56,7 +56,8 @@ get_transaction(journal_t *journal, transaction_t *transaction)
        spin_lock_init(&transaction->t_handle_lock);
        /* Set up the commit timer for the new transaction. */
-        journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
+        journal->j_commit_timer.expires =
+                                round_jiffies_up(transaction->t_expires);
        add_timer(&journal->j_commit_timer);
        J_ASSERT(journal->j_running_transaction == NULL);
@@ -228,6 +229,8 @@ repeat_locked:
                  __log_space_left(journal));
        spin_unlock(&transaction->t_handle_lock);
        spin_unlock(&journal->j_state_lock);
+        lock_map_acquire(&handle->h_lockdep_map);
 out:
        if (unlikely(new_transaction))          /* It's usually NULL */
                kfree(new_transaction);
@@ -292,9 +295,6 @@ handle_t *journal_start(journal_t *journal, int nblocks)
                handle = ERR_PTR(err);
                goto out;
        }
-        lock_map_acquire(&handle->h_lockdep_map);
 out:
        return handle;
 }
@@ -416,6 +416,7 @@ int journal_restart(handle_t *handle, int nblocks)
        __log_start_commit(journal, transaction->t_tid);
        spin_unlock(&journal->j_state_lock);
+        lock_map_release(&handle->h_lockdep_map);
        handle->h_buffer_credits = nblocks;
        ret = start_this_handle(journal, handle);
        return ret;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index aecf2519db76..d5e5559e31db 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -216,7 +216,6 @@ xfs_setfilesize(
        if (ip->i_d.di_size < isize) {
                ip->i_d.di_size = isize;
                ip->i_update_core = 1;
-                ip->i_update_size = 1;
                xfs_mark_inode_dirty_sync(ip);
        }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 0542fd507649..988d8f87bc0f 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -172,12 +172,21 @@ xfs_file_release(
 */
 STATIC int
 xfs_file_fsync(
-        struct file     *filp,
+        struct file             *file,
-        struct dentry   *dentry,
+        struct dentry           *dentry,
-        int             datasync)
+        int                     datasync)
 {
-        xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
+        struct inode            *inode = dentry->d_inode;
-        return -xfs_fsync(XFS_I(dentry->d_inode));
+        struct xfs_inode        *ip = XFS_I(inode);
+        int                     error;
+        /* capture size updates in I/O completion before writing the inode. */
+        error = filemap_fdatawait(inode->i_mapping);
+        if (error)
+                return error;
+        xfs_iflags_clear(ip, XFS_ITRUNCATED);
+        return -xfs_fsync(ip);
 }
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6c32f1d63d8c..da0159d99f82 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -43,7 +43,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index fde63a3c4ecc..49e4a6aea73c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -812,19 +812,21 @@ write_retry:
        /* Handle various SYNC-type writes */
        if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
+                loff_t end = pos + ret - 1;
                int error2;
                xfs_iunlock(xip, iolock);
                if (need_i_mutex)
                        mutex_unlock(&inode->i_mutex);
-                error2 = filemap_write_and_wait_range(mapping, pos,
-                                                      pos + ret - 1);
+                error2 = filemap_write_and_wait_range(mapping, pos, end);
                if (!error)
                        error = error2;
                if (need_i_mutex)
                        mutex_lock(&inode->i_mutex);
                xfs_ilock(xip, iolock);
-                error2 = xfs_write_sync_logforce(mp, xip);
+                error2 = xfs_fsync(xip);
                if (!error)
                        error = error2;
        }
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index c3526d445f6a..76fdc5861932 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -20,16 +20,9 @@
 DEFINE_PER_CPU(struct xfsstats, xfsstats);
-STATIC int
+static int xfs_stat_proc_show(struct seq_file *m, void *v)
-xfs_read_xfsstats(
-        char            *buffer,
-        char            **start,
-        off_t           offset,
-        int             count,
-        int             *eof,
-        void            *data)
 {
-        int             c, i, j, len, val;
+        int             c, i, j, val;
        __uint64_t      xs_xstrat_bytes = 0;
        __uint64_t      xs_write_bytes = 0;
        __uint64_t      xs_read_bytes = 0;
@@ -60,18 +53,18 @@ xfs_read_xfsstats(
        };
        /* Loop over all stats groups */
-        for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) {
+        for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
-                len += sprintf(buffer + len, "%s", xstats[i].desc);
+                seq_printf(m, "%s", xstats[i].desc);
                /* inner loop does each group */
                while (j < xstats[i].endpoint) {
                        val = 0;
                        /* sum over all cpus */
                        for_each_possible_cpu(c)
                                val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-                        len += sprintf(buffer + len, " %u", val);
+                        seq_printf(m, " %u", val);
                        j++;
                }
-                buffer[len++] = '\n';
+                seq_putc(m, '\n');
        }
        /* extra precision counters */
        for_each_possible_cpu(i) {
@@ -80,36 +73,38 @@ xfs_read_xfsstats(
                xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
        }
-        len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n",
+        seq_printf(m, "xpc %Lu %Lu %Lu\n",
                        xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
-        len += sprintf(buffer + len, "debug %u\n",
+        seq_printf(m, "debug %u\n",
 #if defined(DEBUG)
                1);
 #else
                0);
 #endif
+        return 0;
+}
-        if (offset >= len) {
+static int xfs_stat_proc_open(struct inode *inode, struct file *file)
-                *start = buffer;
+{
-                *eof = 1;
+        return single_open(file, xfs_stat_proc_show, NULL);
-                return 0;
-        }
-        *start = buffer + offset;
-        if ((len -= offset) > count)
-                return count;
-        *eof = 1;
-        return len;
 }
+static const struct file_operations xfs_stat_proc_fops = {
+        .owner          = THIS_MODULE,
+        .open           = xfs_stat_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+};
 int
 xfs_init_procfs(void)
 {
        if (!proc_mkdir("fs/xfs", NULL))
                goto out;
-        if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
+        if (!proc_create("fs/xfs/stat", 0, NULL,
-                        xfs_read_xfsstats, NULL))
+                         &xfs_stat_proc_fops))
                goto out_remove_entry;
        return 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a220d36f789b..5d7c60ac77b4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -579,15 +579,19 @@ xfs_showargs(
        else if (mp->m_qflags & XFS_UQUOTA_ACCT)
                seq_puts(m, "," MNTOPT_UQUOTANOENF);
-        if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+        /* Either project or group quotas can be active, not both */
-                seq_puts(m, "," MNTOPT_PRJQUOTA);
-        else if (mp->m_qflags & XFS_PQUOTA_ACCT)
+        if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-                seq_puts(m, "," MNTOPT_PQUOTANOENF);
+                if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                        seq_puts(m, "," MNTOPT_PRJQUOTA);
-        if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD))
+                else
-                seq_puts(m, "," MNTOPT_GRPQUOTA);
+                        seq_puts(m, "," MNTOPT_PQUOTANOENF);
-        else if (mp->m_qflags & XFS_GQUOTA_ACCT)
+        } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-                seq_puts(m, "," MNTOPT_GQUOTANOENF);
+                if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                        seq_puts(m, "," MNTOPT_GRPQUOTA);
+                else
+                        seq_puts(m, "," MNTOPT_GQUOTANOENF);
+        }
        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
                seq_puts(m, "," MNTOPT_NOQUOTA);
@@ -687,7 +691,7 @@ xfs_barrier_test(
        return error;
 }
-void
+STATIC void
 xfs_mountfs_check_barriers(xfs_mount_t *mp)
 {
        int error;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 98ef624d9baf..320be6aea492 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -749,21 +749,6 @@ __xfs_inode_clear_reclaim_tag(
                        XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 }
-void
-xfs_inode_clear_reclaim_tag(
-        xfs_inode_t     *ip)
-{
-        xfs_mount_t     *mp = ip->i_mount;
-        xfs_perag_t     *pag = xfs_get_perag(mp, ip->i_ino);
-        read_lock(&pag->pag_ici_lock);
-        spin_lock(&ip->i_flags_lock);
-        __xfs_inode_clear_reclaim_tag(mp, pag, ip);
-        spin_unlock(&ip->i_flags_lock);
-        read_unlock(&pag->pag_ici_lock);
-        xfs_put_perag(mp, pag);
-}
 STATIC int
 xfs_reclaim_inode_now(
        struct xfs_inode        *ip,
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 59120602588a..27920eb7a820 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -49,7 +49,6 @@ int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
-void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
 void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
                                struct xfs_inode *ip);
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 21b08c0396a1..83e7ea3e25fa 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -48,50 +48,34 @@
 struct xqmstats xqmstats;
-STATIC int
+static int xqm_proc_show(struct seq_file *m, void *v)
-xfs_qm_read_xfsquota(
-        char            *buffer,
-        char            **start,
-        off_t           offset,
-        int             count,
-        int             *eof,
-        void            *data)
 {
-        int             len;
        /* maximum; incore; ratio free to inuse; freelist */
-        len = sprintf(buffer, "%d\t%d\t%d\t%u\n",
+        seq_printf(m, "%d\t%d\t%d\t%u\n",
                        ndquot,
                        xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
                        xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
                        xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
+        return 0;
-        if (offset >= len) {
-                *start = buffer;
-                *eof = 1;
-                return 0;
-        }
-        *start = buffer + offset;
-        if ((len -= offset) > count)
-                return count;
-        *eof = 1;
-        return len;
 }
-STATIC int
+static int xqm_proc_open(struct inode *inode, struct file *file)
-xfs_qm_read_stats(
-        char            *buffer,
-        char            **start,
-        off_t           offset,
-        int             count,
-        int             *eof,
-        void            *data)
 {
-        int             len;
+        return single_open(file, xqm_proc_show, NULL);
+}
+static const struct file_operations xqm_proc_fops = {
+        .owner          = THIS_MODULE,
+        .open           = xqm_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+};
+static int xqmstat_proc_show(struct seq_file *m, void *v)
+{
        /* quota performance statistics */
-        len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n",
+        seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
                        xqmstats.xs_qm_dqreclaims,
                        xqmstats.xs_qm_dqreclaim_misses,
                        xqmstats.xs_qm_dquot_dups,
@@ -100,25 +84,27 @@ xfs_qm_read_stats(
                        xqmstats.xs_qm_dqwants,
                        xqmstats.xs_qm_dqshake_reclaims,
                        xqmstats.xs_qm_dqinact_reclaims);
+        return 0;
+}
-        if (offset >= len) {
+static int xqmstat_proc_open(struct inode *inode, struct file *file)
-                *start = buffer;
+{
-                *eof = 1;
+        return single_open(file, xqmstat_proc_show, NULL);
-                return 0;
-        }
-        *start = buffer + offset;
-        if ((len -= offset) > count)
-                return count;
-        *eof = 1;
-        return len;
 }
+static const struct file_operations xqmstat_proc_fops = {
+        .owner          = THIS_MODULE,
+        .open           = xqmstat_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+};
 void
 xfs_qm_init_procfs(void)
 {
-        create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL);
+        proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
-        create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL);
+        proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
 }
 void
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index f24b50b68d03..a5d54bf4931b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -198,6 +198,15 @@ typedef struct xfs_perag
        xfs_agino_t     pagi_count;     /* number of allocated inodes */
        int             pagb_count;     /* pagb slots in use */
        xfs_perag_busy_t *pagb_list;    /* unstable blocks */
+        /*
+         * Inode allocation search lookup optimisation.
+         * If the pagino matches, the search for new inodes
+         * doesn't need to search the near ones again straight away
+         */
+        xfs_agino_t     pagl_pagino;
+        xfs_agino_t     pagl_leftrec;
+        xfs_agino_t     pagl_rightrec;
 #ifdef __KERNEL__
        spinlock_t      pagb_lock;      /* lock for pagb_list */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8ee5b5a76a2a..8971fb09d387 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3713,7 +3713,7 @@ done:
 * entry (null if none).  Else, *lastxp will be set to the index
 * of the found entry; *gotp will contain the entry.
 */
-xfs_bmbt_rec_host_t *                   /* pointer to found extent entry */
+STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
 xfs_bmap_search_multi_extents(
        xfs_ifork_t     *ifp,           /* inode fork pointer */
        xfs_fileoff_t   bno,            /* block number searched for */
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 1b8ff9256bd0..56f62d2edc35 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -392,17 +392,6 @@ xfs_bmap_count_blocks(
        int                     whichfork,
        int                     *count);
-/*
- * Search the extent records for the entry containing block bno.
- * If bno lies in a hole, point to the next entry.  If bno lies
- * past eof, *eofp will be set, and *prevp will contain the last
- * entry (null if none).  Else, *lastxp will be set to the index
- * of the found entry; *gotp will contain the entry.
- */
-xfs_bmbt_rec_host_t *
-xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
-                        xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
 #endif  /* __KERNEL__ */
 #endif  /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 5c1ade06578e..eb7b702d0690 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -202,16 +202,6 @@ xfs_bmbt_get_state(
                                ext_flag);
 }
-/* Endian flipping versions of the bmbt extraction functions */
-void
-xfs_bmbt_disk_get_all(
-        xfs_bmbt_rec_t  *r,
-        xfs_bmbt_irec_t *s)
-{
-        __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
-                                get_unaligned_be64(&r->l1), s);
-}
 /*
 * Extract the blockcount field from an on disk bmap extent record.
 */
@@ -816,6 +806,16 @@ xfs_bmbt_trace_key(
        *l1 = 0;
 }
+/* Endian flipping versions of the bmbt extraction functions */
+STATIC void
+xfs_bmbt_disk_get_all(
+        xfs_bmbt_rec_t  *r,
+        xfs_bmbt_irec_t *s)
+{
+        __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
+                                get_unaligned_be64(&r->l1), s);
+}
 STATIC void
 xfs_bmbt_trace_record(
        struct xfs_btree_cur    *cur,
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 0e8df007615e..5549d495947f 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -220,7 +220,6 @@ extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
 extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
 extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r);
-extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
 extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
 extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 26717388acf5..52b5f14d0c32 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -646,46 +646,6 @@ xfs_btree_read_bufl(
 }
 /*
- * Get a buffer for the block, return it read in.
- * Short-form addressing.
- */
-int                                     /* error */
-xfs_btree_read_bufs(
-        xfs_mount_t     *mp,            /* file system mount point */
-        xfs_trans_t     *tp,            /* transaction pointer */
-        xfs_agnumber_t  agno,           /* allocation group number */
-        xfs_agblock_t   agbno,          /* allocation group block number */
-        uint            lock,           /* lock flags for read_buf */
-        xfs_buf_t       **bpp,          /* buffer for agno/agbno */
-        int             refval)         /* ref count value for buffer */
-{
-        xfs_buf_t       *bp;            /* return value */
-        xfs_daddr_t     d;              /* real disk block address */
-        int             error;
-        ASSERT(agno != NULLAGNUMBER);
-        ASSERT(agbno != NULLAGBLOCK);
-        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-        if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-                                        mp->m_bsize, lock, &bp))) {
-                return error;
-        }
-        ASSERT(!bp || !XFS_BUF_GETERROR(bp));
-        if (bp != NULL) {
-                switch (refval) {
-                case XFS_ALLOC_BTREE_REF:
-                        XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
-                        break;
-                case XFS_INO_BTREE_REF:
-                        XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
-                        break;
-                }
-        }
-        *bpp = bp;
-        return 0;
-}
-/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Long-form addressing.
 */
@@ -2951,7 +2911,7 @@ error0:
 * inode we have to copy the single block it was pointing to into the
 * inode.
 */
-int
+STATIC int
 xfs_btree_kill_iroot(
        struct xfs_btree_cur    *cur)
 {
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4f852b735b96..7fa07062bdda 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -379,20 +379,6 @@ xfs_btree_read_bufl(
        int                     refval);/* ref count value for buffer */
 /*
- * Get a buffer for the block, return it read in.
- * Short-form addressing.
- */
-int                                     /* error */
-xfs_btree_read_bufs(
-        struct xfs_mount        *mp,    /* file system mount point */
-        struct xfs_trans        *tp,    /* transaction pointer */
-        xfs_agnumber_t          agno,   /* allocation group number */
-        xfs_agblock_t           agbno,  /* allocation group block number */
-        uint                    lock,   /* lock flags for read_buf */
-        struct xfs_buf          **bpp,  /* buffer for agno/agbno */
-        int                     refval);/* ref count value for buffer */
-/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Long-form addressing.
 */
@@ -432,7 +418,6 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
 int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
 int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
 int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
-int xfs_btree_kill_iroot(struct xfs_btree_cur *);
 int xfs_btree_insert(struct xfs_btree_cur *, int *);
 int xfs_btree_delete(struct xfs_btree_cur *, int *);
 int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 3120a3a5e20f..ab64f3efb43b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -57,75 +57,35 @@ xfs_ialloc_cluster_alignment(
 }
 /*
- * Lookup the record equal to ino in the btree given by cur.
+ * Lookup a record by ino in the btree given by cur.
- */
-STATIC int                              /* error */
-xfs_inobt_lookup_eq(
-        struct xfs_btree_cur    *cur,   /* btree cursor */
-        xfs_agino_t             ino,    /* starting inode of chunk */
-        __int32_t               fcnt,   /* free inode count */
-        xfs_inofree_t           free,   /* free inode mask */
-        int                     *stat)  /* success/failure */
-{
-        cur->bc_rec.i.ir_startino = ino;
-        cur->bc_rec.i.ir_freecount = fcnt;
-        cur->bc_rec.i.ir_free = free;
-        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
-/*
- * Lookup the first record greater than or equal to ino
- * in the btree given by cur.
 */
 int                                     /* error */
-xfs_inobt_lookup_ge(
+xfs_inobt_lookup(
        struct xfs_btree_cur    *cur,   /* btree cursor */
        xfs_agino_t             ino,    /* starting inode of chunk */
-        __int32_t               fcnt,   /* free inode count */
+        xfs_lookup_t            dir,    /* <=, >=, == */
-        xfs_inofree_t           free,   /* free inode mask */
        int                     *stat)  /* success/failure */
 {
        cur->bc_rec.i.ir_startino = ino;
-        cur->bc_rec.i.ir_freecount = fcnt;
+        cur->bc_rec.i.ir_freecount = 0;
-        cur->bc_rec.i.ir_free = free;
+        cur->bc_rec.i.ir_free = 0;
-        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+        return xfs_btree_lookup(cur, dir, stat);
 }
 /*
- * Lookup the first record less than or equal to ino
+ * Update the record referred to by cur to the value given.
- * in the btree given by cur.
- */
-int                                     /* error */
-xfs_inobt_lookup_le(
-        struct xfs_btree_cur    *cur,   /* btree cursor */
-        xfs_agino_t             ino,    /* starting inode of chunk */
-        __int32_t               fcnt,   /* free inode count */
-        xfs_inofree_t           free,   /* free inode mask */
-        int                     *stat)  /* success/failure */
-{
-        cur->bc_rec.i.ir_startino = ino;
-        cur->bc_rec.i.ir_freecount = fcnt;
-        cur->bc_rec.i.ir_free = free;
-        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
-}
-/*
- * Update the record referred to by cur to the value given
- * by [ino, fcnt, free].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
 STATIC int                              /* error */
 xfs_inobt_update(
        struct xfs_btree_cur    *cur,   /* btree cursor */
-        xfs_agino_t             ino,    /* starting inode of chunk */
+        xfs_inobt_rec_incore_t  *irec)  /* btree record */
-        __int32_t               fcnt,   /* free inode count */
-        xfs_inofree_t           free)   /* free inode mask */
 {
        union xfs_btree_rec     rec;
-        rec.inobt.ir_startino = cpu_to_be32(ino);
+        rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
-        rec.inobt.ir_freecount = cpu_to_be32(fcnt);
+        rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
-        rec.inobt.ir_free = cpu_to_be64(free);
+        rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
        return xfs_btree_update(cur, &rec);
 }
@@ -135,9 +95,7 @@ xfs_inobt_update(
 int                                     /* error */
 xfs_inobt_get_rec(
        struct xfs_btree_cur    *cur,   /* btree cursor */
-        xfs_agino_t             *ino,   /* output: starting inode of chunk */
+        xfs_inobt_rec_incore_t  *irec,  /* btree record */
-        __int32_t               *fcnt,  /* output: number of free inodes */
-        xfs_inofree_t           *free,  /* output: free inode mask */
        int                     *stat)  /* output: success/failure */
 {
        union xfs_btree_rec     *rec;
@@ -145,14 +103,136 @@ xfs_inobt_get_rec(
        error = xfs_btree_get_rec(cur, &rec, stat);
        if (!error && *stat == 1) {
-                *ino = be32_to_cpu(rec->inobt.ir_startino);
+                irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
-                *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
+                irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
-                *free = be64_to_cpu(rec->inobt.ir_free);
+                irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
        }
        return error;
 }
 /*
+ * Verify that the number of free inodes in the AGI is correct.
+ */
+#ifdef DEBUG
+STATIC int
+xfs_check_agi_freecount(
+        struct xfs_btree_cur    *cur,
+        struct xfs_agi          *agi)
+{
+        if (cur->bc_nlevels == 1) {
+                xfs_inobt_rec_incore_t rec;
+                int             freecount = 0;
+                int             error;
+                int             i;
+                error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+                if (error)
+                        return error;
+                do {
+                        error = xfs_inobt_get_rec(cur, &rec, &i);
+                        if (error)
+                                return error;
+                        if (i) {
+                                freecount += rec.ir_freecount;
+                                error = xfs_btree_increment(cur, 0, &i);
+                                if (error)
+                                        return error;
+                        }
+                } while (i == 1);
+                if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
+                        ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
+        }
+        return 0;
+}
+#else
+#define xfs_check_agi_freecount(cur, agi)       0
+#endif
+/*
+ * Initialise a new set of inodes.
+ */
+STATIC void
+xfs_ialloc_inode_init(
+        struct xfs_mount        *mp,
+        struct xfs_trans        *tp,
+        xfs_agnumber_t          agno,
+        xfs_agblock_t           agbno,
+        xfs_agblock_t           length,
+        unsigned int            gen)
+{
+        struct xfs_buf          *fbuf;
+        struct xfs_dinode       *free;
+        int                     blks_per_cluster, nbufs, ninodes;
+        int                     version;
+        int                     i, j;
+        xfs_daddr_t             d;
+        /*
+         * Loop over the new block(s), filling in the inodes.
+         * For small block sizes, manipulate the inodes in buffers
+         * which are multiples of the blocks size.
+         */
+        if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
+                blks_per_cluster = 1;
+                nbufs = length;
+                ninodes = mp->m_sb.sb_inopblock;
+        } else {
+                blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
+                                   mp->m_sb.sb_blocksize;
+                nbufs = length / blks_per_cluster;
+                ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
+        }
+        /*
+         * Figure out what version number to use in the inodes we create.
+         * If the superblock version has caught up to the one that supports
+         * the new inode format, then use the new inode version.  Otherwise
+         * use the old version so that old kernels will continue to be
+         * able to use the file system.
+         */
+        if (xfs_sb_version_hasnlink(&mp->m_sb))
+                version = 2;
+        else
+                version = 1;
+        for (j = 0; j < nbufs; j++) {
+                /*
+                 * Get the block.
+                 */
+                d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
+                fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+                                         mp->m_bsize * blks_per_cluster,
+                                         XFS_BUF_LOCK);
+                ASSERT(fbuf);
+                ASSERT(!XFS_BUF_GETERROR(fbuf));
+                /*
+                 * Initialize all inodes in this buffer and then log them.
+                 *
+                 * XXX: It would be much better if we had just one transaction
+                 *      to log a whole cluster of inodes instead of all the
+                 *      individual transactions causing a lot of log traffic.
+                 */
+                xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
+                for (i = 0; i < ninodes; i++) {
+                        int     ioffset = i << mp->m_sb.sb_inodelog;
+                        uint    isize = sizeof(struct xfs_dinode);
+                        free = xfs_make_iptr(mp, fbuf, i);
+                        free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+                        free->di_version = version;
+                        free->di_gen = cpu_to_be32(gen);
+                        free->di_next_unlinked = cpu_to_be32(NULLAGINO);
+                        xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
+                }
+                xfs_trans_inode_alloc_buf(tp, fbuf);
+        }
+}
+/*
 * Allocate new inodes in the allocation group specified by agbp.
 * Return 0 for success, else error code.
 */
@@ -164,24 +244,15 @@ xfs_ialloc_ag_alloc(
 {
        xfs_agi_t       *agi;           /* allocation group header */
        xfs_alloc_arg_t args;           /* allocation argument structure */
-        int             blks_per_cluster;  /* fs blocks per inode cluster */
        xfs_btree_cur_t *cur;           /* inode btree cursor */
-        xfs_daddr_t     d;              /* disk addr of buffer */
        xfs_agnumber_t  agno;
        int             error;
-        xfs_buf_t       *fbuf;          /* new free inodes' buffer */
+        int             i;
-        xfs_dinode_t    *free;          /* new free inode structure */
-        int             i;              /* inode counter */
-        int             j;              /* block counter */
-        int             nbufs;          /* num bufs of new inodes */
        xfs_agino_t     newino;         /* new first inode's number */
        xfs_agino_t     newlen;         /* new number of inodes */
-        int             ninodes;        /* num inodes per buf */
        xfs_agino_t     thisino;        /* current inode number, for loop */
-        int             version;        /* inode version number to use */
        int             isaligned = 0;  /* inode allocation at stripe unit */
                                        /* boundary */
-        unsigned int    gen;
        args.tp = tp;
        args.mp = tp->t_mountp;
@@ -202,12 +273,12 @@ xfs_ialloc_ag_alloc(
         */
        agi = XFS_BUF_TO_AGI(agbp);
        newino = be32_to_cpu(agi->agi_newino);
+        agno = be32_to_cpu(agi->agi_seqno);
        args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
                        XFS_IALLOC_BLOCKS(args.mp);
        if (likely(newino != NULLAGINO &&
                  (args.agbno < be32_to_cpu(agi->agi_length)))) {
-                args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-                                be32_to_cpu(agi->agi_seqno), args.agbno);
                args.type = XFS_ALLOCTYPE_THIS_BNO;
                args.mod = args.total = args.wasdel = args.isfl =
                        args.userdata = args.minalignslop = 0;
@@ -258,8 +329,7 @@ xfs_ialloc_ag_alloc(
                 * For now, just allocate blocks up front.
                 */
                args.agbno = be32_to_cpu(agi->agi_root);
-                args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-                                be32_to_cpu(agi->agi_seqno), args.agbno);
                /*
                 * Allocate a fixed-size extent of inodes.
                 */
@@ -282,8 +352,7 @@ xfs_ialloc_ag_alloc(
        if (isaligned && args.fsbno == NULLFSBLOCK) {
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
                args.agbno = be32_to_cpu(agi->agi_root);
-                args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-                                be32_to_cpu(agi->agi_seqno), args.agbno);
                args.alignment = xfs_ialloc_cluster_alignment(&args);
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
@@ -294,85 +363,30 @@ xfs_ialloc_ag_alloc(
                return 0;
        }
        ASSERT(args.len == args.minlen);
-        /*
-         * Convert the results.
-         */
-        newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
-        /*
-         * Loop over the new block(s), filling in the inodes.
-         * For small block sizes, manipulate the inodes in buffers
-         * which are multiples of the blocks size.
-         */
-        if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
-                blks_per_cluster = 1;
-                nbufs = (int)args.len;
-                ninodes = args.mp->m_sb.sb_inopblock;
-        } else {
-                blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
-                                   args.mp->m_sb.sb_blocksize;
-                nbufs = (int)args.len / blks_per_cluster;
-                ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
-        }
-        /*
-         * Figure out what version number to use in the inodes we create.
-         * If the superblock version has caught up to the one that supports
-         * the new inode format, then use the new inode version.  Otherwise
-         * use the old version so that old kernels will continue to be
-         * able to use the file system.
-         */
-        if (xfs_sb_version_hasnlink(&args.mp->m_sb))
-                version = 2;
-        else
-                version = 1;
        /*
+         * Stamp and write the inode buffers.
+         *
         * Seed the new inode cluster with a random generation number. This
         * prevents short-term reuse of generation numbers if a chunk is
         * freed and then immediately reallocated. We use random numbers
         * rather than a linear progression to prevent the next generation
         * number from being easily guessable.
         */
-        gen = random32();
+        xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len,
-        for (j = 0; j < nbufs; j++) {
+                              random32());
-                /*
-                 * Get the block.
-                 */
-                d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
-                                     args.agbno + (j * blks_per_cluster));
-                fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
-                                         args.mp->m_bsize * blks_per_cluster,
-                                         XFS_BUF_LOCK);
-                ASSERT(fbuf);
-                ASSERT(!XFS_BUF_GETERROR(fbuf));
-                /*
+        /*
-                 * Initialize all inodes in this buffer and then log them.
+         * Convert the results.
-                 *
+         */
-                 * XXX: It would be much better if we had just one transaction to
+        newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
-                 *      log a whole cluster of inodes instead of all the individual
-                 *      transactions causing a lot of log traffic.
-                 */
-                xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
-                for (i = 0; i < ninodes; i++) {
-                        int     ioffset = i << args.mp->m_sb.sb_inodelog;
-                        uint    isize = sizeof(struct xfs_dinode);
-                        free = xfs_make_iptr(args.mp, fbuf, i);
-                        free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
-                        free->di_version = version;
-                        free->di_gen = cpu_to_be32(gen);
-                        free->di_next_unlinked = cpu_to_be32(NULLAGINO);
-                        xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
-                }
-                xfs_trans_inode_alloc_buf(tp, fbuf);
-        }
        be32_add_cpu(&agi->agi_count, newlen);
        be32_add_cpu(&agi->agi_freecount, newlen);
-        agno = be32_to_cpu(agi->agi_seqno);
        down_read(&args.mp->m_peraglock);
        args.mp->m_perag[agno].pagi_freecount += newlen;
        up_read(&args.mp->m_peraglock);
        agi->agi_newino = cpu_to_be32(newino);
        /*
         * Insert records describing the new inode chunk into the btree.
         */
@@ -380,13 +394,17 @@ xfs_ialloc_ag_alloc(
        for (thisino = newino;
             thisino < newino + newlen;
             thisino += XFS_INODES_PER_CHUNK) {
-                if ((error = xfs_inobt_lookup_eq(cur, thisino,
+                cur->bc_rec.i.ir_startino = thisino;
-                                XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
+                cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
+                cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
+                error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
+                if (error) {
                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
                        return error;
                }
                ASSERT(i == 0);
-                if ((error = xfs_btree_insert(cur, &i))) {
+                error = xfs_btree_insert(cur, &i);
+                if (error) {
                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
                        return error;
                }
@@ -539,6 +557,62 @@ nextag:
 }
 /*
+ * Step the cursor one record away from its current position: backwards
+ * (xfs_btree_decrement) when left is non-zero, forwards (xfs_btree_increment)
+ * when it is zero.
+ *
+ * *done is set to 1 when the step reports a zero status, otherwise to 0 and
+ * the record under the cursor is fetched into *rec.  The fetch status is
+ * then passed through the XFS_WANT_CORRUPTED_RETURN check.
+ *
+ * Returns 0 on success, or the error from the btree step / record fetch.
+ */
+STATIC int
+xfs_ialloc_next_rec(
+        struct xfs_btree_cur    *cur,
+        xfs_inobt_rec_incore_t  *rec,
+        int                     *done,
+        int                     left)
+{
+        int                     stat;
+        int                     error;
+        error = left ? xfs_btree_decrement(cur, 0, &stat) :
+                       xfs_btree_increment(cur, 0, &stat);
+        if (error)
+                return error;
+        *done = !stat;
+        /* nothing to fetch when the step produced no record */
+        if (!stat)
+                return 0;
+        error = xfs_inobt_get_rec(cur, rec, &stat);
+        if (error)
+                return error;
+        XFS_WANT_CORRUPTED_RETURN(stat == 1);
+        return 0;
+}
+/*
+ * Position the cursor with an XFS_LOOKUP_EQ lookup of agino and, when the
+ * lookup status is non-zero, fetch the record under the cursor into *rec.
+ *
+ * *done is set to 1 when the lookup status is zero, otherwise to 0.
+ * The left argument is accepted but not consulted by this function.
+ *
+ * Returns 0 on success, or the error from the lookup / record fetch.
+ */
+STATIC int
+xfs_ialloc_get_rec(
+        struct xfs_btree_cur    *cur,
+        xfs_agino_t             agino,
+        xfs_inobt_rec_incore_t  *rec,
+        int                     *done,
+        int                     left)
+{
+        int                     found;
+        int                     error;
+        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &found);
+        if (error)
+                return error;
+        *done = !found;
+        /* no matching record: leave *rec untouched */
+        if (!found)
+                return 0;
+        error = xfs_inobt_get_rec(cur, rec, &found);
+        if (error)
+                return error;
+        XFS_WANT_CORRUPTED_RETURN(found == 1);
+        return 0;
+}
+/*
 * Visible inode allocation functions.
 */
@@ -592,8 +666,8 @@ xfs_dialloc(
        int             j;              /* result code */
        xfs_mount_t     *mp;            /* file system mount structure */
        int             offset;         /* index of inode in chunk */
-        xfs_agino_t     pagino;         /* parent's a.g. relative inode # */
+        xfs_agino_t     pagino;         /* parent's AG relative inode # */
-        xfs_agnumber_t  pagno;          /* parent's allocation group number */
+        xfs_agnumber_t  pagno;          /* parent's AG number */
        xfs_inobt_rec_incore_t rec;     /* inode allocation record */
        xfs_agnumber_t  tagno;          /* testing allocation group number */
        xfs_btree_cur_t *tcur;          /* temp cursor */
@@ -716,6 +790,8 @@ nextag:
         */
        agno = tagno;
        *IO_agbp = NULL;
+ restart_pagno:
        cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
        /*
         * If pagino is 0 (this is the root inode allocation) use newino.
@@ -723,220 +799,199 @@ nextag:
         */
        if (!pagino)
                pagino = be32_to_cpu(agi->agi_newino);
-#ifdef DEBUG
-        if (cur->bc_nlevels == 1) {
-                int     freecount = 0;
-                if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
+        error = xfs_check_agi_freecount(cur, agi);
-                        goto error0;
+        if (error)
-                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                goto error0;
-                do {
-                        if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
-                                        &rec.ir_freecount, &rec.ir_free, &i)))
-                                goto error0;
-                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                        freecount += rec.ir_freecount;
-                        if ((error = xfs_btree_increment(cur, 0, &i)))
-                                goto error0;
-                } while (i == 1);
-                ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
-                       XFS_FORCED_SHUTDOWN(mp));
-        }
-#endif
        /*
-         * If in the same a.g. as the parent, try to get near the parent.
+         * If in the same AG as the parent, try to get near the parent.
         */
        if (pagno == agno) {
-                if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
+                xfs_perag_t     *pag = &mp->m_perag[agno];
+                int             doneleft;       /* done, to the left */
+                int             doneright;      /* done, to the right */
+                int             searchdistance = 10;
+                error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
+                if (error)
+                        goto error0;
+                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                error = xfs_inobt_get_rec(cur, &rec, &j);
+                if (error)
                         goto error0;
-                if (i != 0 &&
+                /* validate the record fetch status (j); i was checked above */
+                XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
-                    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
-                            &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
+                if (rec.ir_freecount > 0) {
-                    j == 1 &&
-                    rec.ir_freecount > 0) {
                        /*
                         * Found a free inode in the same chunk
-                         * as parent, done.
+                         * as the parent, done.
                         */
+                        goto alloc_inode;
                }
+                /*
+                 * In the same AG as parent, but parent's chunk is full.
+                 */
+                /* duplicate the cursor, search left & right simultaneously */
+                error = xfs_btree_dup_cursor(cur, &tcur);
+                if (error)
+                        goto error0;
                /*
-                 * In the same a.g. as parent, but parent's chunk is full.
+                 * Skip to last blocks looked up if same parent inode.
                 */
-                else {
+                if (pagino != NULLAGINO &&
-                        int     doneleft;       /* done, to the left */
+                    pag->pagl_pagino == pagino &&
-                        int     doneright;      /* done, to the right */
+                    pag->pagl_leftrec != NULLAGINO &&
+                    pag->pagl_rightrec != NULLAGINO) {
+                        error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
+                                                   &trec, &doneleft, 1);
+                        if (error)
+                                goto error1;
+                        error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
+                                                   &rec, &doneright, 0);
                        if (error)
-                                goto error0;
-                        ASSERT(i == 1);
-                        ASSERT(j == 1);
-                        /*
-                         * Duplicate the cursor, search left & right
-                         * simultaneously.
-                         */
-                        if ((error = xfs_btree_dup_cursor(cur, &tcur)))
-                                goto error0;
-                        /*
-                         * Search left with tcur, back up 1 record.
-                         */
-                        if ((error = xfs_btree_decrement(tcur, 0, &i)))
                                goto error1;
-                        doneleft = !i;
+                } else {
-                        if (!doneleft) {
+                        /* search left with tcur, back up 1 record */
-                                if ((error = xfs_inobt_get_rec(tcur,
+                        error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
-                                                &trec.ir_startino,
+                        if (error)
-                                                &trec.ir_freecount,
-                                                &trec.ir_free, &i)))
-                                        goto error1;
-                                XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
-                        }
-                        /*
-                         * Search right with cur, go forward 1 record.
-                         */
-                        if ((error = xfs_btree_increment(cur, 0, &i)))
                                goto error1;
-                        doneright = !i;
-                        if (!doneright) {
-                                if ((error = xfs_inobt_get_rec(cur,
-                                                &rec.ir_startino,
-                                                &rec.ir_freecount,
-                                                &rec.ir_free, &i)))
-                                        goto error1;
-                                XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
-                        }
-                        /*
-                         * Loop until we find the closest inode chunk
-                         * with a free one.
-                         */
-                        while (!doneleft || !doneright) {
-                                int     useleft;  /* using left inode
-                                                     chunk this time */
+                        /* search right with cur, go forward 1 record. */
+                        error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
+                        if (error)
+                                goto error1;
+                }
+                /*
+                 * Loop until we find an inode chunk with a free inode.
+                 */
+                while (!doneleft || !doneright) {
+                        int     useleft;  /* using left inode chunk this time */
+                        if (!--searchdistance) {
                                /*
-                                 * Figure out which block is closer,
+                                 * Not in range - save last search
-                                 * if both are valid.
+                                 * location and allocate a new inode
-                                 */
-                                if (!doneleft && !doneright)
-                                        useleft =
-                                                pagino -
-                                                (trec.ir_startino +
-                                                 XFS_INODES_PER_CHUNK - 1) <
-                                                 rec.ir_startino - pagino;
-                                else
-                                        useleft = !doneleft;
-                                /*
-                                 * If checking the left, does it have
-                                 * free inodes?
-                                 */
-                                if (useleft && trec.ir_freecount) {
-                                        /*
-                                         * Yes, set it up as the chunk to use.
-                                         */
-                                        rec = trec;
-                                        xfs_btree_del_cursor(cur,
-                                                XFS_BTREE_NOERROR);
-                                        cur = tcur;
-                                        break;
-                                }
-                                /*
-                                 * If checking the right, does it have
-                                 * free inodes?
-                                 */
-                                if (!useleft && rec.ir_freecount) {
-                                        /*
-                                         * Yes, it's already set up.
-                                         */
-                                        xfs_btree_del_cursor(tcur,
-                                                XFS_BTREE_NOERROR);
-                                        break;
-                                }
-                                /*
-                                 * If used the left, get another one
-                                 * further left.
-                                 */
-                                if (useleft) {
-                                        if ((error = xfs_btree_decrement(tcur, 0,
-                                                        &i)))
-                                                goto error1;
-                                        doneleft = !i;
-                                        if (!doneleft) {
-                                                if ((error = xfs_inobt_get_rec(
-                                                            tcur,
-                                                            &trec.ir_startino,
-                                                            &trec.ir_freecount,
-                                                            &trec.ir_free, &i)))
-                                                        goto error1;
-                                                XFS_WANT_CORRUPTED_GOTO(i == 1,
-                                                        error1);
-                                        }
-                                }
-                                /*
-                                 * If used the right, get another one
-                                 * further right.
                                 */
-                                else {
+                                pag->pagl_leftrec = trec.ir_startino;
-                                        if ((error = xfs_btree_increment(cur, 0,
+                                pag->pagl_rightrec = rec.ir_startino;
-                                                        &i)))
+                                pag->pagl_pagino = pagino;
-                                                goto error1;
+                                /*
+                                 * Only cur is used from newino onwards, so
+                                 * release the duplicated cursor here or it
+                                 * is leaked.
+                                 */
+                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                                goto newino;
-                                        doneright = !i;
+                        }
-                                        if (!doneright) {
-                                                if ((error = xfs_inobt_get_rec(
+                        /* figure out the closer block if both are valid. */
-                                                            cur,
+                        if (!doneleft && !doneright) {
-                                                            &rec.ir_startino,
+                                useleft = pagino -
-                                                            &rec.ir_freecount,
+                                 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
-                                                            &rec.ir_free, &i)))
+                                  rec.ir_startino - pagino;
-                                                        goto error1;
+                        } else {
-                                                XFS_WANT_CORRUPTED_GOTO(i == 1,
+                                useleft = !doneleft;
-                                                        error1);
-                                        }
-                                }
                        }
-                        ASSERT(!doneleft || !doneright);
+                        /* free inodes to the left? */
+                        if (useleft && trec.ir_freecount) {
+                                rec = trec;
+                                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+                                cur = tcur;
+                                pag->pagl_leftrec = trec.ir_startino;
+                                pag->pagl_rightrec = rec.ir_startino;
+                                pag->pagl_pagino = pagino;
+                                goto alloc_inode;
+                        }
+                        /* free inodes to the right? */
+                        if (!useleft && rec.ir_freecount) {
+                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                                pag->pagl_leftrec = trec.ir_startino;
+                                pag->pagl_rightrec = rec.ir_startino;
+                                pag->pagl_pagino = pagino;
+                                goto alloc_inode;
+                        }
+                        /* get next record to check */
+                        if (useleft) {
+                                error = xfs_ialloc_next_rec(tcur, &trec,
+                                                                 &doneleft, 1);
+                        } else {
+                                error = xfs_ialloc_next_rec(cur, &rec,
+                                                                 &doneright, 0);
+                        }
+                        if (error)
+                                goto error1;
                }
+                /*
+                 * We've reached the end of the btree.  Because
+                 * we only search a small chunk of the btree on
+                 * each pass, there are obviously free inodes
+                 * closer to the parent inode than we are now.
+                 * Restart the search.
+                 */
+                pag->pagl_pagino = NULLAGINO;
+                pag->pagl_leftrec = NULLAGINO;
+                pag->pagl_rightrec = NULLAGINO;
+                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+                goto restart_pagno;
        }
        /*
-         * In a different a.g. from the parent.
+         * In a different AG from the parent.
         * See if the most recently allocated block has any free.
         */
-        else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
+newino:
-                if ((error = xfs_inobt_lookup_eq(cur,
+        if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
-                                be32_to_cpu(agi->agi_newino), 0, 0, &i)))
+                error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+                                         XFS_LOOKUP_EQ, &i);
+                if (error)
                        goto error0;
-                if (i == 1 &&
-                    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+                if (i == 1) {
-                            &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
+                        error = xfs_inobt_get_rec(cur, &rec, &j);
-                    j == 1 &&
-                    rec.ir_freecount > 0) {
-                        /*
-                         * The last chunk allocated in the group still has
-                         * a free inode.
-                         */
-                }
-                /*
-                 * None left in the last group, search the whole a.g.
-                 */
-                else {
                        if (error)
                                goto error0;
-                        if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
-                                goto error0;
+                        if (j == 1 && rec.ir_freecount > 0) {
-                        ASSERT(i == 1);
+                                /*
-                        for (;;) {
+                                 * The last chunk allocated in the group
-                                if ((error = xfs_inobt_get_rec(cur,
+                                 * still has a free inode.
-                                                &rec.ir_startino,
+                                 */
-                                                &rec.ir_freecount, &rec.ir_free,
+                                goto alloc_inode;
-                                                &i)))
-                                        goto error0;
-                                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                                if (rec.ir_freecount > 0)
-                                        break;
-                                if ((error = xfs_btree_increment(cur, 0, &i)))
-                                        goto error0;
-                                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                        }
                }
        }
+        /*
+         * None left in the last group, search the whole AG
+         */
+        error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+        if (error)
+                goto error0;
+        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+        for (;;) {
+                error = xfs_inobt_get_rec(cur, &rec, &i);
+                if (error)
+                        goto error0;
+                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                if (rec.ir_freecount > 0)
+                        break;
+                error = xfs_btree_increment(cur, 0, &i);
+                if (error)
+                        goto error0;
+                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+        }
+alloc_inode:
        offset = xfs_ialloc_find_free(&rec.ir_free);
        ASSERT(offset >= 0);
        ASSERT(offset < XFS_INODES_PER_CHUNK);
@@ -945,33 +1000,19 @@ nextag:
        ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
        rec.ir_free &= ~XFS_INOBT_MASK(offset);
        rec.ir_freecount--;
-        if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
+        error = xfs_inobt_update(cur, &rec);
-                        rec.ir_free)))
+        if (error)
                goto error0;
        be32_add_cpu(&agi->agi_freecount, -1);
        xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
        down_read(&mp->m_peraglock);
        mp->m_perag[tagno].pagi_freecount--;
        up_read(&mp->m_peraglock);
-#ifdef DEBUG
-        if (cur->bc_nlevels == 1) {
-                int     freecount = 0;
-                if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
+        error = xfs_check_agi_freecount(cur, agi);
-                        goto error0;
+        if (error)
-                do {
+                goto error0;
-                        if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
-                                        &rec.ir_freecount, &rec.ir_free, &i)))
-                                goto error0;
-                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                        freecount += rec.ir_freecount;
-                        if ((error = xfs_btree_increment(cur, 0, &i)))
-                                goto error0;
-                } while (i == 1);
-                ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
-                       XFS_FORCED_SHUTDOWN(mp));
-        }
-#endif
        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
        xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
        *inop = ino;
@@ -1062,38 +1103,23 @@ xfs_difree(
         * Initialize the cursor.
         */
        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
-#ifdef DEBUG
-        if (cur->bc_nlevels == 1) {
-                int freecount = 0;
-                if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
+        error = xfs_check_agi_freecount(cur, agi);
-                        goto error0;
+        if (error)
-                do {
+                goto error0;
-                        if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
-                                        &rec.ir_freecount, &rec.ir_free, &i)))
-                                goto error0;
-                        if (i) {
-                                freecount += rec.ir_freecount;
-                                if ((error = xfs_btree_increment(cur, 0, &i)))
-                                        goto error0;
-                        }
-                } while (i == 1);
-                ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
-                       XFS_FORCED_SHUTDOWN(mp));
-        }
-#endif
        /*
         * Look for the entry describing this inode.
         */
-        if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
+        if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
                cmn_err(CE_WARN,
-                        "xfs_difree: xfs_inobt_lookup_le returned()  an error %d on %s.  Returning error.",
+                        "xfs_difree: xfs_inobt_lookup returned()  an error %d on %s.  Returning error.",
                        error, mp->m_fsname);
                goto error0;
        }
        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-        if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount,
+        error = xfs_inobt_get_rec(cur, &rec, &i);
-                        &rec.ir_free, &i))) {
+        if (error) {
                cmn_err(CE_WARN,
                        "xfs_difree: xfs_inobt_get_rec()  returned an error %d on %s.  Returning error.",
                        error, mp->m_fsname);
@@ -1148,12 +1174,14 @@ xfs_difree(
        } else {
                *delete = 0;
-                if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
+                error = xfs_inobt_update(cur, &rec);
+                if (error) {
                        cmn_err(CE_WARN,
-                                "xfs_difree: xfs_inobt_update()  returned an error %d on %s.  Returning error.",
+        "xfs_difree: xfs_inobt_update returned an error %d on %s.",
                                error, mp->m_fsname);
                        goto error0;
                }
                /* 
                 * Change the inode free counts and log the ag/sb changes.
                 */
@@ -1165,28 +1193,10 @@ xfs_difree(
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
        }
-#ifdef DEBUG
+        error = xfs_check_agi_freecount(cur, agi);
-        if (cur->bc_nlevels == 1) {
+        if (error)
-                int freecount = 0;
+                goto error0;
-                if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
-                        goto error0;
-                do {
-                        if ((error = xfs_inobt_get_rec(cur,
-                                        &rec.ir_startino,
-                                        &rec.ir_freecount,
-                                        &rec.ir_free, &i)))
-                                goto error0;
-                        if (i) {
-                                freecount += rec.ir_freecount;
-                                if ((error = xfs_btree_increment(cur, 0, &i)))
-                                        goto error0;
-                        }
-                } while (i == 1);
-                ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
-                       XFS_FORCED_SHUTDOWN(mp));
-        }
-#endif
        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
        return 0;
@@ -1297,9 +1307,7 @@ xfs_imap(
                chunk_agbno = agbno - offset_agbno;
        } else {
                xfs_btree_cur_t *cur;   /* inode btree cursor */
-                xfs_agino_t     chunk_agino; /* first agino in inode chunk */
+                xfs_inobt_rec_incore_t chunk_rec;
-                __int32_t       chunk_cnt; /* count of free inodes in chunk */
-                xfs_inofree_t   chunk_free; /* mask of free inodes in chunk */
                xfs_buf_t       *agbp;  /* agi buffer */
                int             i;      /* temp state */
@@ -1315,15 +1323,14 @@ xfs_imap(
                }
                cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
-                error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i);
+                error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
                if (error) {
                        xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-                                        "xfs_inobt_lookup_le() failed");
+                                        "xfs_inobt_lookup() failed");
                        goto error0;
                }
-                error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
+                error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
-                                &chunk_free, &i);
                if (error) {
                        xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
                                        "xfs_inobt_get_rec() failed");
@@ -1341,7 +1348,7 @@ xfs_imap(
                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
                if (error)
                        return error;
-                chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
+                chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
                offset_agbno = agbno - chunk_agbno;
        }
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index aeee8278f92c..bb5385475e1f 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -150,23 +150,15 @@ xfs_ialloc_pagi_init(
        xfs_agnumber_t  agno);          /* allocation group number */
 /*
- * Lookup the first record greater than or equal to ino
+ * Lookup a record by ino in the btree given by cur.
- * in the btree given by cur.
 */
-int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
+int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
-                __int32_t fcnt, xfs_inofree_t free, int *stat);
+                xfs_lookup_t dir, int *stat);
-/*
- * Lookup the first record less than or equal to ino
- * in the btree given by cur.
- */
-int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
-                __int32_t fcnt, xfs_inofree_t free, int *stat);
 /*
 * Get the data from the pointed-to record.
 */
-extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
+extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
-                             __int32_t *fcnt, xfs_inofree_t *free, int *stat);
+                xfs_inobt_rec_incore_t *rec, int *stat);
 #endif  /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index ecbf8b4d2e2e..80e526489be5 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -82,7 +82,6 @@ xfs_inode_alloc(
        memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
        ip->i_flags = 0;
        ip->i_update_core = 0;
-        ip->i_update_size = 0;
        ip->i_delayed_blks = 0;
        memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
        ip->i_size = 0;
@@ -456,32 +455,6 @@ out_error_or_again:
        return error;
 }
-/*
- * Look for the inode corresponding to the given ino in the hash table.
- * If it is there and its i_transp pointer matches tp, return it.
- * Otherwise, return NULL.
- */
-xfs_inode_t *
-xfs_inode_incore(xfs_mount_t    *mp,
-                 xfs_ino_t      ino,
-                 xfs_trans_t    *tp)
-{
-        xfs_inode_t     *ip;
-        xfs_perag_t     *pag;
-        pag = xfs_get_perag(mp, ino);
-        read_lock(&pag->pag_ici_lock);
-        ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino));
-        read_unlock(&pag->pag_ici_lock);
-        xfs_put_perag(mp, pag);
-        /* the returned inode must match the transaction */
-        if (ip && (ip->i_transp != tp))
-                return NULL;
-        return ip;
-}
 /*
 * Decrement reference count of an inode structure and unlock it.
 *
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index da428b3fe0f5..c1dc7ef5a1d8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -651,7 +651,7 @@ xfs_iformat_btree(
        return 0;
 }
-void
+STATIC void
 xfs_dinode_from_disk(
        xfs_icdinode_t          *to,
        xfs_dinode_t            *from)
@@ -1247,7 +1247,7 @@ xfs_isize_check(
 * In that case the pages will still be in memory, but the inode size
 * will never have been updated.
 */
-xfs_fsize_t
+STATIC xfs_fsize_t
 xfs_file_last_byte(
        xfs_inode_t     *ip)
 {
@@ -3837,7 +3837,7 @@ xfs_iext_inline_to_direct(
 /*
 * Resize an extent indirection array to new_size bytes.
 */
-void
+STATIC void
 xfs_iext_realloc_indirect(
        xfs_ifork_t     *ifp,           /* inode fork pointer */
        int             new_size)       /* new indirection array size */
@@ -3862,7 +3862,7 @@ xfs_iext_realloc_indirect(
 /*
 * Switch from indirection array to linear (direct) extent allocations.
 */
-void
+STATIC void
 xfs_iext_indirect_to_direct(
         xfs_ifork_t    *ifp)           /* inode fork pointer */
 {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 65f24a3cc992..0b38b9a869ec 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -261,7 +261,6 @@ typedef struct xfs_inode {
        /* Miscellaneous state. */
        unsigned short          i_flags;        /* see defined flags below */
        unsigned char           i_update_core;  /* timestamps/size is dirty */
-        unsigned char           i_update_size;  /* di_size field is dirty */
        unsigned int            i_delayed_blks; /* count of delay alloc blks */
        xfs_icdinode_t          i_d;            /* most of ondisk inode */
@@ -468,8 +467,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
 /*
 * xfs_iget.c prototypes.
 */
-xfs_inode_t     *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
-                                  struct xfs_trans *);
 int             xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
                         uint, uint, xfs_inode_t **, xfs_daddr_t);
 void            xfs_iput(xfs_inode_t *, uint);
@@ -504,7 +501,6 @@ void		xfs_ipin(xfs_inode_t *);
 void            xfs_iunpin(xfs_inode_t *);
 int             xfs_iflush(xfs_inode_t *, uint);
 void            xfs_ichgtime(xfs_inode_t *, int);
-xfs_fsize_t     xfs_file_last_byte(xfs_inode_t *);
 void            xfs_lock_inodes(xfs_inode_t **, int, uint);
 void            xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
@@ -572,8 +568,6 @@ int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
                          struct xfs_buf **, uint);
 int             xfs_iread(struct xfs_mount *, struct xfs_trans *,
                          struct xfs_inode *, xfs_daddr_t, uint);
-void            xfs_dinode_from_disk(struct xfs_icdinode *,
-                                     struct xfs_dinode *);
 void            xfs_dinode_to_disk(struct xfs_dinode *,
                                   struct xfs_icdinode *);
 void            xfs_idestroy_fork(struct xfs_inode *, int);
@@ -592,8 +586,6 @@ void		xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
 void            xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
 void            xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
 void            xfs_iext_realloc_direct(xfs_ifork_t *, int);
-void            xfs_iext_realloc_indirect(xfs_ifork_t *, int);
-void            xfs_iext_indirect_to_direct(xfs_ifork_t *);
 void            xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
 void            xfs_iext_inline_to_direct(xfs_ifork_t *, int);
 void            xfs_iext_destroy(xfs_ifork_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 977c4aec587e..47d5b663c37e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -263,14 +263,6 @@ xfs_inode_item_format(
        }
        /*
-         * We don't have to worry about re-ordering here because
-         * the update_size field is protected by the inode lock
-         * and we have that held in exclusive mode.
-         */
-        if (ip->i_update_size)
-                ip->i_update_size = 0;
-        /*
         * Make sure to get the latest atime from the Linux inode.
         */
        xfs_synchronize_atime(ip);
@@ -712,8 +704,6 @@ xfs_inode_item_unlock(
         * Clear out the fields of the inode log item particular
         * to the current transaction.
         */
-        iip->ili_ilock_recur = 0;
-        iip->ili_iolock_recur = 0;
        iip->ili_flags = 0;
        /*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index a52ac125f055..65bae4c9b8bf 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -137,8 +137,6 @@ typedef struct xfs_inode_log_item {
        struct xfs_inode        *ili_inode;        /* inode ptr */
        xfs_lsn_t               ili_flush_lsn;     /* lsn at last flush */
        xfs_lsn_t               ili_last_lsn;      /* lsn at last transaction */
-        unsigned short          ili_ilock_recur;   /* lock recursion count */
-        unsigned short          ili_iolock_recur;  /* lock recursion count */
        unsigned short          ili_flags;         /* misc flags */
        unsigned short          ili_logged;        /* flushed logged data */
        unsigned int            ili_last_fields;   /* fields when flushed */
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index 7a28191cb0de..b8e4ee4e89a4 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -72,7 +72,6 @@ struct xfs_mount;
 #if XFS_BIG_INUMS
 #define XFS_MAXINUMBER          ((xfs_ino_t)((1ULL << 56) - 1ULL))
-#define XFS_INO64_OFFSET        ((xfs_ino_t)(1ULL << 32))
 #else
 #define XFS_MAXINUMBER          ((xfs_ino_t)((1ULL << 32) - 1ULL))
 #endif
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index aeb2d2221c7d..b68f9107e26c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -39,7 +39,7 @@
 #include "xfs_error.h"
 #include "xfs_btree.h"
-int
+STATIC int
 xfs_internal_inum(
        xfs_mount_t     *mp,
        xfs_ino_t       ino)
@@ -353,9 +353,6 @@ xfs_bulkstat(
        int                     end_of_ag; /* set if we've seen the ag end */
        int                     error;  /* error code */
        int                     fmterror;/* bulkstat formatter result */
-        __int32_t               gcnt;   /* current btree rec's count */
-        xfs_inofree_t           gfree;  /* current btree rec's free mask */
-        xfs_agino_t             gino;   /* current btree rec's start inode */
        int                     i;      /* loop index */
        int                     icount; /* count of inodes good in irbuf */
        size_t                  irbsize; /* size of irec buffer in bytes */
@@ -442,40 +439,43 @@ xfs_bulkstat(
                 * we need to get the remainder of the chunk we're in.
                 */
                if (agino > 0) {
+                        xfs_inobt_rec_incore_t r;
                        /*
                         * Lookup the inode chunk that this inode lives in.
                         */
-                        error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp);
+                        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE,
+                                                 &tmp);
                        if (!error &&   /* no I/O error */
                            tmp &&      /* lookup succeeded */
                                        /* got the record, should always work */
-                            !(error = xfs_inobt_get_rec(cur, &gino, &gcnt,
+                            !(error = xfs_inobt_get_rec(cur, &r, &i)) &&
-                                    &gfree, &i)) &&
                            i == 1 &&
                                        /* this is the right chunk */
-                            agino < gino + XFS_INODES_PER_CHUNK &&
+                            agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
                                        /* lastino was not last in chunk */
-                            (chunkidx = agino - gino + 1) <
+                            (chunkidx = agino - r.ir_startino + 1) <
                                    XFS_INODES_PER_CHUNK &&
                                        /* there are some left allocated */
                            xfs_inobt_maskn(chunkidx,
-                                    XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) {
+                                    XFS_INODES_PER_CHUNK - chunkidx) &
+                                    ~r.ir_free) {
                                /*
                                 * Grab the chunk record.  Mark all the
                                 * uninteresting inodes (because they're
                                 * before our start point) free.
                                 */
                                for (i = 0; i < chunkidx; i++) {
-                                        if (XFS_INOBT_MASK(i) & ~gfree)
+                                        if (XFS_INOBT_MASK(i) & ~r.ir_free)
-                                                gcnt++;
+                                                r.ir_freecount++;
                                }
-                                gfree |= xfs_inobt_maskn(0, chunkidx);
+                                r.ir_free |= xfs_inobt_maskn(0, chunkidx);
-                                irbp->ir_startino = gino;
+                                irbp->ir_startino = r.ir_startino;
-                                irbp->ir_freecount = gcnt;
+                                irbp->ir_freecount = r.ir_freecount;
-                                irbp->ir_free = gfree;
+                                irbp->ir_free = r.ir_free;
                                irbp++;
-                                agino = gino + XFS_INODES_PER_CHUNK;
+                                agino = r.ir_startino + XFS_INODES_PER_CHUNK;
-                                icount = XFS_INODES_PER_CHUNK - gcnt;
+                                icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
                        } else {
                                /*
                                 * If any of those tests failed, bump the
@@ -493,7 +493,7 @@ xfs_bulkstat(
                        /*
                         * Start of ag.  Lookup the first inode chunk.
                         */
-                        error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp);
+                        error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
                        icount = 0;
                }
                /*
@@ -501,6 +501,8 @@ xfs_bulkstat(
                 * until we run out of inodes or space in the buffer.
                 */
                while (irbp < irbufend && icount < ubcount) {
+                        xfs_inobt_rec_incore_t r;
                        /*
                         * Loop as long as we're unable to read the
                         * inode btree.
@@ -510,51 +512,55 @@ xfs_bulkstat(
                                if (XFS_AGINO_TO_AGBNO(mp, agino) >=
                                                be32_to_cpu(agi->agi_length))
                                        break;
-                                error = xfs_inobt_lookup_ge(cur, agino, 0, 0,
+                                error = xfs_inobt_lookup(cur, agino,
-                                                            &tmp);
+                                                         XFS_LOOKUP_GE, &tmp);
                                cond_resched();
                        }
                        /*
                         * If ran off the end of the ag either with an error,
                         * or the normal way, set end and stop collecting.
                         */
-                        if (error ||
+                        if (error) {
-                            (error = xfs_inobt_get_rec(cur, &gino, &gcnt,
-                                    &gfree, &i)) ||
-                            i == 0) {
                                end_of_ag = 1;
                                break;
                        }
+                        error = xfs_inobt_get_rec(cur, &r, &i);
+                        if (error || i == 0) {
+                                end_of_ag = 1;
+                                break;
+                        }
                        /*
                         * If this chunk has any allocated inodes, save it.
                         * Also start read-ahead now for this chunk.
                         */
-                        if (gcnt < XFS_INODES_PER_CHUNK) {
+                        if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
                                /*
                                 * Loop over all clusters in the next chunk.
                                 * Do a readahead if there are any allocated
                                 * inodes in that cluster.
                                 */
-                                for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
+                                agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
-                                     chunkidx = 0;
+                                for (chunkidx = 0;
                                     chunkidx < XFS_INODES_PER_CHUNK;
                                     chunkidx += nicluster,
                                     agbno += nbcluster) {
-                                        if (xfs_inobt_maskn(chunkidx,
+                                        if (xfs_inobt_maskn(chunkidx, nicluster)
-                                                            nicluster) & ~gfree)
+                                                        & ~r.ir_free)
                                                xfs_btree_reada_bufs(mp, agno,
                                                        agbno, nbcluster);
                                }
-                                irbp->ir_startino = gino;
+                                irbp->ir_startino = r.ir_startino;
-                                irbp->ir_freecount = gcnt;
+                                irbp->ir_freecount = r.ir_freecount;
-                                irbp->ir_free = gfree;
+                                irbp->ir_free = r.ir_free;
                                irbp++;
-                                icount += XFS_INODES_PER_CHUNK - gcnt;
+                                icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
                        }
                        /*
                         * Set agino to after this chunk and bump the cursor.
                         */
-                        agino = gino + XFS_INODES_PER_CHUNK;
+                        agino = r.ir_startino + XFS_INODES_PER_CHUNK;
                        error = xfs_btree_increment(cur, 0, &tmp);
                        cond_resched();
                }
@@ -820,9 +826,7 @@ xfs_inumbers(
        int             bufidx;
        xfs_btree_cur_t *cur;
        int             error;
-        __int32_t       gcnt;
+        xfs_inobt_rec_incore_t r;
-        xfs_inofree_t   gfree;
-        xfs_agino_t     gino;
        int             i;
        xfs_ino_t       ino;
        int             left;
@@ -855,7 +859,8 @@ xfs_inumbers(
                                continue;
                        }
                        cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
-                        error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
+                        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
+                                                 &tmp);
                        if (error) {
                                xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
                                cur = NULL;
@@ -870,9 +875,8 @@ xfs_inumbers(
                                continue;
                        }
                }
-                if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree,
+                error = xfs_inobt_get_rec(cur, &r, &i);
-                        &i)) ||
+                if (error || i == 0) {
-                    i == 0) {
                        xfs_buf_relse(agbp);
                        agbp = NULL;
                        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -881,10 +885,12 @@ xfs_inumbers(
                        agino = 0;
                        continue;
                }
-                agino = gino + XFS_INODES_PER_CHUNK - 1;
+                agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
-                buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino);
+                buffer[bufidx].xi_startino =
-                buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt;
+                        XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
-                buffer[bufidx].xi_allocmask = ~gfree;
+                buffer[bufidx].xi_alloccount =
+                        XFS_INODES_PER_CHUNK - r.ir_freecount;
+                buffer[bufidx].xi_allocmask = ~r.ir_free;
                bufidx++;
                left--;
                if (bufidx == bcount) {
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 1fb04e7deb61..20792bf45946 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -99,11 +99,6 @@ xfs_bulkstat_one(
        void                    *dibuff,
        int                     *stat);
-int
-xfs_internal_inum(
-        xfs_mount_t             *mp,
-        xfs_ino_t               ino);
 typedef int (*inumbers_fmt_pf)(
        void                    __user *ubuffer, /* buffer to write to */
        const xfs_inogrp_t      *buffer,        /* buffer to read from */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index bcad5f4c1fd1..679c7c4926a2 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -451,8 +451,6 @@ extern int	 xlog_find_tail(xlog_t	*log,
 extern int       xlog_recover(xlog_t *log);
 extern int       xlog_recover_finish(xlog_t *log);
 extern void      xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
-extern void      xlog_recover_process_iunlinks(xlog_t *log);
 extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
 extern void      xlog_put_bp(struct xfs_buf *);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 47da2fb45377..1099395d7d6c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3263,7 +3263,7 @@ xlog_recover_process_one_iunlink(
 * freeing of the inode and its removal from the list must be
 * atomic.
 */
-void
+STATIC void
 xlog_recover_process_iunlinks(
        xlog_t          *log)
 {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5c6f092659c1..8b6c9e807efb 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1568,7 +1568,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
 *
 * The m_sb_lock must be held when this routine is called.
 */
-int
+STATIC int
 xfs_mod_incore_sb_unlocked(
        xfs_mount_t     *mp,
        xfs_sb_field_t  field,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a5122382afde..a6c023bc0fb2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -414,13 +414,10 @@ typedef struct xfs_mod_sb {
 extern int      xfs_log_sbcount(xfs_mount_t *, uint);
 extern int      xfs_mountfs(xfs_mount_t *mp);
-extern void     xfs_mountfs_check_barriers(xfs_mount_t *mp);
 extern void     xfs_unmountfs(xfs_mount_t *);
 extern int      xfs_unmountfs_writesb(xfs_mount_t *);
 extern int      xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-extern int      xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
-                        int64_t, int);
 extern int      xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
                        uint, int);
 extern int      xfs_mount_log_sb(xfs_mount_t *, __int64_t);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index afee7eb24323..4b0613d99faa 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -564,35 +564,6 @@ xfs_mru_cache_lookup(
 }
 /*
- * To look up an element using its key, but leave its location in the internal
- * lists alone, call xfs_mru_cache_peek().  If the element isn't found, this
- * function returns NULL.
- *
- * See the comments above the declaration of the xfs_mru_cache_lookup() function
- * for important locking information pertaining to this call.
- */
-void *
-xfs_mru_cache_peek(
-        xfs_mru_cache_t *mru,
-        unsigned long   key)
-{
-        xfs_mru_cache_elem_t *elem;
-        ASSERT(mru && mru->lists);
-        if (!mru || !mru->lists)
-                return NULL;
-        spin_lock(&mru->lock);
-        elem = radix_tree_lookup(&mru->store, key);
-        if (!elem)
-                spin_unlock(&mru->lock);
-        else
-                __release(mru_lock); /* help sparse not be stupid */
-        return elem ? elem->value : NULL;
-}
-/*
 * To release the internal data structure spinlock after having performed an
 * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
 * with the data store pointer.
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index dd58ea1bbebe..5d439f34b0c9 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -49,7 +49,6 @@ int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
 void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
 void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
-void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
 void xfs_mru_cache_done(struct xfs_mru_cache *mru);
 #endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index fea68615ed23..3f816ad7ff19 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -88,90 +88,6 @@ xfs_write_clear_setuid(
 }
 /*
- * Handle logging requirements of various synchronous types of write.
- */
-int
-xfs_write_sync_logforce(
-        xfs_mount_t     *mp,
-        xfs_inode_t     *ip)
-{
-        int             error = 0;
-        /*
-         * If we're treating this as O_DSYNC and we have not updated the
-         * size, force the log.
-         */
-        if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
-            !(ip->i_update_size)) {
-                xfs_inode_log_item_t    *iip = ip->i_itemp;
-                /*
-                 * If an allocation transaction occurred
-                 * without extending the size, then we have to force
-                 * the log up the proper point to ensure that the
-                 * allocation is permanent.  We can't count on
-                 * the fact that buffered writes lock out direct I/O
-                 * writes - the direct I/O write could have extended
-                 * the size nontransactionally, then finished before
-                 * we started.  xfs_write_file will think that the file
-                 * didn't grow but the update isn't safe unless the
-                 * size change is logged.
-                 *
-                 * Force the log if we've committed a transaction
-                 * against the inode or if someone else has and
-                 * the commit record hasn't gone to disk (e.g.
-                 * the inode is pinned).  This guarantees that
-                 * all changes affecting the inode are permanent
-                 * when we return.
-                 */
-                if (iip && iip->ili_last_lsn) {
-                        error = _xfs_log_force(mp, iip->ili_last_lsn,
-                                        XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
-                } else if (xfs_ipincount(ip) > 0) {
-                        error = _xfs_log_force(mp, (xfs_lsn_t)0,
-                                        XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
-                }
-        } else {
-                xfs_trans_t     *tp;
-                /*
-                 * O_SYNC or O_DSYNC _with_ a size update are handled
-                 * the same way.
-                 *
-                 * If the write was synchronous then we need to make
-                 * sure that the inode modification time is permanent.
-                 * We'll have updated the timestamp above, so here
-                 * we use a synchronous transaction to log the inode.
-                 * It's not fast, but it's necessary.
-                 *
-                 * If this a dsync write and the size got changed
-                 * non-transactionally, then we need to ensure that
-                 * the size change gets logged in a synchronous
-                 * transaction.
-                 */
-                tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
-                if ((error = xfs_trans_reserve(tp, 0,
-                                                XFS_SWRITE_LOG_RES(mp),
-                                                0, 0, 0))) {
-                        /* Transaction reserve failed */
-                        xfs_trans_cancel(tp, 0);
-                } else {
-                        /* Transaction reserve successful */
-                        xfs_ilock(ip, XFS_ILOCK_EXCL);
-                        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-                        xfs_trans_ihold(tp, ip);
-                        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-                        xfs_trans_set_sync(tp);
-                        error = xfs_trans_commit(tp, 0);
-                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                }
-        }
-        return error;
-}
-/*
 * Force a shutdown of the filesystem instantly while keeping
 * the filesystem consistent. We don't do an unmount here; just shutdown
 * the shop, make sure that absolutely nothing persistent happens to
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f76c003ec55d..f5e4874c37d8 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -68,7 +68,6 @@ xfs_get_extsz_hint(
 * Prototypes for functions in xfs_rw.c.
 */
 extern int xfs_write_clear_setuid(struct xfs_inode *ip);
-extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip);
 extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
 extern int xfs_bioerror(struct xfs_buf *bp);
 extern int xfs_bioerror_relse(struct xfs_buf *bp);
@@ -78,10 +77,4 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
 extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
                                xfs_buf_t *bp, xfs_daddr_t blkno);
-/*
- * Prototypes for functions in xfs_vnodeops.c.
- */
-extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
-                        int flags);
 #endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 775249a54f6f..ed47fc77759c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -68,7 +68,7 @@ typedef struct xfs_trans_header {
 #define XFS_TRANS_GROWFS                14
 #define XFS_TRANS_STRAT_WRITE           15
 #define XFS_TRANS_DIOSTRAT              16
-#define XFS_TRANS_WRITE_SYNC            17
+/* 17 was XFS_TRANS_WRITE_SYNC */
 #define XFS_TRANS_WRITEID               18
 #define XFS_TRANS_ADDAFORK              19
 #define XFS_TRANS_ATTRINVAL             20
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 8ee2f8c8b0a6..218829e6a152 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -307,7 +307,7 @@ xfs_trans_read_buf(
                        return (flags & XFS_BUF_TRYLOCK) ?
                                        EAGAIN : XFS_ERROR(ENOMEM);
-                if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
+                if (XFS_BUF_GETERROR(bp) != 0) {
                        xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                          bp, blkno);
                        error = XFS_BUF_GETERROR(bp);
@@ -315,7 +315,7 @@ xfs_trans_read_buf(
                        return error;
                }
 #ifdef DEBUG
-                if (xfs_do_error && (bp != NULL)) {
+                if (xfs_do_error) {
                        if (xfs_error_target == target) {
                                if (((xfs_req_num++) % xfs_error_mod) == 0) {
                                        xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 23d276af2e0c..785ff101da0a 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -49,30 +49,7 @@ xfs_trans_inode_broot_debug(
 /*
- * Get and lock the inode for the caller if it is not already
+ * Get an inode and join it to the transaction.
- * locked within the given transaction.  If it is already locked
- * within the transaction, just increment its lock recursion count
- * and return a pointer to it.
- *
- * For an inode to be locked in a transaction, the inode lock, as
- * opposed to the io lock, must be taken exclusively.  This ensures
- * that the inode can be involved in only 1 transaction at a time.
- * Lock recursion is handled on the io lock, but only for lock modes
- * of equal or lesser strength.  That is, you can recur on the io lock
- * held EXCL with a SHARED request but not vice versa.  Also, if
- * the inode is already a part of the transaction then you cannot
- * go from not holding the io lock to having it EXCL or SHARED.
- *
- * Use the inode cache routine xfs_inode_incore() to find the inode
- * if it is already owned by this transaction.
- *
- * If we don't already own the inode, use xfs_iget() to get it.
- * Since the inode log item structure is embedded in the incore
- * inode structure and is initialized when the inode is brought
- * into memory, there is nothing to do with it here.
- *
- * If the given transaction pointer is NULL, just call xfs_iget().
- * This simplifies code which must handle both cases.
 */
 int
 xfs_trans_iget(
@@ -84,62 +61,11 @@ xfs_trans_iget(
        xfs_inode_t     **ipp)
 {
        int                     error;
-        xfs_inode_t             *ip;
-        /*
-         * If the transaction pointer is NULL, just call the normal
-         * xfs_iget().
-         */
-        if (tp == NULL)
-                return xfs_iget(mp, NULL, ino, flags, lock_flags, ipp, 0);
-        /*
-         * If we find the inode in core with this transaction
-         * pointer in its i_transp field, then we know we already
-         * have it locked.  In this case we just increment the lock
-         * recursion count and return the inode to the caller.
-         * Assert that the inode is already locked in the mode requested
-         * by the caller.  We cannot do lock promotions yet, so
-         * die if someone gets this wrong.
-         */
-        if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) {
-                /*
-                 * Make sure that the inode lock is held EXCL and
-                 * that the io lock is never upgraded when the inode
-                 * is already a part of the transaction.
-                 */
-                ASSERT(ip->i_itemp != NULL);
-                ASSERT(lock_flags & XFS_ILOCK_EXCL);
-                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-                ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
-                       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-                ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
-                       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
-                ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
-                       xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
-                ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
-                       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
-                if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
-                        ip->i_itemp->ili_iolock_recur++;
-                }
-                if (lock_flags & XFS_ILOCK_EXCL) {
-                        ip->i_itemp->ili_ilock_recur++;
-                }
-                *ipp = ip;
-                return 0;
-        }
-        ASSERT(lock_flags & XFS_ILOCK_EXCL);
-        error = xfs_iget(tp->t_mountp, tp, ino, flags, lock_flags, &ip, 0);
-        if (error) {
-                return error;
-        }
-        ASSERT(ip != NULL);
-        xfs_trans_ijoin(tp, ip, lock_flags);
+        error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
-        *ipp = ip;
+        if (!error && tp)
-        return 0;
+                xfs_trans_ijoin(tp, *ipp, lock_flags);
+        return error;
 }
 /*
@@ -163,8 +89,6 @@ xfs_trans_ijoin(
                xfs_inode_item_init(ip, ip->i_mount);
        iip = ip->i_itemp;
        ASSERT(iip->ili_flags == 0);
-        ASSERT(iip->ili_ilock_recur == 0);
-        ASSERT(iip->ili_iolock_recur == 0);
        /*
         * Get a log_item_desc to point at the new item.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 492d75bae2bf..a434f287962d 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -611,7 +611,7 @@ xfs_fsync(
        xfs_inode_t     *ip)
 {
        xfs_trans_t     *tp;
-        int             error;
+        int             error = 0;
        int             log_flushed = 0, changed = 1;
        xfs_itrace_entry(ip);
@@ -619,14 +619,9 @@ xfs_fsync(
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return XFS_ERROR(EIO);
-        /* capture size updates in I/O completion before writing the inode. */
-        error = xfs_wait_on_pages(ip, 0, -1);
-        if (error)
-                return XFS_ERROR(error);
        /*
         * We always need to make sure that the required inode state is safe on
-         * disk.  The vnode might be clean but we still might need to force the
+         * disk.  The inode might be clean but we still might need to force the
         * log because of committed transactions that haven't hit the disk yet.
         * Likewise, there could be unflushed non-transactional changes to the
         * inode core that have to go to disk and this requires us to issue
@@ -638,7 +633,7 @@ xfs_fsync(
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
-        if (!(ip->i_update_size || ip->i_update_core)) {
+        if (!ip->i_update_core) {
                /*
                 * Timestamps/size haven't changed since last inode flush or
                 * inode transaction commit.  That means either nothing got
@@ -718,7 +713,7 @@ xfs_fsync(
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 */
-int
+STATIC int
 xfs_free_eofblocks(
        xfs_mount_t     *mp,
        xfs_inode_t     *ip,
@@ -1476,8 +1471,8 @@ xfs_create(
        if (error == ENOSPC) {
                /* flush outstanding delalloc blocks and retry */
                xfs_flush_inodes(dp);
-                error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
+                error = xfs_trans_reserve(tp, resblks, log_res, 0,
-                        XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+                                XFS_TRANS_PERM_LOG_RES, log_count);
        }
        if (error == ENOSPC) {
                /* No space at all so try a "no-allocation" reservation */