aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/dlm/ast.c265
-rw-r--r--fs/dlm/ast.h15
-rw-r--r--fs/dlm/config.c75
-rw-r--r--fs/dlm/config.h2
-rw-r--r--fs/dlm/dlm_internal.h29
-rw-r--r--fs/dlm/lock.c225
-rw-r--r--fs/dlm/lockspace.c177
-rw-r--r--fs/dlm/lowcomms.c9
-rw-r--r--fs/dlm/memory.c22
-rw-r--r--fs/dlm/memory.h2
-rw-r--r--fs/dlm/recoverd.c12
-rw-r--r--fs/dlm/user.c12
-rw-r--r--fs/exec.c27
-rw-r--r--fs/fscache/page.c14
-rw-r--r--fs/gfs2/bmap.c12
-rw-r--r--fs/gfs2/dir.c221
-rw-r--r--fs/gfs2/dir.h1
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c39
-rw-r--r--fs/gfs2/glock.h6
-rw-r--r--fs/gfs2/glops.c7
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/rgrp.c52
-rw-r--r--fs/gfs2/rgrp.h4
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfsplus/brec.c4
-rw-r--r--fs/hfsplus/catalog.c14
-rw-r--r--fs/hfsplus/dir.c8
-rw-r--r--fs/hfsplus/extents.c50
-rw-r--r--fs/hfsplus/hfsplus_fs.h18
-rw-r--r--fs/hfsplus/inode.c12
-rw-r--r--fs/hfsplus/part_tbl.c32
-rw-r--r--fs/hfsplus/super.c43
-rw-r--r--fs/hfsplus/unicode.c35
-rw-r--r--fs/hfsplus/wrapper.c92
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/ubifs/commit.c8
-rw-r--r--fs/ubifs/debug.c762
-rw-r--r--fs/ubifs/debug.h241
-rw-r--r--fs/ubifs/dir.c16
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ubifs/io.c168
-rw-r--r--fs/ubifs/log.c6
-rw-r--r--fs/ubifs/lprops.c8
-rw-r--r--fs/ubifs/lpt.c37
-rw-r--r--fs/ubifs/lpt_commit.c40
-rw-r--r--fs/ubifs/misc.h103
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/recovery.c43
-rw-r--r--fs/ubifs/replay.c3
-rw-r--r--fs/ubifs/sb.c6
-rw-r--r--fs/ubifs/scan.c4
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/ubifs/tnc.c26
-rw-r--r--fs/ubifs/tnc_commit.c145
-rw-r--r--fs/ubifs/ubifs.h21
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c79
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h64
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c433
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c29
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h60
-rw-r--r--fs/xfs/quota/xfs_dquot.c48
-rw-r--r--fs/xfs/quota/xfs_dquot.h6
-rw-r--r--fs/xfs/quota/xfs_qm.c49
-rw-r--r--fs/xfs/quota/xfs_qm.h6
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c355
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c15
-rw-r--r--fs/xfs/xfs.h1
-rw-r--r--fs/xfs/xfs_alloc.c14
-rw-r--r--fs/xfs/xfs_alloc_btree.c84
-rw-r--r--fs/xfs/xfs_arch.h136
-rw-r--r--fs/xfs/xfs_attr.c41
-rw-r--r--fs/xfs/xfs_attr_leaf.c60
-rw-r--r--fs/xfs/xfs_bmap.c41
-rw-r--r--fs/xfs/xfs_bmap_btree.c106
-rw-r--r--fs/xfs/xfs_btree.c29
-rw-r--r--fs/xfs/xfs_btree.h38
-rw-r--r--fs/xfs/xfs_btree_trace.c249
-rw-r--r--fs/xfs/xfs_btree_trace.h99
-rw-r--r--fs/xfs/xfs_buf_item.c75
-rw-r--r--fs/xfs/xfs_da_btree.c272
-rw-r--r--fs/xfs/xfs_da_btree.h13
-rw-r--r--fs/xfs/xfs_dir2.c140
-rw-r--r--fs/xfs/xfs_dir2.h54
-rw-r--r--fs/xfs/xfs_dir2_block.c253
-rw-r--r--fs/xfs/xfs_dir2_block.h92
-rw-r--r--fs/xfs/xfs_dir2_data.c327
-rw-r--r--fs/xfs/xfs_dir2_data.h184
-rw-r--r--fs/xfs/xfs_dir2_format.h597
-rw-r--r--fs/xfs/xfs_dir2_leaf.c417
-rw-r--r--fs/xfs/xfs_dir2_leaf.h253
-rw-r--r--fs/xfs/xfs_dir2_node.c201
-rw-r--r--fs/xfs/xfs_dir2_node.h100
-rw-r--r--fs/xfs/xfs_dir2_priv.h135
-rw-r--r--fs/xfs/xfs_dir2_sf.c338
-rw-r--r--fs/xfs/xfs_dir2_sf.h171
-rw-r--r--fs/xfs/xfs_fs.h5
-rw-r--r--fs/xfs/xfs_ialloc.c14
-rw-r--r--fs/xfs/xfs_ialloc_btree.c75
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c537
-rw-r--r--fs/xfs/xfs_inode.h25
-rw-r--r--fs/xfs/xfs_inode_item.c17
-rw-r--r--fs/xfs/xfs_inum.h11
-rw-r--r--fs/xfs/xfs_log.c64
-rw-r--r--fs/xfs/xfs_log_recover.c38
-rw-r--r--fs/xfs/xfs_mount.c71
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_trans.c27
-rw-r--r--fs/xfs/xfs_trans_ail.c214
-rw-r--r--fs/xfs/xfs_trans_buf.c118
-rw-r--r--fs/xfs/xfs_trans_inode.c9
-rw-r--r--fs/xfs/xfs_trans_priv.h14
-rw-r--r--fs/xfs/xfs_vnodeops.c479
-rw-r--r--fs/xfs/xfs_vnodeops.h3
128 files changed, 4631 insertions, 6014 deletions
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cef584451113..378acdafa356 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1751,7 +1751,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1751 io_parms.pid = pid; 1751 io_parms.pid = pid;
1752 io_parms.tcon = pTcon; 1752 io_parms.tcon = pTcon;
1753 io_parms.offset = *poffset; 1753 io_parms.offset = *poffset;
1754 io_parms.length = len; 1754 io_parms.length = cur_len;
1755 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, 1755 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
1756 &read_data, &buf_type); 1756 &read_data, &buf_type);
1757 pSMBr = (struct smb_com_read_rsp *)read_data; 1757 pSMBr = (struct smb_com_read_rsp *)read_data;
diff --git a/fs/dcache.c b/fs/dcache.c
index 3c34ac0e9a1b..be18598c7fd7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1735,8 +1735,6 @@ seqretry:
1735 tname = dentry->d_name.name; 1735 tname = dentry->d_name.name;
1736 i = dentry->d_inode; 1736 i = dentry->d_inode;
1737 prefetch(tname); 1737 prefetch(tname);
1738 if (i)
1739 prefetch(i);
1740 /* 1738 /*
1741 * This seqcount check is required to ensure name and 1739 * This seqcount check is required to ensure name and
1742 * len are loaded atomically, so as not to walk off the 1740 * len are loaded atomically, so as not to walk off the
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index abc49f292454..90e5997262ea 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -14,17 +14,9 @@
14#include "dlm_internal.h" 14#include "dlm_internal.h"
15#include "lock.h" 15#include "lock.h"
16#include "user.h" 16#include "user.h"
17#include "ast.h"
18
19#define WAKE_ASTS 0
20
21static uint64_t ast_seq_count;
22static struct list_head ast_queue;
23static spinlock_t ast_queue_lock;
24static struct task_struct * astd_task;
25static unsigned long astd_wakeflags;
26static struct mutex astd_running;
27 17
18static uint64_t dlm_cb_seq;
19static spinlock_t dlm_cb_seq_spin;
28 20
29static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) 21static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
30{ 22{
@@ -57,21 +49,13 @@ static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
57 } 49 }
58} 50}
59 51
60void dlm_del_ast(struct dlm_lkb *lkb)
61{
62 spin_lock(&ast_queue_lock);
63 if (!list_empty(&lkb->lkb_astqueue))
64 list_del_init(&lkb->lkb_astqueue);
65 spin_unlock(&ast_queue_lock);
66}
67
68int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, 52int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
69 int status, uint32_t sbflags, uint64_t seq) 53 int status, uint32_t sbflags, uint64_t seq)
70{ 54{
71 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 55 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
72 uint64_t prev_seq; 56 uint64_t prev_seq;
73 int prev_mode; 57 int prev_mode;
74 int i; 58 int i, rv;
75 59
76 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { 60 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
77 if (lkb->lkb_callbacks[i].seq) 61 if (lkb->lkb_callbacks[i].seq)
@@ -100,7 +84,8 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
100 mode, 84 mode,
101 (unsigned long long)prev_seq, 85 (unsigned long long)prev_seq,
102 prev_mode); 86 prev_mode);
103 return 0; 87 rv = 0;
88 goto out;
104 } 89 }
105 } 90 }
106 91
@@ -109,6 +94,7 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
109 lkb->lkb_callbacks[i].mode = mode; 94 lkb->lkb_callbacks[i].mode = mode;
110 lkb->lkb_callbacks[i].sb_status = status; 95 lkb->lkb_callbacks[i].sb_status = status;
111 lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF); 96 lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
97 rv = 0;
112 break; 98 break;
113 } 99 }
114 100
@@ -117,21 +103,24 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
117 lkb->lkb_id, (unsigned long long)seq, 103 lkb->lkb_id, (unsigned long long)seq,
118 flags, mode, status, sbflags); 104 flags, mode, status, sbflags);
119 dlm_dump_lkb_callbacks(lkb); 105 dlm_dump_lkb_callbacks(lkb);
120 return -1; 106 rv = -1;
107 goto out;
121 } 108 }
122 109 out:
123 return 0; 110 return rv;
124} 111}
125 112
126int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, 113int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
127 struct dlm_callback *cb, int *resid) 114 struct dlm_callback *cb, int *resid)
128{ 115{
129 int i; 116 int i, rv;
130 117
131 *resid = 0; 118 *resid = 0;
132 119
133 if (!lkb->lkb_callbacks[0].seq) 120 if (!lkb->lkb_callbacks[0].seq) {
134 return -ENOENT; 121 rv = -ENOENT;
122 goto out;
123 }
135 124
136 /* oldest undelivered cb is callbacks[0] */ 125 /* oldest undelivered cb is callbacks[0] */
137 126
@@ -163,7 +152,8 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
163 cb->mode, 152 cb->mode,
164 (unsigned long long)lkb->lkb_last_cast.seq, 153 (unsigned long long)lkb->lkb_last_cast.seq,
165 lkb->lkb_last_cast.mode); 154 lkb->lkb_last_cast.mode);
166 return 0; 155 rv = 0;
156 goto out;
167 } 157 }
168 } 158 }
169 159
@@ -176,171 +166,150 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
176 memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback)); 166 memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
177 lkb->lkb_last_bast_time = ktime_get(); 167 lkb->lkb_last_bast_time = ktime_get();
178 } 168 }
179 169 rv = 0;
180 return 0; 170 out:
171 return rv;
181} 172}
182 173
183void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, 174void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
184 uint32_t sbflags) 175 uint32_t sbflags)
185{ 176{
186 uint64_t seq; 177 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
178 uint64_t new_seq, prev_seq;
187 int rv; 179 int rv;
188 180
189 spin_lock(&ast_queue_lock); 181 spin_lock(&dlm_cb_seq_spin);
190 182 new_seq = ++dlm_cb_seq;
191 seq = ++ast_seq_count; 183 spin_unlock(&dlm_cb_seq_spin);
192 184
193 if (lkb->lkb_flags & DLM_IFL_USER) { 185 if (lkb->lkb_flags & DLM_IFL_USER) {
194 spin_unlock(&ast_queue_lock); 186 dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
195 dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq);
196 return; 187 return;
197 } 188 }
198 189
199 rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq); 190 mutex_lock(&lkb->lkb_cb_mutex);
200 if (rv < 0) { 191 prev_seq = lkb->lkb_callbacks[0].seq;
201 spin_unlock(&ast_queue_lock);
202 return;
203 }
204 192
205 if (list_empty(&lkb->lkb_astqueue)) { 193 rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq);
194 if (rv < 0)
195 goto out;
196
197 if (!prev_seq) {
206 kref_get(&lkb->lkb_ref); 198 kref_get(&lkb->lkb_ref);
207 list_add_tail(&lkb->lkb_astqueue, &ast_queue);
208 }
209 spin_unlock(&ast_queue_lock);
210 199
211 set_bit(WAKE_ASTS, &astd_wakeflags); 200 if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
212 wake_up_process(astd_task); 201 mutex_lock(&ls->ls_cb_mutex);
202 list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
203 mutex_unlock(&ls->ls_cb_mutex);
204 } else {
205 queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
206 }
207 }
208 out:
209 mutex_unlock(&lkb->lkb_cb_mutex);
213} 210}
214 211
215static void process_asts(void) 212void dlm_callback_work(struct work_struct *work)
216{ 213{
217 struct dlm_ls *ls = NULL; 214 struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work);
218 struct dlm_rsb *r = NULL; 215 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
219 struct dlm_lkb *lkb;
220 void (*castfn) (void *astparam); 216 void (*castfn) (void *astparam);
221 void (*bastfn) (void *astparam, int mode); 217 void (*bastfn) (void *astparam, int mode);
222 struct dlm_callback callbacks[DLM_CALLBACKS_SIZE]; 218 struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
223 int i, rv, resid; 219 int i, rv, resid;
224 220
225repeat: 221 memset(&callbacks, 0, sizeof(callbacks));
226 spin_lock(&ast_queue_lock);
227 list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
228 r = lkb->lkb_resource;
229 ls = r->res_ls;
230 222
231 if (dlm_locking_stopped(ls)) 223 mutex_lock(&lkb->lkb_cb_mutex);
232 continue; 224 if (!lkb->lkb_callbacks[0].seq) {
233 225 /* no callback work exists, shouldn't happen */
234 /* we remove from astqueue list and remove everything in 226 log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
235 lkb_callbacks before releasing the spinlock so empty 227 dlm_print_lkb(lkb);
236 lkb_astqueue is always consistent with empty lkb_callbacks */ 228 dlm_dump_lkb_callbacks(lkb);
237 229 }
238 list_del_init(&lkb->lkb_astqueue);
239
240 castfn = lkb->lkb_astfn;
241 bastfn = lkb->lkb_bastfn;
242 230
243 memset(&callbacks, 0, sizeof(callbacks)); 231 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
232 rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
233 if (rv < 0)
234 break;
235 }
244 236
245 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { 237 if (resid) {
246 rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid); 238 /* cbs remain, loop should have removed all, shouldn't happen */
247 if (rv < 0) 239 log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
248 break; 240 resid);
249 } 241 dlm_print_lkb(lkb);
250 spin_unlock(&ast_queue_lock); 242 dlm_dump_lkb_callbacks(lkb);
243 }
244 mutex_unlock(&lkb->lkb_cb_mutex);
251 245
252 if (resid) { 246 castfn = lkb->lkb_astfn;
253 /* shouldn't happen, for loop should have removed all */ 247 bastfn = lkb->lkb_bastfn;
254 log_error(ls, "callback resid %d lkb %x",
255 resid, lkb->lkb_id);
256 }
257 248
258 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { 249 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
259 if (!callbacks[i].seq) 250 if (!callbacks[i].seq)
260 break; 251 break;
261 if (callbacks[i].flags & DLM_CB_SKIP) { 252 if (callbacks[i].flags & DLM_CB_SKIP) {
262 continue; 253 continue;
263 } else if (callbacks[i].flags & DLM_CB_BAST) { 254 } else if (callbacks[i].flags & DLM_CB_BAST) {
264 bastfn(lkb->lkb_astparam, callbacks[i].mode); 255 bastfn(lkb->lkb_astparam, callbacks[i].mode);
265 } else if (callbacks[i].flags & DLM_CB_CAST) { 256 } else if (callbacks[i].flags & DLM_CB_CAST) {
266 lkb->lkb_lksb->sb_status = callbacks[i].sb_status; 257 lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
267 lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; 258 lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
268 castfn(lkb->lkb_astparam); 259 castfn(lkb->lkb_astparam);
269 }
270 } 260 }
271
272 /* removes ref for ast_queue, may cause lkb to be freed */
273 dlm_put_lkb(lkb);
274
275 cond_resched();
276 goto repeat;
277 } 261 }
278 spin_unlock(&ast_queue_lock);
279}
280
281static inline int no_asts(void)
282{
283 int ret;
284 262
285 spin_lock(&ast_queue_lock); 263 /* undo kref_get from dlm_add_callback, may cause lkb to be freed */
286 ret = list_empty(&ast_queue); 264 dlm_put_lkb(lkb);
287 spin_unlock(&ast_queue_lock);
288 return ret;
289} 265}
290 266
291static int dlm_astd(void *data) 267int dlm_callback_start(struct dlm_ls *ls)
292{ 268{
293 while (!kthread_should_stop()) { 269 ls->ls_callback_wq = alloc_workqueue("dlm_callback",
294 set_current_state(TASK_INTERRUPTIBLE); 270 WQ_UNBOUND |
295 if (!test_bit(WAKE_ASTS, &astd_wakeflags)) 271 WQ_MEM_RECLAIM |
296 schedule(); 272 WQ_NON_REENTRANT,
297 set_current_state(TASK_RUNNING); 273 0);
298 274 if (!ls->ls_callback_wq) {
299 mutex_lock(&astd_running); 275 log_print("can't start dlm_callback workqueue");
300 if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) 276 return -ENOMEM;
301 process_asts();
302 mutex_unlock(&astd_running);
303 } 277 }
304 return 0; 278 return 0;
305} 279}
306 280
307void dlm_astd_wake(void) 281void dlm_callback_stop(struct dlm_ls *ls)
308{ 282{
309 if (!no_asts()) { 283 if (ls->ls_callback_wq)
310 set_bit(WAKE_ASTS, &astd_wakeflags); 284 destroy_workqueue(ls->ls_callback_wq);
311 wake_up_process(astd_task);
312 }
313} 285}
314 286
315int dlm_astd_start(void) 287void dlm_callback_suspend(struct dlm_ls *ls)
316{ 288{
317 struct task_struct *p; 289 set_bit(LSFL_CB_DELAY, &ls->ls_flags);
318 int error = 0;
319
320 INIT_LIST_HEAD(&ast_queue);
321 spin_lock_init(&ast_queue_lock);
322 mutex_init(&astd_running);
323
324 p = kthread_run(dlm_astd, NULL, "dlm_astd");
325 if (IS_ERR(p))
326 error = PTR_ERR(p);
327 else
328 astd_task = p;
329 return error;
330}
331 290
332void dlm_astd_stop(void) 291 if (ls->ls_callback_wq)
333{ 292 flush_workqueue(ls->ls_callback_wq);
334 kthread_stop(astd_task);
335} 293}
336 294
337void dlm_astd_suspend(void) 295void dlm_callback_resume(struct dlm_ls *ls)
338{ 296{
339 mutex_lock(&astd_running); 297 struct dlm_lkb *lkb, *safe;
340} 298 int count = 0;
341 299
342void dlm_astd_resume(void) 300 clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
343{ 301
344 mutex_unlock(&astd_running); 302 if (!ls->ls_callback_wq)
303 return;
304
305 mutex_lock(&ls->ls_cb_mutex);
306 list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
307 list_del_init(&lkb->lkb_cb_list);
308 queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
309 count++;
310 }
311 mutex_unlock(&ls->ls_cb_mutex);
312
313 log_debug(ls, "dlm_callback_resume %d", count);
345} 314}
346 315
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 8aa89c9b5611..757b551c6820 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -18,14 +18,15 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
18 int status, uint32_t sbflags, uint64_t seq); 18 int status, uint32_t sbflags, uint64_t seq);
19int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, 19int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
20 struct dlm_callback *cb, int *resid); 20 struct dlm_callback *cb, int *resid);
21void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, 21void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
22 uint32_t sbflags); 22 uint32_t sbflags);
23 23
24void dlm_astd_wake(void); 24void dlm_callback_work(struct work_struct *work);
25int dlm_astd_start(void); 25int dlm_callback_start(struct dlm_ls *ls);
26void dlm_astd_stop(void); 26void dlm_callback_stop(struct dlm_ls *ls);
27void dlm_astd_suspend(void); 27void dlm_callback_suspend(struct dlm_ls *ls);
28void dlm_astd_resume(void); 28void dlm_callback_resume(struct dlm_ls *ls);
29 29
30#endif 30#endif
31 31
32
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 9b026ea8baa9..6cf72fcc0d0c 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -28,7 +28,8 @@
28 * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight 28 * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
29 * /config/dlm/<cluster>/comms/<comm>/nodeid 29 * /config/dlm/<cluster>/comms/<comm>/nodeid
30 * /config/dlm/<cluster>/comms/<comm>/local 30 * /config/dlm/<cluster>/comms/<comm>/local
31 * /config/dlm/<cluster>/comms/<comm>/addr 31 * /config/dlm/<cluster>/comms/<comm>/addr (write only)
32 * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
32 * The <cluster> level is useless, but I haven't figured out how to avoid it. 33 * The <cluster> level is useless, but I haven't figured out how to avoid it.
33 */ 34 */
34 35
@@ -80,6 +81,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
80 size_t len); 81 size_t len);
81static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, 82static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
82 size_t len); 83 size_t len);
84static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf);
83static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); 85static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
84static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, 86static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
85 size_t len); 87 size_t len);
@@ -92,7 +94,6 @@ struct dlm_cluster {
92 unsigned int cl_tcp_port; 94 unsigned int cl_tcp_port;
93 unsigned int cl_buffer_size; 95 unsigned int cl_buffer_size;
94 unsigned int cl_rsbtbl_size; 96 unsigned int cl_rsbtbl_size;
95 unsigned int cl_lkbtbl_size;
96 unsigned int cl_dirtbl_size; 97 unsigned int cl_dirtbl_size;
97 unsigned int cl_recover_timer; 98 unsigned int cl_recover_timer;
98 unsigned int cl_toss_secs; 99 unsigned int cl_toss_secs;
@@ -101,13 +102,13 @@ struct dlm_cluster {
101 unsigned int cl_protocol; 102 unsigned int cl_protocol;
102 unsigned int cl_timewarn_cs; 103 unsigned int cl_timewarn_cs;
103 unsigned int cl_waitwarn_us; 104 unsigned int cl_waitwarn_us;
105 unsigned int cl_new_rsb_count;
104}; 106};
105 107
106enum { 108enum {
107 CLUSTER_ATTR_TCP_PORT = 0, 109 CLUSTER_ATTR_TCP_PORT = 0,
108 CLUSTER_ATTR_BUFFER_SIZE, 110 CLUSTER_ATTR_BUFFER_SIZE,
109 CLUSTER_ATTR_RSBTBL_SIZE, 111 CLUSTER_ATTR_RSBTBL_SIZE,
110 CLUSTER_ATTR_LKBTBL_SIZE,
111 CLUSTER_ATTR_DIRTBL_SIZE, 112 CLUSTER_ATTR_DIRTBL_SIZE,
112 CLUSTER_ATTR_RECOVER_TIMER, 113 CLUSTER_ATTR_RECOVER_TIMER,
113 CLUSTER_ATTR_TOSS_SECS, 114 CLUSTER_ATTR_TOSS_SECS,
@@ -116,6 +117,7 @@ enum {
116 CLUSTER_ATTR_PROTOCOL, 117 CLUSTER_ATTR_PROTOCOL,
117 CLUSTER_ATTR_TIMEWARN_CS, 118 CLUSTER_ATTR_TIMEWARN_CS,
118 CLUSTER_ATTR_WAITWARN_US, 119 CLUSTER_ATTR_WAITWARN_US,
120 CLUSTER_ATTR_NEW_RSB_COUNT,
119}; 121};
120 122
121struct cluster_attribute { 123struct cluster_attribute {
@@ -160,7 +162,6 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
160CLUSTER_ATTR(tcp_port, 1); 162CLUSTER_ATTR(tcp_port, 1);
161CLUSTER_ATTR(buffer_size, 1); 163CLUSTER_ATTR(buffer_size, 1);
162CLUSTER_ATTR(rsbtbl_size, 1); 164CLUSTER_ATTR(rsbtbl_size, 1);
163CLUSTER_ATTR(lkbtbl_size, 1);
164CLUSTER_ATTR(dirtbl_size, 1); 165CLUSTER_ATTR(dirtbl_size, 1);
165CLUSTER_ATTR(recover_timer, 1); 166CLUSTER_ATTR(recover_timer, 1);
166CLUSTER_ATTR(toss_secs, 1); 167CLUSTER_ATTR(toss_secs, 1);
@@ -169,12 +170,12 @@ CLUSTER_ATTR(log_debug, 0);
169CLUSTER_ATTR(protocol, 0); 170CLUSTER_ATTR(protocol, 0);
170CLUSTER_ATTR(timewarn_cs, 1); 171CLUSTER_ATTR(timewarn_cs, 1);
171CLUSTER_ATTR(waitwarn_us, 0); 172CLUSTER_ATTR(waitwarn_us, 0);
173CLUSTER_ATTR(new_rsb_count, 0);
172 174
173static struct configfs_attribute *cluster_attrs[] = { 175static struct configfs_attribute *cluster_attrs[] = {
174 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 176 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
175 [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, 177 [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
176 [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, 178 [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
177 [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
178 [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, 179 [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
179 [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, 180 [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
180 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, 181 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
@@ -183,6 +184,7 @@ static struct configfs_attribute *cluster_attrs[] = {
183 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 184 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
184 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 185 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
185 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, 186 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
187 [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
186 NULL, 188 NULL,
187}; 189};
188 190
@@ -190,6 +192,7 @@ enum {
190 COMM_ATTR_NODEID = 0, 192 COMM_ATTR_NODEID = 0,
191 COMM_ATTR_LOCAL, 193 COMM_ATTR_LOCAL,
192 COMM_ATTR_ADDR, 194 COMM_ATTR_ADDR,
195 COMM_ATTR_ADDR_LIST,
193}; 196};
194 197
195struct comm_attribute { 198struct comm_attribute {
@@ -217,14 +220,22 @@ static struct comm_attribute comm_attr_local = {
217static struct comm_attribute comm_attr_addr = { 220static struct comm_attribute comm_attr_addr = {
218 .attr = { .ca_owner = THIS_MODULE, 221 .attr = { .ca_owner = THIS_MODULE,
219 .ca_name = "addr", 222 .ca_name = "addr",
220 .ca_mode = S_IRUGO | S_IWUSR }, 223 .ca_mode = S_IWUSR },
221 .store = comm_addr_write, 224 .store = comm_addr_write,
222}; 225};
223 226
227static struct comm_attribute comm_attr_addr_list = {
228 .attr = { .ca_owner = THIS_MODULE,
229 .ca_name = "addr_list",
230 .ca_mode = S_IRUGO },
231 .show = comm_addr_list_read,
232};
233
224static struct configfs_attribute *comm_attrs[] = { 234static struct configfs_attribute *comm_attrs[] = {
225 [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, 235 [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr,
226 [COMM_ATTR_LOCAL] = &comm_attr_local.attr, 236 [COMM_ATTR_LOCAL] = &comm_attr_local.attr,
227 [COMM_ATTR_ADDR] = &comm_attr_addr.attr, 237 [COMM_ATTR_ADDR] = &comm_attr_addr.attr,
238 [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr,
228 NULL, 239 NULL,
229}; 240};
230 241
@@ -435,7 +446,6 @@ static struct config_group *make_cluster(struct config_group *g,
435 cl->cl_tcp_port = dlm_config.ci_tcp_port; 446 cl->cl_tcp_port = dlm_config.ci_tcp_port;
436 cl->cl_buffer_size = dlm_config.ci_buffer_size; 447 cl->cl_buffer_size = dlm_config.ci_buffer_size;
437 cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; 448 cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
438 cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
439 cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; 449 cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
440 cl->cl_recover_timer = dlm_config.ci_recover_timer; 450 cl->cl_recover_timer = dlm_config.ci_recover_timer;
441 cl->cl_toss_secs = dlm_config.ci_toss_secs; 451 cl->cl_toss_secs = dlm_config.ci_toss_secs;
@@ -444,6 +454,7 @@ static struct config_group *make_cluster(struct config_group *g,
444 cl->cl_protocol = dlm_config.ci_protocol; 454 cl->cl_protocol = dlm_config.ci_protocol;
445 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 455 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
446 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; 456 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
457 cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
447 458
448 space_list = &sps->ss_group; 459 space_list = &sps->ss_group;
449 comm_list = &cms->cs_group; 460 comm_list = &cms->cs_group;
@@ -720,6 +731,50 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
720 return len; 731 return len;
721} 732}
722 733
734static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf)
735{
736 ssize_t s;
737 ssize_t allowance;
738 int i;
739 struct sockaddr_storage *addr;
740 struct sockaddr_in *addr_in;
741 struct sockaddr_in6 *addr_in6;
742
743 /* Taken from ip6_addr_string() defined in lib/vsprintf.c */
744 char buf0[sizeof("AF_INET6 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")];
745
746
747 /* Derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */
748 allowance = 4096;
749 buf[0] = '\0';
750
751 for (i = 0; i < cm->addr_count; i++) {
752 addr = cm->addr[i];
753
754 switch(addr->ss_family) {
755 case AF_INET:
756 addr_in = (struct sockaddr_in *)addr;
757 s = sprintf(buf0, "AF_INET %pI4\n", &addr_in->sin_addr.s_addr);
758 break;
759 case AF_INET6:
760 addr_in6 = (struct sockaddr_in6 *)addr;
761 s = sprintf(buf0, "AF_INET6 %pI6\n", &addr_in6->sin6_addr);
762 break;
763 default:
764 s = sprintf(buf0, "%s\n", "<UNKNOWN>");
765 break;
766 }
767 allowance -= s;
768 if (allowance >= 0)
769 strcat(buf, buf0);
770 else {
771 allowance += s;
772 break;
773 }
774 }
775 return 4096 - allowance;
776}
777
723static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, 778static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
724 char *buf) 779 char *buf)
725{ 780{
@@ -983,7 +1038,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
983#define DEFAULT_TCP_PORT 21064 1038#define DEFAULT_TCP_PORT 21064
984#define DEFAULT_BUFFER_SIZE 4096 1039#define DEFAULT_BUFFER_SIZE 4096
985#define DEFAULT_RSBTBL_SIZE 1024 1040#define DEFAULT_RSBTBL_SIZE 1024
986#define DEFAULT_LKBTBL_SIZE 1024
987#define DEFAULT_DIRTBL_SIZE 1024 1041#define DEFAULT_DIRTBL_SIZE 1024
988#define DEFAULT_RECOVER_TIMER 5 1042#define DEFAULT_RECOVER_TIMER 5
989#define DEFAULT_TOSS_SECS 10 1043#define DEFAULT_TOSS_SECS 10
@@ -992,12 +1046,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
992#define DEFAULT_PROTOCOL 0 1046#define DEFAULT_PROTOCOL 0
993#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 1047#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
994#define DEFAULT_WAITWARN_US 0 1048#define DEFAULT_WAITWARN_US 0
1049#define DEFAULT_NEW_RSB_COUNT 128
995 1050
996struct dlm_config_info dlm_config = { 1051struct dlm_config_info dlm_config = {
997 .ci_tcp_port = DEFAULT_TCP_PORT, 1052 .ci_tcp_port = DEFAULT_TCP_PORT,
998 .ci_buffer_size = DEFAULT_BUFFER_SIZE, 1053 .ci_buffer_size = DEFAULT_BUFFER_SIZE,
999 .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, 1054 .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
1000 .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
1001 .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, 1055 .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
1002 .ci_recover_timer = DEFAULT_RECOVER_TIMER, 1056 .ci_recover_timer = DEFAULT_RECOVER_TIMER,
1003 .ci_toss_secs = DEFAULT_TOSS_SECS, 1057 .ci_toss_secs = DEFAULT_TOSS_SECS,
@@ -1005,6 +1059,7 @@ struct dlm_config_info dlm_config = {
1005 .ci_log_debug = DEFAULT_LOG_DEBUG, 1059 .ci_log_debug = DEFAULT_LOG_DEBUG,
1006 .ci_protocol = DEFAULT_PROTOCOL, 1060 .ci_protocol = DEFAULT_PROTOCOL,
1007 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, 1061 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
1008 .ci_waitwarn_us = DEFAULT_WAITWARN_US 1062 .ci_waitwarn_us = DEFAULT_WAITWARN_US,
1063 .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT
1009}; 1064};
1010 1065
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index dd0ce24d5a80..3099d0dd26c0 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -20,7 +20,6 @@ struct dlm_config_info {
20 int ci_tcp_port; 20 int ci_tcp_port;
21 int ci_buffer_size; 21 int ci_buffer_size;
22 int ci_rsbtbl_size; 22 int ci_rsbtbl_size;
23 int ci_lkbtbl_size;
24 int ci_dirtbl_size; 23 int ci_dirtbl_size;
25 int ci_recover_timer; 24 int ci_recover_timer;
26 int ci_toss_secs; 25 int ci_toss_secs;
@@ -29,6 +28,7 @@ struct dlm_config_info {
29 int ci_protocol; 28 int ci_protocol;
30 int ci_timewarn_cs; 29 int ci_timewarn_cs;
31 int ci_waitwarn_us; 30 int ci_waitwarn_us;
31 int ci_new_rsb_count;
32}; 32};
33 33
34extern struct dlm_config_info dlm_config; 34extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 0262451eb9c6..fe2860c02449 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -37,6 +37,7 @@
37#include <linux/jhash.h> 37#include <linux/jhash.h>
38#include <linux/miscdevice.h> 38#include <linux/miscdevice.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/idr.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41 42
42#include <linux/dlm.h> 43#include <linux/dlm.h>
@@ -52,7 +53,6 @@ struct dlm_ls;
52struct dlm_lkb; 53struct dlm_lkb;
53struct dlm_rsb; 54struct dlm_rsb;
54struct dlm_member; 55struct dlm_member;
55struct dlm_lkbtable;
56struct dlm_rsbtable; 56struct dlm_rsbtable;
57struct dlm_dirtable; 57struct dlm_dirtable;
58struct dlm_direntry; 58struct dlm_direntry;
@@ -108,11 +108,6 @@ struct dlm_rsbtable {
108 spinlock_t lock; 108 spinlock_t lock;
109}; 109};
110 110
111struct dlm_lkbtable {
112 struct list_head list;
113 rwlock_t lock;
114 uint16_t counter;
115};
116 111
117/* 112/*
118 * Lockspace member (per node in a ls) 113 * Lockspace member (per node in a ls)
@@ -248,17 +243,18 @@ struct dlm_lkb {
248 int8_t lkb_wait_count; 243 int8_t lkb_wait_count;
249 int lkb_wait_nodeid; /* for debugging */ 244 int lkb_wait_nodeid; /* for debugging */
250 245
251 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
252 struct list_head lkb_statequeue; /* rsb g/c/w list */ 246 struct list_head lkb_statequeue; /* rsb g/c/w list */
253 struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ 247 struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
254 struct list_head lkb_wait_reply; /* waiting for remote reply */ 248 struct list_head lkb_wait_reply; /* waiting for remote reply */
255 struct list_head lkb_astqueue; /* need ast to be sent */
256 struct list_head lkb_ownqueue; /* list of locks for a process */ 249 struct list_head lkb_ownqueue; /* list of locks for a process */
257 struct list_head lkb_time_list; 250 struct list_head lkb_time_list;
258 ktime_t lkb_timestamp; 251 ktime_t lkb_timestamp;
259 ktime_t lkb_wait_time; 252 ktime_t lkb_wait_time;
260 unsigned long lkb_timeout_cs; 253 unsigned long lkb_timeout_cs;
261 254
255 struct mutex lkb_cb_mutex;
256 struct work_struct lkb_cb_work;
257 struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */
262 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; 258 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
263 struct dlm_callback lkb_last_cast; 259 struct dlm_callback lkb_last_cast;
264 struct dlm_callback lkb_last_bast; 260 struct dlm_callback lkb_last_bast;
@@ -299,7 +295,7 @@ struct dlm_rsb {
299 int res_recover_locks_count; 295 int res_recover_locks_count;
300 296
301 char *res_lvbptr; 297 char *res_lvbptr;
302 char res_name[1]; 298 char res_name[DLM_RESNAME_MAXLEN+1];
303}; 299};
304 300
305/* find_rsb() flags */ 301/* find_rsb() flags */
@@ -465,12 +461,12 @@ struct dlm_ls {
465 unsigned long ls_scan_time; 461 unsigned long ls_scan_time;
466 struct kobject ls_kobj; 462 struct kobject ls_kobj;
467 463
464 struct idr ls_lkbidr;
465 spinlock_t ls_lkbidr_spin;
466
468 struct dlm_rsbtable *ls_rsbtbl; 467 struct dlm_rsbtable *ls_rsbtbl;
469 uint32_t ls_rsbtbl_size; 468 uint32_t ls_rsbtbl_size;
470 469
471 struct dlm_lkbtable *ls_lkbtbl;
472 uint32_t ls_lkbtbl_size;
473
474 struct dlm_dirtable *ls_dirtbl; 470 struct dlm_dirtable *ls_dirtbl;
475 uint32_t ls_dirtbl_size; 471 uint32_t ls_dirtbl_size;
476 472
@@ -483,6 +479,10 @@ struct dlm_ls {
483 struct mutex ls_timeout_mutex; 479 struct mutex ls_timeout_mutex;
484 struct list_head ls_timeout; 480 struct list_head ls_timeout;
485 481
482 spinlock_t ls_new_rsb_spin;
483 int ls_new_rsb_count;
484 struct list_head ls_new_rsb; /* new rsb structs */
485
486 struct list_head ls_nodes; /* current nodes in ls */ 486 struct list_head ls_nodes; /* current nodes in ls */
487 struct list_head ls_nodes_gone; /* dead node list, recovery */ 487 struct list_head ls_nodes_gone; /* dead node list, recovery */
488 int ls_num_nodes; /* number of nodes in ls */ 488 int ls_num_nodes; /* number of nodes in ls */
@@ -506,8 +506,12 @@ struct dlm_ls {
506 506
507 struct miscdevice ls_device; 507 struct miscdevice ls_device;
508 508
509 struct workqueue_struct *ls_callback_wq;
510
509 /* recovery related */ 511 /* recovery related */
510 512
513 struct mutex ls_cb_mutex;
514 struct list_head ls_cb_delay; /* save for queue_work later */
511 struct timer_list ls_timer; 515 struct timer_list ls_timer;
512 struct task_struct *ls_recoverd_task; 516 struct task_struct *ls_recoverd_task;
513 struct mutex ls_recoverd_active; 517 struct mutex ls_recoverd_active;
@@ -544,6 +548,7 @@ struct dlm_ls {
544#define LSFL_RCOM_WAIT 4 548#define LSFL_RCOM_WAIT 4
545#define LSFL_UEVENT_WAIT 5 549#define LSFL_UEVENT_WAIT 5
546#define LSFL_TIMEWARN 6 550#define LSFL_TIMEWARN 6
551#define LSFL_CB_DELAY 7
547 552
548/* much of this is just saving user space pointers associated with the 553/* much of this is just saving user space pointers associated with the
549 lock that we pass back to the user lib with an ast */ 554 lock that we pass back to the user lib with an ast */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index f71d0b5abd95..83b5e32514e1 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -305,7 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
305 rv = -EDEADLK; 305 rv = -EDEADLK;
306 } 306 }
307 307
308 dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); 308 dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
309} 309}
310 310
311static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) 311static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -319,7 +319,7 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
319 if (is_master_copy(lkb)) { 319 if (is_master_copy(lkb)) {
320 send_bast(r, lkb, rqmode); 320 send_bast(r, lkb, rqmode);
321 } else { 321 } else {
322 dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0); 322 dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0);
323 } 323 }
324} 324}
325 325
@@ -327,19 +327,68 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
327 * Basic operations on rsb's and lkb's 327 * Basic operations on rsb's and lkb's
328 */ 328 */
329 329
330static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) 330static int pre_rsb_struct(struct dlm_ls *ls)
331{
332 struct dlm_rsb *r1, *r2;
333 int count = 0;
334
335 spin_lock(&ls->ls_new_rsb_spin);
336 if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) {
337 spin_unlock(&ls->ls_new_rsb_spin);
338 return 0;
339 }
340 spin_unlock(&ls->ls_new_rsb_spin);
341
342 r1 = dlm_allocate_rsb(ls);
343 r2 = dlm_allocate_rsb(ls);
344
345 spin_lock(&ls->ls_new_rsb_spin);
346 if (r1) {
347 list_add(&r1->res_hashchain, &ls->ls_new_rsb);
348 ls->ls_new_rsb_count++;
349 }
350 if (r2) {
351 list_add(&r2->res_hashchain, &ls->ls_new_rsb);
352 ls->ls_new_rsb_count++;
353 }
354 count = ls->ls_new_rsb_count;
355 spin_unlock(&ls->ls_new_rsb_spin);
356
357 if (!count)
358 return -ENOMEM;
359 return 0;
360}
361
362/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can
363 unlock any spinlocks, go back and call pre_rsb_struct again.
364 Otherwise, take an rsb off the list and return it. */
365
366static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
367 struct dlm_rsb **r_ret)
331{ 368{
332 struct dlm_rsb *r; 369 struct dlm_rsb *r;
370 int count;
333 371
334 r = dlm_allocate_rsb(ls, len); 372 spin_lock(&ls->ls_new_rsb_spin);
335 if (!r) 373 if (list_empty(&ls->ls_new_rsb)) {
336 return NULL; 374 count = ls->ls_new_rsb_count;
375 spin_unlock(&ls->ls_new_rsb_spin);
376 log_debug(ls, "find_rsb retry %d %d %s",
377 count, dlm_config.ci_new_rsb_count, name);
378 return -EAGAIN;
379 }
380
381 r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
382 list_del(&r->res_hashchain);
383 ls->ls_new_rsb_count--;
384 spin_unlock(&ls->ls_new_rsb_spin);
337 385
338 r->res_ls = ls; 386 r->res_ls = ls;
339 r->res_length = len; 387 r->res_length = len;
340 memcpy(r->res_name, name, len); 388 memcpy(r->res_name, name, len);
341 mutex_init(&r->res_mutex); 389 mutex_init(&r->res_mutex);
342 390
391 INIT_LIST_HEAD(&r->res_hashchain);
343 INIT_LIST_HEAD(&r->res_lookup); 392 INIT_LIST_HEAD(&r->res_lookup);
344 INIT_LIST_HEAD(&r->res_grantqueue); 393 INIT_LIST_HEAD(&r->res_grantqueue);
345 INIT_LIST_HEAD(&r->res_convertqueue); 394 INIT_LIST_HEAD(&r->res_convertqueue);
@@ -347,7 +396,8 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
347 INIT_LIST_HEAD(&r->res_root_list); 396 INIT_LIST_HEAD(&r->res_root_list);
348 INIT_LIST_HEAD(&r->res_recover_list); 397 INIT_LIST_HEAD(&r->res_recover_list);
349 398
350 return r; 399 *r_ret = r;
400 return 0;
351} 401}
352 402
353static int search_rsb_list(struct list_head *head, char *name, int len, 403static int search_rsb_list(struct list_head *head, char *name, int len,
@@ -405,16 +455,6 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
405 return error; 455 return error;
406} 456}
407 457
408static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
409 unsigned int flags, struct dlm_rsb **r_ret)
410{
411 int error;
412 spin_lock(&ls->ls_rsbtbl[b].lock);
413 error = _search_rsb(ls, name, len, b, flags, r_ret);
414 spin_unlock(&ls->ls_rsbtbl[b].lock);
415 return error;
416}
417
418/* 458/*
419 * Find rsb in rsbtbl and potentially create/add one 459 * Find rsb in rsbtbl and potentially create/add one
420 * 460 *
@@ -432,35 +472,48 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
432static int find_rsb(struct dlm_ls *ls, char *name, int namelen, 472static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
433 unsigned int flags, struct dlm_rsb **r_ret) 473 unsigned int flags, struct dlm_rsb **r_ret)
434{ 474{
435 struct dlm_rsb *r = NULL, *tmp; 475 struct dlm_rsb *r = NULL;
436 uint32_t hash, bucket; 476 uint32_t hash, bucket;
437 int error = -EINVAL; 477 int error;
438 478
439 if (namelen > DLM_RESNAME_MAXLEN) 479 if (namelen > DLM_RESNAME_MAXLEN) {
480 error = -EINVAL;
440 goto out; 481 goto out;
482 }
441 483
442 if (dlm_no_directory(ls)) 484 if (dlm_no_directory(ls))
443 flags |= R_CREATE; 485 flags |= R_CREATE;
444 486
445 error = 0;
446 hash = jhash(name, namelen, 0); 487 hash = jhash(name, namelen, 0);
447 bucket = hash & (ls->ls_rsbtbl_size - 1); 488 bucket = hash & (ls->ls_rsbtbl_size - 1);
448 489
449 error = search_rsb(ls, name, namelen, bucket, flags, &r); 490 retry:
491 if (flags & R_CREATE) {
492 error = pre_rsb_struct(ls);
493 if (error < 0)
494 goto out;
495 }
496
497 spin_lock(&ls->ls_rsbtbl[bucket].lock);
498
499 error = _search_rsb(ls, name, namelen, bucket, flags, &r);
450 if (!error) 500 if (!error)
451 goto out; 501 goto out_unlock;
452 502
453 if (error == -EBADR && !(flags & R_CREATE)) 503 if (error == -EBADR && !(flags & R_CREATE))
454 goto out; 504 goto out_unlock;
455 505
456 /* the rsb was found but wasn't a master copy */ 506 /* the rsb was found but wasn't a master copy */
457 if (error == -ENOTBLK) 507 if (error == -ENOTBLK)
458 goto out; 508 goto out_unlock;
459 509
460 error = -ENOMEM; 510 error = get_rsb_struct(ls, name, namelen, &r);
461 r = create_rsb(ls, name, namelen); 511 if (error == -EAGAIN) {
462 if (!r) 512 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
463 goto out; 513 goto retry;
514 }
515 if (error)
516 goto out_unlock;
464 517
465 r->res_hash = hash; 518 r->res_hash = hash;
466 r->res_bucket = bucket; 519 r->res_bucket = bucket;
@@ -474,18 +527,10 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
474 nodeid = 0; 527 nodeid = 0;
475 r->res_nodeid = nodeid; 528 r->res_nodeid = nodeid;
476 } 529 }
477
478 spin_lock(&ls->ls_rsbtbl[bucket].lock);
479 error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
480 if (!error) {
481 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
482 dlm_free_rsb(r);
483 r = tmp;
484 goto out;
485 }
486 list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); 530 list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
487 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
488 error = 0; 531 error = 0;
532 out_unlock:
533 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
489 out: 534 out:
490 *r_ret = r; 535 *r_ret = r;
491 return error; 536 return error;
@@ -580,9 +625,8 @@ static void detach_lkb(struct dlm_lkb *lkb)
580 625
581static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) 626static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
582{ 627{
583 struct dlm_lkb *lkb, *tmp; 628 struct dlm_lkb *lkb;
584 uint32_t lkid = 0; 629 int rv, id;
585 uint16_t bucket;
586 630
587 lkb = dlm_allocate_lkb(ls); 631 lkb = dlm_allocate_lkb(ls);
588 if (!lkb) 632 if (!lkb)
@@ -594,60 +638,42 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
594 INIT_LIST_HEAD(&lkb->lkb_ownqueue); 638 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
595 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); 639 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
596 INIT_LIST_HEAD(&lkb->lkb_time_list); 640 INIT_LIST_HEAD(&lkb->lkb_time_list);
597 INIT_LIST_HEAD(&lkb->lkb_astqueue); 641 INIT_LIST_HEAD(&lkb->lkb_cb_list);
642 mutex_init(&lkb->lkb_cb_mutex);
643 INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
598 644
599 get_random_bytes(&bucket, sizeof(bucket)); 645 retry:
600 bucket &= (ls->ls_lkbtbl_size - 1); 646 rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
601 647 if (!rv)
602 write_lock(&ls->ls_lkbtbl[bucket].lock); 648 return -ENOMEM;
603 649
604 /* counter can roll over so we must verify lkid is not in use */ 650 spin_lock(&ls->ls_lkbidr_spin);
651 rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id);
652 if (!rv)
653 lkb->lkb_id = id;
654 spin_unlock(&ls->ls_lkbidr_spin);
605 655
606 while (lkid == 0) { 656 if (rv == -EAGAIN)
607 lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++; 657 goto retry;
608 658
609 list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, 659 if (rv < 0) {
610 lkb_idtbl_list) { 660 log_error(ls, "create_lkb idr error %d", rv);
611 if (tmp->lkb_id != lkid) 661 return rv;
612 continue;
613 lkid = 0;
614 break;
615 }
616 } 662 }
617 663
618 lkb->lkb_id = lkid;
619 list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
620 write_unlock(&ls->ls_lkbtbl[bucket].lock);
621
622 *lkb_ret = lkb; 664 *lkb_ret = lkb;
623 return 0; 665 return 0;
624} 666}
625 667
626static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
627{
628 struct dlm_lkb *lkb;
629 uint16_t bucket = (lkid >> 16);
630
631 list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
632 if (lkb->lkb_id == lkid)
633 return lkb;
634 }
635 return NULL;
636}
637
638static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) 668static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
639{ 669{
640 struct dlm_lkb *lkb; 670 struct dlm_lkb *lkb;
641 uint16_t bucket = (lkid >> 16);
642
643 if (bucket >= ls->ls_lkbtbl_size)
644 return -EBADSLT;
645 671
646 read_lock(&ls->ls_lkbtbl[bucket].lock); 672 spin_lock(&ls->ls_lkbidr_spin);
647 lkb = __find_lkb(ls, lkid); 673 lkb = idr_find(&ls->ls_lkbidr, lkid);
648 if (lkb) 674 if (lkb)
649 kref_get(&lkb->lkb_ref); 675 kref_get(&lkb->lkb_ref);
650 read_unlock(&ls->ls_lkbtbl[bucket].lock); 676 spin_unlock(&ls->ls_lkbidr_spin);
651 677
652 *lkb_ret = lkb; 678 *lkb_ret = lkb;
653 return lkb ? 0 : -ENOENT; 679 return lkb ? 0 : -ENOENT;
@@ -668,12 +694,12 @@ static void kill_lkb(struct kref *kref)
668 694
669static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) 695static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
670{ 696{
671 uint16_t bucket = (lkb->lkb_id >> 16); 697 uint32_t lkid = lkb->lkb_id;
672 698
673 write_lock(&ls->ls_lkbtbl[bucket].lock); 699 spin_lock(&ls->ls_lkbidr_spin);
674 if (kref_put(&lkb->lkb_ref, kill_lkb)) { 700 if (kref_put(&lkb->lkb_ref, kill_lkb)) {
675 list_del(&lkb->lkb_idtbl_list); 701 idr_remove(&ls->ls_lkbidr, lkid);
676 write_unlock(&ls->ls_lkbtbl[bucket].lock); 702 spin_unlock(&ls->ls_lkbidr_spin);
677 703
678 detach_lkb(lkb); 704 detach_lkb(lkb);
679 705
@@ -683,7 +709,7 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
683 dlm_free_lkb(lkb); 709 dlm_free_lkb(lkb);
684 return 1; 710 return 1;
685 } else { 711 } else {
686 write_unlock(&ls->ls_lkbtbl[bucket].lock); 712 spin_unlock(&ls->ls_lkbidr_spin);
687 return 0; 713 return 0;
688 } 714 }
689} 715}
@@ -849,9 +875,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
849 875
850 if (!num_nodes) { 876 if (!num_nodes) {
851 num_nodes = ls->ls_num_nodes; 877 num_nodes = ls->ls_num_nodes;
852 warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); 878 warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL);
853 if (warned)
854 memset(warned, 0, num_nodes * sizeof(int));
855 } 879 }
856 if (!warned) 880 if (!warned)
857 continue; 881 continue;
@@ -863,9 +887,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
863 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); 887 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
864 } 888 }
865 mutex_unlock(&ls->ls_waiters_mutex); 889 mutex_unlock(&ls->ls_waiters_mutex);
866 890 kfree(warned);
867 if (warned)
868 kfree(warned);
869 891
870 if (debug_expired) 892 if (debug_expired)
871 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", 893 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
@@ -2401,9 +2423,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2401 2423
2402 if (deadlk) { 2424 if (deadlk) {
2403 /* it's left on the granted queue */ 2425 /* it's left on the granted queue */
2404 log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
2405 lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
2406 lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
2407 revert_lock(r, lkb); 2426 revert_lock(r, lkb);
2408 queue_cast(r, lkb, -EDEADLK); 2427 queue_cast(r, lkb, -EDEADLK);
2409 error = -EDEADLK; 2428 error = -EDEADLK;
@@ -3993,8 +4012,6 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
3993 default: 4012 default:
3994 log_error(ls, "unknown message type %d", ms->m_type); 4013 log_error(ls, "unknown message type %d", ms->m_type);
3995 } 4014 }
3996
3997 dlm_astd_wake();
3998} 4015}
3999 4016
4000/* If the lockspace is in recovery mode (locking stopped), then normal 4017/* If the lockspace is in recovery mode (locking stopped), then normal
@@ -4133,7 +4150,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4133 struct dlm_message *ms_stub; 4150 struct dlm_message *ms_stub;
4134 int wait_type, stub_unlock_result, stub_cancel_result; 4151 int wait_type, stub_unlock_result, stub_cancel_result;
4135 4152
4136 ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); 4153 ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
4137 if (!ms_stub) { 4154 if (!ms_stub) {
4138 log_error(ls, "dlm_recover_waiters_pre no mem"); 4155 log_error(ls, "dlm_recover_waiters_pre no mem");
4139 return; 4156 return;
@@ -4809,7 +4826,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4809 goto out_put; 4826 goto out_put;
4810 4827
4811 spin_lock(&ua->proc->locks_spin); 4828 spin_lock(&ua->proc->locks_spin);
4812 /* dlm_user_add_ast() may have already taken lkb off the proc list */ 4829 /* dlm_user_add_cb() may have already taken lkb off the proc list */
4813 if (!list_empty(&lkb->lkb_ownqueue)) 4830 if (!list_empty(&lkb->lkb_ownqueue))
4814 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); 4831 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
4815 spin_unlock(&ua->proc->locks_spin); 4832 spin_unlock(&ua->proc->locks_spin);
@@ -4946,7 +4963,7 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
4946 4963
4947/* We have to release clear_proc_locks mutex before calling unlock_proc_lock() 4964/* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
4948 (which does lock_rsb) due to deadlock with receiving a message that does 4965 (which does lock_rsb) due to deadlock with receiving a message that does
4949 lock_rsb followed by dlm_user_add_ast() */ 4966 lock_rsb followed by dlm_user_add_cb() */
4950 4967
4951static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, 4968static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
4952 struct dlm_user_proc *proc) 4969 struct dlm_user_proc *proc)
@@ -4969,7 +4986,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
4969 return lkb; 4986 return lkb;
4970} 4987}
4971 4988
4972/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which 4989/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
4973 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, 4990 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
4974 which we clear here. */ 4991 which we clear here. */
4975 4992
@@ -5011,10 +5028,10 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
5011 dlm_put_lkb(lkb); 5028 dlm_put_lkb(lkb);
5012 } 5029 }
5013 5030
5014 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { 5031 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
5015 memset(&lkb->lkb_callbacks, 0, 5032 memset(&lkb->lkb_callbacks, 0,
5016 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); 5033 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
5017 list_del_init(&lkb->lkb_astqueue); 5034 list_del_init(&lkb->lkb_cb_list);
5018 dlm_put_lkb(lkb); 5035 dlm_put_lkb(lkb);
5019 } 5036 }
5020 5037
@@ -5053,10 +5070,10 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
5053 spin_unlock(&proc->locks_spin); 5070 spin_unlock(&proc->locks_spin);
5054 5071
5055 spin_lock(&proc->asts_spin); 5072 spin_lock(&proc->asts_spin);
5056 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { 5073 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
5057 memset(&lkb->lkb_callbacks, 0, 5074 memset(&lkb->lkb_callbacks, 0,
5058 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); 5075 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
5059 list_del_init(&lkb->lkb_astqueue); 5076 list_del_init(&lkb->lkb_cb_list);
5060 dlm_put_lkb(lkb); 5077 dlm_put_lkb(lkb);
5061 } 5078 }
5062 spin_unlock(&proc->asts_spin); 5079 spin_unlock(&proc->asts_spin);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 14cbf4099753..a1d8f1af144b 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -15,7 +15,6 @@
15#include "lockspace.h" 15#include "lockspace.h"
16#include "member.h" 16#include "member.h"
17#include "recoverd.h" 17#include "recoverd.h"
18#include "ast.h"
19#include "dir.h" 18#include "dir.h"
20#include "lowcomms.h" 19#include "lowcomms.h"
21#include "config.h" 20#include "config.h"
@@ -24,6 +23,7 @@
24#include "recover.h" 23#include "recover.h"
25#include "requestqueue.h" 24#include "requestqueue.h"
26#include "user.h" 25#include "user.h"
26#include "ast.h"
27 27
28static int ls_count; 28static int ls_count;
29static struct mutex ls_lock; 29static struct mutex ls_lock;
@@ -359,17 +359,10 @@ static int threads_start(void)
359{ 359{
360 int error; 360 int error;
361 361
362 /* Thread which process lock requests for all lockspace's */
363 error = dlm_astd_start();
364 if (error) {
365 log_print("cannot start dlm_astd thread %d", error);
366 goto fail;
367 }
368
369 error = dlm_scand_start(); 362 error = dlm_scand_start();
370 if (error) { 363 if (error) {
371 log_print("cannot start dlm_scand thread %d", error); 364 log_print("cannot start dlm_scand thread %d", error);
372 goto astd_fail; 365 goto fail;
373 } 366 }
374 367
375 /* Thread for sending/receiving messages for all lockspace's */ 368 /* Thread for sending/receiving messages for all lockspace's */
@@ -383,8 +376,6 @@ static int threads_start(void)
383 376
384 scand_fail: 377 scand_fail:
385 dlm_scand_stop(); 378 dlm_scand_stop();
386 astd_fail:
387 dlm_astd_stop();
388 fail: 379 fail:
389 return error; 380 return error;
390} 381}
@@ -393,7 +384,6 @@ static void threads_stop(void)
393{ 384{
394 dlm_scand_stop(); 385 dlm_scand_stop();
395 dlm_lowcomms_stop(); 386 dlm_lowcomms_stop();
396 dlm_astd_stop();
397} 387}
398 388
399static int new_lockspace(const char *name, int namelen, void **lockspace, 389static int new_lockspace(const char *name, int namelen, void **lockspace,
@@ -463,7 +453,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
463 size = dlm_config.ci_rsbtbl_size; 453 size = dlm_config.ci_rsbtbl_size;
464 ls->ls_rsbtbl_size = size; 454 ls->ls_rsbtbl_size = size;
465 455
466 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS); 456 ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
467 if (!ls->ls_rsbtbl) 457 if (!ls->ls_rsbtbl)
468 goto out_lsfree; 458 goto out_lsfree;
469 for (i = 0; i < size; i++) { 459 for (i = 0; i < size; i++) {
@@ -472,22 +462,13 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
472 spin_lock_init(&ls->ls_rsbtbl[i].lock); 462 spin_lock_init(&ls->ls_rsbtbl[i].lock);
473 } 463 }
474 464
475 size = dlm_config.ci_lkbtbl_size; 465 idr_init(&ls->ls_lkbidr);
476 ls->ls_lkbtbl_size = size; 466 spin_lock_init(&ls->ls_lkbidr_spin);
477
478 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
479 if (!ls->ls_lkbtbl)
480 goto out_rsbfree;
481 for (i = 0; i < size; i++) {
482 INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
483 rwlock_init(&ls->ls_lkbtbl[i].lock);
484 ls->ls_lkbtbl[i].counter = 1;
485 }
486 467
487 size = dlm_config.ci_dirtbl_size; 468 size = dlm_config.ci_dirtbl_size;
488 ls->ls_dirtbl_size = size; 469 ls->ls_dirtbl_size = size;
489 470
490 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS); 471 ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size);
491 if (!ls->ls_dirtbl) 472 if (!ls->ls_dirtbl)
492 goto out_lkbfree; 473 goto out_lkbfree;
493 for (i = 0; i < size; i++) { 474 for (i = 0; i < size; i++) {
@@ -502,6 +483,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
502 INIT_LIST_HEAD(&ls->ls_timeout); 483 INIT_LIST_HEAD(&ls->ls_timeout);
503 mutex_init(&ls->ls_timeout_mutex); 484 mutex_init(&ls->ls_timeout_mutex);
504 485
486 INIT_LIST_HEAD(&ls->ls_new_rsb);
487 spin_lock_init(&ls->ls_new_rsb_spin);
488
505 INIT_LIST_HEAD(&ls->ls_nodes); 489 INIT_LIST_HEAD(&ls->ls_nodes);
506 INIT_LIST_HEAD(&ls->ls_nodes_gone); 490 INIT_LIST_HEAD(&ls->ls_nodes_gone);
507 ls->ls_num_nodes = 0; 491 ls->ls_num_nodes = 0;
@@ -520,6 +504,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
520 init_completion(&ls->ls_members_done); 504 init_completion(&ls->ls_members_done);
521 ls->ls_members_result = -1; 505 ls->ls_members_result = -1;
522 506
507 mutex_init(&ls->ls_cb_mutex);
508 INIT_LIST_HEAD(&ls->ls_cb_delay);
509
523 ls->ls_recoverd_task = NULL; 510 ls->ls_recoverd_task = NULL;
524 mutex_init(&ls->ls_recoverd_active); 511 mutex_init(&ls->ls_recoverd_active);
525 spin_lock_init(&ls->ls_recover_lock); 512 spin_lock_init(&ls->ls_recover_lock);
@@ -553,18 +540,26 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
553 list_add(&ls->ls_list, &lslist); 540 list_add(&ls->ls_list, &lslist);
554 spin_unlock(&lslist_lock); 541 spin_unlock(&lslist_lock);
555 542
543 if (flags & DLM_LSFL_FS) {
544 error = dlm_callback_start(ls);
545 if (error) {
546 log_error(ls, "can't start dlm_callback %d", error);
547 goto out_delist;
548 }
549 }
550
556 /* needs to find ls in lslist */ 551 /* needs to find ls in lslist */
557 error = dlm_recoverd_start(ls); 552 error = dlm_recoverd_start(ls);
558 if (error) { 553 if (error) {
559 log_error(ls, "can't start dlm_recoverd %d", error); 554 log_error(ls, "can't start dlm_recoverd %d", error);
560 goto out_delist; 555 goto out_callback;
561 } 556 }
562 557
563 ls->ls_kobj.kset = dlm_kset; 558 ls->ls_kobj.kset = dlm_kset;
564 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, 559 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
565 "%s", ls->ls_name); 560 "%s", ls->ls_name);
566 if (error) 561 if (error)
567 goto out_stop; 562 goto out_recoverd;
568 kobject_uevent(&ls->ls_kobj, KOBJ_ADD); 563 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
569 564
570 /* let kobject handle freeing of ls if there's an error */ 565 /* let kobject handle freeing of ls if there's an error */
@@ -578,7 +573,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
578 573
579 error = do_uevent(ls, 1); 574 error = do_uevent(ls, 1);
580 if (error) 575 if (error)
581 goto out_stop; 576 goto out_recoverd;
582 577
583 wait_for_completion(&ls->ls_members_done); 578 wait_for_completion(&ls->ls_members_done);
584 error = ls->ls_members_result; 579 error = ls->ls_members_result;
@@ -595,19 +590,20 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
595 do_uevent(ls, 0); 590 do_uevent(ls, 0);
596 dlm_clear_members(ls); 591 dlm_clear_members(ls);
597 kfree(ls->ls_node_array); 592 kfree(ls->ls_node_array);
598 out_stop: 593 out_recoverd:
599 dlm_recoverd_stop(ls); 594 dlm_recoverd_stop(ls);
595 out_callback:
596 dlm_callback_stop(ls);
600 out_delist: 597 out_delist:
601 spin_lock(&lslist_lock); 598 spin_lock(&lslist_lock);
602 list_del(&ls->ls_list); 599 list_del(&ls->ls_list);
603 spin_unlock(&lslist_lock); 600 spin_unlock(&lslist_lock);
604 kfree(ls->ls_recover_buf); 601 kfree(ls->ls_recover_buf);
605 out_dirfree: 602 out_dirfree:
606 kfree(ls->ls_dirtbl); 603 vfree(ls->ls_dirtbl);
607 out_lkbfree: 604 out_lkbfree:
608 kfree(ls->ls_lkbtbl); 605 idr_destroy(&ls->ls_lkbidr);
609 out_rsbfree: 606 vfree(ls->ls_rsbtbl);
610 kfree(ls->ls_rsbtbl);
611 out_lsfree: 607 out_lsfree:
612 if (do_unreg) 608 if (do_unreg)
613 kobject_put(&ls->ls_kobj); 609 kobject_put(&ls->ls_kobj);
@@ -641,50 +637,64 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
641 return error; 637 return error;
642} 638}
643 639
644/* Return 1 if the lockspace still has active remote locks, 640static int lkb_idr_is_local(int id, void *p, void *data)
645 * 2 if the lockspace still has active local locks. 641{
646 */ 642 struct dlm_lkb *lkb = p;
647static int lockspace_busy(struct dlm_ls *ls) 643
648{ 644 if (!lkb->lkb_nodeid)
649 int i, lkb_found = 0; 645 return 1;
650 struct dlm_lkb *lkb; 646 return 0;
651 647}
652 /* NOTE: We check the lockidtbl here rather than the resource table. 648
653 This is because there may be LKBs queued as ASTs that have been 649static int lkb_idr_is_any(int id, void *p, void *data)
654 unlinked from their RSBs and are pending deletion once the AST has 650{
655 been delivered */ 651 return 1;
656 652}
657 for (i = 0; i < ls->ls_lkbtbl_size; i++) { 653
658 read_lock(&ls->ls_lkbtbl[i].lock); 654static int lkb_idr_free(int id, void *p, void *data)
659 if (!list_empty(&ls->ls_lkbtbl[i].list)) { 655{
660 lkb_found = 1; 656 struct dlm_lkb *lkb = p;
661 list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, 657
662 lkb_idtbl_list) { 658 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
663 if (!lkb->lkb_nodeid) { 659 dlm_free_lvb(lkb->lkb_lvbptr);
664 read_unlock(&ls->ls_lkbtbl[i].lock); 660
665 return 2; 661 dlm_free_lkb(lkb);
666 } 662 return 0;
667 } 663}
668 } 664
669 read_unlock(&ls->ls_lkbtbl[i].lock); 665/* NOTE: We check the lkbidr here rather than the resource table.
666 This is because there may be LKBs queued as ASTs that have been unlinked
667 from their RSBs and are pending deletion once the AST has been delivered */
668
669static int lockspace_busy(struct dlm_ls *ls, int force)
670{
671 int rv;
672
673 spin_lock(&ls->ls_lkbidr_spin);
674 if (force == 0) {
675 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
676 } else if (force == 1) {
677 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
678 } else {
679 rv = 0;
670 } 680 }
671 return lkb_found; 681 spin_unlock(&ls->ls_lkbidr_spin);
682 return rv;
672} 683}
673 684
674static int release_lockspace(struct dlm_ls *ls, int force) 685static int release_lockspace(struct dlm_ls *ls, int force)
675{ 686{
676 struct dlm_lkb *lkb;
677 struct dlm_rsb *rsb; 687 struct dlm_rsb *rsb;
678 struct list_head *head; 688 struct list_head *head;
679 int i, busy, rv; 689 int i, busy, rv;
680 690
681 busy = lockspace_busy(ls); 691 busy = lockspace_busy(ls, force);
682 692
683 spin_lock(&lslist_lock); 693 spin_lock(&lslist_lock);
684 if (ls->ls_create_count == 1) { 694 if (ls->ls_create_count == 1) {
685 if (busy > force) 695 if (busy) {
686 rv = -EBUSY; 696 rv = -EBUSY;
687 else { 697 } else {
688 /* remove_lockspace takes ls off lslist */ 698 /* remove_lockspace takes ls off lslist */
689 ls->ls_create_count = 0; 699 ls->ls_create_count = 0;
690 rv = 0; 700 rv = 0;
@@ -708,12 +718,12 @@ static int release_lockspace(struct dlm_ls *ls, int force)
708 718
709 dlm_recoverd_stop(ls); 719 dlm_recoverd_stop(ls);
710 720
721 dlm_callback_stop(ls);
722
711 remove_lockspace(ls); 723 remove_lockspace(ls);
712 724
713 dlm_delete_debug_file(ls); 725 dlm_delete_debug_file(ls);
714 726
715 dlm_astd_suspend();
716
717 kfree(ls->ls_recover_buf); 727 kfree(ls->ls_recover_buf);
718 728
719 /* 729 /*
@@ -721,31 +731,15 @@ static int release_lockspace(struct dlm_ls *ls, int force)
721 */ 731 */
722 732
723 dlm_dir_clear(ls); 733 dlm_dir_clear(ls);
724 kfree(ls->ls_dirtbl); 734 vfree(ls->ls_dirtbl);
725 735
726 /* 736 /*
727 * Free all lkb's on lkbtbl[] lists. 737 * Free all lkb's in idr
728 */ 738 */
729 739
730 for (i = 0; i < ls->ls_lkbtbl_size; i++) { 740 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
731 head = &ls->ls_lkbtbl[i].list; 741 idr_remove_all(&ls->ls_lkbidr);
732 while (!list_empty(head)) { 742 idr_destroy(&ls->ls_lkbidr);
733 lkb = list_entry(head->next, struct dlm_lkb,
734 lkb_idtbl_list);
735
736 list_del(&lkb->lkb_idtbl_list);
737
738 dlm_del_ast(lkb);
739
740 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
741 dlm_free_lvb(lkb->lkb_lvbptr);
742
743 dlm_free_lkb(lkb);
744 }
745 }
746 dlm_astd_resume();
747
748 kfree(ls->ls_lkbtbl);
749 743
750 /* 744 /*
751 * Free all rsb's on rsbtbl[] lists 745 * Free all rsb's on rsbtbl[] lists
@@ -770,7 +764,14 @@ static int release_lockspace(struct dlm_ls *ls, int force)
770 } 764 }
771 } 765 }
772 766
773 kfree(ls->ls_rsbtbl); 767 vfree(ls->ls_rsbtbl);
768
769 while (!list_empty(&ls->ls_new_rsb)) {
770 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
771 res_hashchain);
772 list_del(&rsb->res_hashchain);
773 dlm_free_rsb(rsb);
774 }
774 775
775 /* 776 /*
776 * Free structures on any other lists 777 * Free structures on any other lists
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5e2c71f05e46..990626e7da80 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -512,12 +512,10 @@ static void process_sctp_notification(struct connection *con,
512 } 512 }
513 make_sockaddr(&prim.ssp_addr, 0, &addr_len); 513 make_sockaddr(&prim.ssp_addr, 0, &addr_len);
514 if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { 514 if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
515 int i;
516 unsigned char *b=(unsigned char *)&prim.ssp_addr; 515 unsigned char *b=(unsigned char *)&prim.ssp_addr;
517 log_print("reject connect from unknown addr"); 516 log_print("reject connect from unknown addr");
518 for (i=0; i<sizeof(struct sockaddr_storage);i++) 517 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
519 printk("%02x ", b[i]); 518 b, sizeof(struct sockaddr_storage));
520 printk("\n");
521 sctp_send_shutdown(prim.ssp_assoc_id); 519 sctp_send_shutdown(prim.ssp_assoc_id);
522 return; 520 return;
523 } 521 }
@@ -748,7 +746,10 @@ static int tcp_accept_from_sock(struct connection *con)
748 /* Get the new node's NODEID */ 746 /* Get the new node's NODEID */
749 make_sockaddr(&peeraddr, 0, &len); 747 make_sockaddr(&peeraddr, 0, &len);
750 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 748 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
749 unsigned char *b=(unsigned char *)&peeraddr;
751 log_print("connect from non cluster node"); 750 log_print("connect from non cluster node");
751 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
752 b, sizeof(struct sockaddr_storage));
752 sock_release(newsock); 753 sock_release(newsock);
753 mutex_unlock(&con->sock_mutex); 754 mutex_unlock(&con->sock_mutex);
754 return -1; 755 return -1;
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 8e0d00db004f..da64df7576e1 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -16,6 +16,7 @@
16#include "memory.h" 16#include "memory.h"
17 17
18static struct kmem_cache *lkb_cache; 18static struct kmem_cache *lkb_cache;
19static struct kmem_cache *rsb_cache;
19 20
20 21
21int __init dlm_memory_init(void) 22int __init dlm_memory_init(void)
@@ -26,6 +27,14 @@ int __init dlm_memory_init(void)
26 __alignof__(struct dlm_lkb), 0, NULL); 27 __alignof__(struct dlm_lkb), 0, NULL);
27 if (!lkb_cache) 28 if (!lkb_cache)
28 ret = -ENOMEM; 29 ret = -ENOMEM;
30
31 rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
32 __alignof__(struct dlm_rsb), 0, NULL);
33 if (!rsb_cache) {
34 kmem_cache_destroy(lkb_cache);
35 ret = -ENOMEM;
36 }
37
29 return ret; 38 return ret;
30} 39}
31 40
@@ -33,6 +42,8 @@ void dlm_memory_exit(void)
33{ 42{
34 if (lkb_cache) 43 if (lkb_cache)
35 kmem_cache_destroy(lkb_cache); 44 kmem_cache_destroy(lkb_cache);
45 if (rsb_cache)
46 kmem_cache_destroy(rsb_cache);
36} 47}
37 48
38char *dlm_allocate_lvb(struct dlm_ls *ls) 49char *dlm_allocate_lvb(struct dlm_ls *ls)
@@ -48,16 +59,11 @@ void dlm_free_lvb(char *p)
48 kfree(p); 59 kfree(p);
49} 60}
50 61
51/* FIXME: have some minimal space built-in to rsb for the name and 62struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
52 kmalloc a separate name if needed, like dentries are done */
53
54struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
55{ 63{
56 struct dlm_rsb *r; 64 struct dlm_rsb *r;
57 65
58 DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); 66 r = kmem_cache_zalloc(rsb_cache, GFP_NOFS);
59
60 r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
61 return r; 67 return r;
62} 68}
63 69
@@ -65,7 +71,7 @@ void dlm_free_rsb(struct dlm_rsb *r)
65{ 71{
66 if (r->res_lvbptr) 72 if (r->res_lvbptr)
67 dlm_free_lvb(r->res_lvbptr); 73 dlm_free_lvb(r->res_lvbptr);
68 kfree(r); 74 kmem_cache_free(rsb_cache, r);
69} 75}
70 76
71struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) 77struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index 485fb29143bd..177c11cbb0a6 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -16,7 +16,7 @@
16 16
17int dlm_memory_init(void); 17int dlm_memory_init(void);
18void dlm_memory_exit(void); 18void dlm_memory_exit(void);
19struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); 19struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls);
20void dlm_free_rsb(struct dlm_rsb *r); 20void dlm_free_rsb(struct dlm_rsb *r);
21struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); 21struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
22void dlm_free_lkb(struct dlm_lkb *l); 22void dlm_free_lkb(struct dlm_lkb *l);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index fd677c8c3d3b..774da3cf92c6 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -58,13 +58,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
58 58
59 mutex_lock(&ls->ls_recoverd_active); 59 mutex_lock(&ls->ls_recoverd_active);
60 60
61 /* 61 dlm_callback_suspend(ls);
62 * Suspending and resuming dlm_astd ensures that no lkb's from this ls
63 * will be processed by dlm_astd during recovery.
64 */
65
66 dlm_astd_suspend();
67 dlm_astd_resume();
68 62
69 /* 63 /*
70 * Free non-master tossed rsb's. Master rsb's are kept on toss 64 * Free non-master tossed rsb's. Master rsb's are kept on toss
@@ -202,6 +196,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
202 196
203 dlm_adjust_timeouts(ls); 197 dlm_adjust_timeouts(ls);
204 198
199 dlm_callback_resume(ls);
200
205 error = enable_locking(ls, rv->seq); 201 error = enable_locking(ls, rv->seq);
206 if (error) { 202 if (error) {
207 log_debug(ls, "enable_locking failed %d", error); 203 log_debug(ls, "enable_locking failed %d", error);
@@ -222,8 +218,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
222 218
223 dlm_grant_after_purge(ls); 219 dlm_grant_after_purge(ls);
224 220
225 dlm_astd_wake();
226
227 log_debug(ls, "recover %llx done: %u ms", 221 log_debug(ls, "recover %llx done: %u ms",
228 (unsigned long long)rv->seq, 222 (unsigned long long)rv->seq,
229 jiffies_to_msecs(jiffies - start)); 223 jiffies_to_msecs(jiffies - start));
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index e96bf3e9be88..d8ea60756403 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -213,9 +213,9 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
213 goto out; 213 goto out;
214 } 214 }
215 215
216 if (list_empty(&lkb->lkb_astqueue)) { 216 if (list_empty(&lkb->lkb_cb_list)) {
217 kref_get(&lkb->lkb_ref); 217 kref_get(&lkb->lkb_ref);
218 list_add_tail(&lkb->lkb_astqueue, &proc->asts); 218 list_add_tail(&lkb->lkb_cb_list, &proc->asts);
219 wake_up_interruptible(&proc->wait); 219 wake_up_interruptible(&proc->wait);
220 } 220 }
221 spin_unlock(&proc->asts_spin); 221 spin_unlock(&proc->asts_spin);
@@ -832,24 +832,24 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
832 } 832 }
833 833
834 /* if we empty lkb_callbacks, we don't want to unlock the spinlock 834 /* if we empty lkb_callbacks, we don't want to unlock the spinlock
835 without removing lkb_astqueue; so empty lkb_astqueue is always 835 without removing lkb_cb_list; so empty lkb_cb_list is always
836 consistent with empty lkb_callbacks */ 836 consistent with empty lkb_callbacks */
837 837
838 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); 838 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list);
839 839
840 rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid); 840 rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
841 if (rv < 0) { 841 if (rv < 0) {
842 /* this shouldn't happen; lkb should have been removed from 842 /* this shouldn't happen; lkb should have been removed from
843 list when resid was zero */ 843 list when resid was zero */
844 log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id); 844 log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id);
845 list_del_init(&lkb->lkb_astqueue); 845 list_del_init(&lkb->lkb_cb_list);
846 spin_unlock(&proc->asts_spin); 846 spin_unlock(&proc->asts_spin);
847 /* removes ref for proc->asts, may cause lkb to be freed */ 847 /* removes ref for proc->asts, may cause lkb to be freed */
848 dlm_put_lkb(lkb); 848 dlm_put_lkb(lkb);
849 goto try_another; 849 goto try_another;
850 } 850 }
851 if (!resid) 851 if (!resid)
852 list_del_init(&lkb->lkb_astqueue); 852 list_del_init(&lkb->lkb_cb_list);
853 spin_unlock(&proc->asts_spin); 853 spin_unlock(&proc->asts_spin);
854 854
855 if (cb.flags & DLM_CB_SKIP) { 855 if (cb.flags & DLM_CB_SKIP) {
diff --git a/fs/exec.c b/fs/exec.c
index f9f12ad299af..842d5700c155 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -963,9 +963,18 @@ static int de_thread(struct task_struct *tsk)
963 leader->group_leader = tsk; 963 leader->group_leader = tsk;
964 964
965 tsk->exit_signal = SIGCHLD; 965 tsk->exit_signal = SIGCHLD;
966 leader->exit_signal = -1;
966 967
967 BUG_ON(leader->exit_state != EXIT_ZOMBIE); 968 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
968 leader->exit_state = EXIT_DEAD; 969 leader->exit_state = EXIT_DEAD;
970
971 /*
972 * We are going to release_task()->ptrace_unlink() silently,
973 * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
974 * the tracer wont't block again waiting for this thread.
975 */
976 if (unlikely(leader->ptrace))
977 __wake_up_parent(leader, leader->parent);
969 write_unlock_irq(&tasklist_lock); 978 write_unlock_irq(&tasklist_lock);
970 979
971 release_task(leader); 980 release_task(leader);
@@ -1233,7 +1242,12 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1233 unsigned n_fs; 1242 unsigned n_fs;
1234 int res = 0; 1243 int res = 0;
1235 1244
1236 bprm->unsafe = tracehook_unsafe_exec(p); 1245 if (p->ptrace) {
1246 if (p->ptrace & PT_PTRACE_CAP)
1247 bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
1248 else
1249 bprm->unsafe |= LSM_UNSAFE_PTRACE;
1250 }
1237 1251
1238 n_fs = 1; 1252 n_fs = 1;
1239 spin_lock(&p->fs->lock); 1253 spin_lock(&p->fs->lock);
@@ -1361,6 +1375,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1361 unsigned int depth = bprm->recursion_depth; 1375 unsigned int depth = bprm->recursion_depth;
1362 int try,retval; 1376 int try,retval;
1363 struct linux_binfmt *fmt; 1377 struct linux_binfmt *fmt;
1378 pid_t old_pid;
1364 1379
1365 retval = security_bprm_check(bprm); 1380 retval = security_bprm_check(bprm);
1366 if (retval) 1381 if (retval)
@@ -1370,6 +1385,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1370 if (retval) 1385 if (retval)
1371 return retval; 1386 return retval;
1372 1387
1388 /* Need to fetch pid before load_binary changes it */
1389 rcu_read_lock();
1390 old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
1391 rcu_read_unlock();
1392
1373 retval = -ENOENT; 1393 retval = -ENOENT;
1374 for (try=0; try<2; try++) { 1394 for (try=0; try<2; try++) {
1375 read_lock(&binfmt_lock); 1395 read_lock(&binfmt_lock);
@@ -1389,7 +1409,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1389 bprm->recursion_depth = depth; 1409 bprm->recursion_depth = depth;
1390 if (retval >= 0) { 1410 if (retval >= 0) {
1391 if (depth == 0) 1411 if (depth == 0)
1392 tracehook_report_exec(fmt, bprm, regs); 1412 ptrace_event(PTRACE_EVENT_EXEC,
1413 old_pid);
1393 put_binfmt(fmt); 1414 put_binfmt(fmt);
1394 allow_write_access(bprm->file); 1415 allow_write_access(bprm->file);
1395 if (bprm->file) 1416 if (bprm->file)
@@ -1777,7 +1798,7 @@ static int zap_process(struct task_struct *start, int exit_code)
1777 1798
1778 t = start; 1799 t = start;
1779 do { 1800 do {
1780 task_clear_group_stop_pending(t); 1801 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
1781 if (t != current && t->mm) { 1802 if (t != current && t->mm) {
1782 sigaddset(&t->pending.signal, SIGKILL); 1803 sigaddset(&t->pending.signal, SIGKILL);
1783 signal_wake_up(t, 1); 1804 signal_wake_up(t, 1);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 2f343b4d7a7d..3f7a59bfa7ad 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -976,16 +976,12 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
976 976
977 pagevec_init(&pvec, 0); 977 pagevec_init(&pvec, 0);
978 next = 0; 978 next = 0;
979 while (next <= (loff_t)-1 && 979 do {
980 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) 980 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
981 ) { 981 break;
982 for (i = 0; i < pagevec_count(&pvec); i++) { 982 for (i = 0; i < pagevec_count(&pvec); i++) {
983 struct page *page = pvec.pages[i]; 983 struct page *page = pvec.pages[i];
984 pgoff_t page_index = page->index; 984 next = page->index;
985
986 ASSERTCMP(page_index, >=, next);
987 next = page_index + 1;
988
989 if (PageFsCache(page)) { 985 if (PageFsCache(page)) {
990 __fscache_wait_on_page_write(cookie, page); 986 __fscache_wait_on_page_write(cookie, page);
991 __fscache_uncache_page(cookie, page); 987 __fscache_uncache_page(cookie, page);
@@ -993,7 +989,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
993 } 989 }
994 pagevec_release(&pvec); 990 pagevec_release(&pvec);
995 cond_resched(); 991 cond_resched();
996 } 992 } while (++next);
997 993
998 _leave(""); 994 _leave("");
999} 995}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 2cd0e56b8893..7878c473ae62 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -854,11 +854,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
854 blen++; 854 blen++;
855 else { 855 else {
856 if (bstart) { 856 if (bstart) {
857 if (metadata) 857 __gfs2_free_blocks(ip, bstart, blen, metadata);
858 __gfs2_free_meta(ip, bstart, blen);
859 else
860 __gfs2_free_data(ip, bstart, blen);
861
862 btotal += blen; 858 btotal += blen;
863 } 859 }
864 860
@@ -870,11 +866,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
870 gfs2_add_inode_blocks(&ip->i_inode, -1); 866 gfs2_add_inode_blocks(&ip->i_inode, -1);
871 } 867 }
872 if (bstart) { 868 if (bstart) {
873 if (metadata) 869 __gfs2_free_blocks(ip, bstart, blen, metadata);
874 __gfs2_free_meta(ip, bstart, blen);
875 else
876 __gfs2_free_data(ip, bstart, blen);
877
878 btotal += blen; 870 btotal += blen;
879 } 871 }
880 872
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 091ee4779538..1cc2f8ec52a2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -339,6 +339,67 @@ fail:
339 return (copied) ? copied : error; 339 return (copied) ? copied : error;
340} 340}
341 341
342/**
343 * gfs2_dir_get_hash_table - Get pointer to the dir hash table
344 * @ip: The inode in question
345 *
346 * Returns: The hash table or an error
347 */
348
349static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
350{
351 struct inode *inode = &ip->i_inode;
352 int ret;
353 u32 hsize;
354 __be64 *hc;
355
356 BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));
357
358 hc = ip->i_hash_cache;
359 if (hc)
360 return hc;
361
362 hsize = 1 << ip->i_depth;
363 hsize *= sizeof(__be64);
364 if (hsize != i_size_read(&ip->i_inode)) {
365 gfs2_consist_inode(ip);
366 return ERR_PTR(-EIO);
367 }
368
369 hc = kmalloc(hsize, GFP_NOFS);
370 ret = -ENOMEM;
371 if (hc == NULL)
372 return ERR_PTR(-ENOMEM);
373
374 ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1);
375 if (ret < 0) {
376 kfree(hc);
377 return ERR_PTR(ret);
378 }
379
380 spin_lock(&inode->i_lock);
381 if (ip->i_hash_cache)
382 kfree(hc);
383 else
384 ip->i_hash_cache = hc;
385 spin_unlock(&inode->i_lock);
386
387 return ip->i_hash_cache;
388}
389
390/**
391 * gfs2_dir_hash_inval - Invalidate dir hash
392 * @ip: The directory inode
393 *
394 * Must be called with an exclusive glock, or during glock invalidation.
395 */
396void gfs2_dir_hash_inval(struct gfs2_inode *ip)
397{
398 __be64 *hc = ip->i_hash_cache;
399 ip->i_hash_cache = NULL;
400 kfree(hc);
401}
402
342static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) 403static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
343{ 404{
344 return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; 405 return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
@@ -686,17 +747,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
686static int get_leaf_nr(struct gfs2_inode *dip, u32 index, 747static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
687 u64 *leaf_out) 748 u64 *leaf_out)
688{ 749{
689 __be64 leaf_no; 750 __be64 *hash;
690 int error;
691
692 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
693 index * sizeof(__be64),
694 sizeof(__be64), 0);
695 if (error != sizeof(u64))
696 return (error < 0) ? error : -EIO;
697
698 *leaf_out = be64_to_cpu(leaf_no);
699 751
752 hash = gfs2_dir_get_hash_table(dip);
753 if (IS_ERR(hash))
754 return PTR_ERR(hash);
755 *leaf_out = be64_to_cpu(*(hash + index));
700 return 0; 756 return 0;
701} 757}
702 758
@@ -966,6 +1022,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
966 for (x = 0; x < half_len; x++) 1022 for (x = 0; x < half_len; x++)
967 lp[x] = cpu_to_be64(bn); 1023 lp[x] = cpu_to_be64(bn);
968 1024
1025 gfs2_dir_hash_inval(dip);
1026
969 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), 1027 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
970 half_len * sizeof(u64)); 1028 half_len * sizeof(u64));
971 if (error != half_len * sizeof(u64)) { 1029 if (error != half_len * sizeof(u64)) {
@@ -1052,70 +1110,54 @@ fail_brelse:
1052 1110
1053static int dir_double_exhash(struct gfs2_inode *dip) 1111static int dir_double_exhash(struct gfs2_inode *dip)
1054{ 1112{
1055 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1056 struct buffer_head *dibh; 1113 struct buffer_head *dibh;
1057 u32 hsize; 1114 u32 hsize;
1058 u64 *buf; 1115 u32 hsize_bytes;
1059 u64 *from, *to; 1116 __be64 *hc;
1060 u64 block; 1117 __be64 *hc2, *h;
1061 u64 disksize = i_size_read(&dip->i_inode);
1062 int x; 1118 int x;
1063 int error = 0; 1119 int error = 0;
1064 1120
1065 hsize = 1 << dip->i_depth; 1121 hsize = 1 << dip->i_depth;
1066 if (hsize * sizeof(u64) != disksize) { 1122 hsize_bytes = hsize * sizeof(__be64);
1067 gfs2_consist_inode(dip);
1068 return -EIO;
1069 }
1070 1123
1071 /* Allocate both the "from" and "to" buffers in one big chunk */ 1124 hc = gfs2_dir_get_hash_table(dip);
1125 if (IS_ERR(hc))
1126 return PTR_ERR(hc);
1072 1127
1073 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); 1128 h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
1074 if (!buf) 1129 if (!hc2)
1075 return -ENOMEM; 1130 return -ENOMEM;
1076 1131
1077 for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { 1132 error = gfs2_meta_inode_buffer(dip, &dibh);
1078 error = gfs2_dir_read_data(dip, (char *)buf, 1133 if (error)
1079 block * sdp->sd_hash_bsize, 1134 goto out_kfree;
1080 sdp->sd_hash_bsize, 1);
1081 if (error != sdp->sd_hash_bsize) {
1082 if (error >= 0)
1083 error = -EIO;
1084 goto fail;
1085 }
1086
1087 from = buf;
1088 to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
1089
1090 for (x = sdp->sd_hash_ptrs; x--; from++) {
1091 *to++ = *from; /* No endianess worries */
1092 *to++ = *from;
1093 }
1094 1135
1095 error = gfs2_dir_write_data(dip, 1136 for (x = 0; x < hsize; x++) {
1096 (char *)buf + sdp->sd_hash_bsize, 1137 *h++ = *hc;
1097 block * sdp->sd_sb.sb_bsize, 1138 *h++ = *hc;
1098 sdp->sd_sb.sb_bsize); 1139 hc++;
1099 if (error != sdp->sd_sb.sb_bsize) {
1100 if (error >= 0)
1101 error = -EIO;
1102 goto fail;
1103 }
1104 } 1140 }
1105 1141
1106 kfree(buf); 1142 error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2);
1107 1143 if (error != (hsize_bytes * 2))
1108 error = gfs2_meta_inode_buffer(dip, &dibh); 1144 goto fail;
1109 if (!gfs2_assert_withdraw(sdp, !error)) {
1110 dip->i_depth++;
1111 gfs2_dinode_out(dip, dibh->b_data);
1112 brelse(dibh);
1113 }
1114 1145
1115 return error; 1146 gfs2_dir_hash_inval(dip);
1147 dip->i_hash_cache = hc2;
1148 dip->i_depth++;
1149 gfs2_dinode_out(dip, dibh->b_data);
1150 brelse(dibh);
1151 return 0;
1116 1152
1117fail: 1153fail:
1118 kfree(buf); 1154 /* Replace original hash table & size */
1155 gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes);
1156 i_size_write(&dip->i_inode, hsize_bytes);
1157 gfs2_dinode_out(dip, dibh->b_data);
1158 brelse(dibh);
1159out_kfree:
1160 kfree(hc2);
1119 return error; 1161 return error;
1120} 1162}
1121 1163
@@ -1348,6 +1390,7 @@ out:
1348 return error; 1390 return error;
1349} 1391}
1350 1392
1393
1351/** 1394/**
1352 * dir_e_read - Reads the entries from a directory into a filldir buffer 1395 * dir_e_read - Reads the entries from a directory into a filldir buffer
1353 * @dip: dinode pointer 1396 * @dip: dinode pointer
@@ -1362,9 +1405,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1362 filldir_t filldir) 1405 filldir_t filldir)
1363{ 1406{
1364 struct gfs2_inode *dip = GFS2_I(inode); 1407 struct gfs2_inode *dip = GFS2_I(inode);
1365 struct gfs2_sbd *sdp = GFS2_SB(inode);
1366 u32 hsize, len = 0; 1408 u32 hsize, len = 0;
1367 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1368 u32 hash, index; 1409 u32 hash, index;
1369 __be64 *lp; 1410 __be64 *lp;
1370 int copied = 0; 1411 int copied = 0;
@@ -1372,37 +1413,17 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1372 unsigned depth = 0; 1413 unsigned depth = 0;
1373 1414
1374 hsize = 1 << dip->i_depth; 1415 hsize = 1 << dip->i_depth;
1375 if (hsize * sizeof(u64) != i_size_read(inode)) {
1376 gfs2_consist_inode(dip);
1377 return -EIO;
1378 }
1379
1380 hash = gfs2_dir_offset2hash(*offset); 1416 hash = gfs2_dir_offset2hash(*offset);
1381 index = hash >> (32 - dip->i_depth); 1417 index = hash >> (32 - dip->i_depth);
1382 1418
1383 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); 1419 lp = gfs2_dir_get_hash_table(dip);
1384 if (!lp) 1420 if (IS_ERR(lp))
1385 return -ENOMEM; 1421 return PTR_ERR(lp);
1386 1422
1387 while (index < hsize) { 1423 while (index < hsize) {
1388 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1389 ht_offset = index - lp_offset;
1390
1391 if (ht_offset_cur != ht_offset) {
1392 error = gfs2_dir_read_data(dip, (char *)lp,
1393 ht_offset * sizeof(__be64),
1394 sdp->sd_hash_bsize, 1);
1395 if (error != sdp->sd_hash_bsize) {
1396 if (error >= 0)
1397 error = -EIO;
1398 goto out;
1399 }
1400 ht_offset_cur = ht_offset;
1401 }
1402
1403 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, 1424 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1404 &copied, &depth, 1425 &copied, &depth,
1405 be64_to_cpu(lp[lp_offset])); 1426 be64_to_cpu(lp[index]));
1406 if (error) 1427 if (error)
1407 break; 1428 break;
1408 1429
@@ -1410,8 +1431,6 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1410 index = (index & ~(len - 1)) + len; 1431 index = (index & ~(len - 1)) + len;
1411 } 1432 }
1412 1433
1413out:
1414 kfree(lp);
1415 if (error > 0) 1434 if (error > 0)
1416 error = 0; 1435 error = 0;
1417 return error; 1436 return error;
@@ -1914,43 +1933,22 @@ out:
1914 1933
1915int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) 1934int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1916{ 1935{
1917 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1918 struct buffer_head *bh; 1936 struct buffer_head *bh;
1919 struct gfs2_leaf *leaf; 1937 struct gfs2_leaf *leaf;
1920 u32 hsize, len; 1938 u32 hsize, len;
1921 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922 u32 index = 0, next_index; 1939 u32 index = 0, next_index;
1923 __be64 *lp; 1940 __be64 *lp;
1924 u64 leaf_no; 1941 u64 leaf_no;
1925 int error = 0, last; 1942 int error = 0, last;
1926 1943
1927 hsize = 1 << dip->i_depth; 1944 hsize = 1 << dip->i_depth;
1928 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1929 gfs2_consist_inode(dip);
1930 return -EIO;
1931 }
1932 1945
1933 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); 1946 lp = gfs2_dir_get_hash_table(dip);
1934 if (!lp) 1947 if (IS_ERR(lp))
1935 return -ENOMEM; 1948 return PTR_ERR(lp);
1936 1949
1937 while (index < hsize) { 1950 while (index < hsize) {
1938 lp_offset = index & (sdp->sd_hash_ptrs - 1); 1951 leaf_no = be64_to_cpu(lp[index]);
1939 ht_offset = index - lp_offset;
1940
1941 if (ht_offset_cur != ht_offset) {
1942 error = gfs2_dir_read_data(dip, (char *)lp,
1943 ht_offset * sizeof(__be64),
1944 sdp->sd_hash_bsize, 1);
1945 if (error != sdp->sd_hash_bsize) {
1946 if (error >= 0)
1947 error = -EIO;
1948 goto out;
1949 }
1950 ht_offset_cur = ht_offset;
1951 }
1952
1953 leaf_no = be64_to_cpu(lp[lp_offset]);
1954 if (leaf_no) { 1952 if (leaf_no) {
1955 error = get_leaf(dip, leaf_no, &bh); 1953 error = get_leaf(dip, leaf_no, &bh);
1956 if (error) 1954 if (error)
@@ -1976,7 +1974,6 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1976 } 1974 }
1977 1975
1978out: 1976out:
1979 kfree(lp);
1980 1977
1981 return error; 1978 return error;
1982} 1979}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index e686af11becd..ff5772fbf024 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -35,6 +35,7 @@ extern int gfs2_diradd_alloc_required(struct inode *dir,
35 const struct qstr *filename); 35 const struct qstr *filename);
36extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 36extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
37 struct buffer_head **bhp); 37 struct buffer_head **bhp);
38extern void gfs2_dir_hash_inval(struct gfs2_inode *ip);
38 39
39static inline u32 gfs2_disk_hash(const char *data, int len) 40static inline u32 gfs2_disk_hash(const char *data, int len)
40{ 41{
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index f82cb5e1cb6b..edeb9e802903 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -174,7 +174,9 @@ void gfs2_set_inode_flags(struct inode *inode)
174 struct gfs2_inode *ip = GFS2_I(inode); 174 struct gfs2_inode *ip = GFS2_I(inode);
175 unsigned int flags = inode->i_flags; 175 unsigned int flags = inode->i_flags;
176 176
177 flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 177 flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
178 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
179 inode->i_flags |= S_NOSEC;
178 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) 180 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
179 flags |= S_IMMUTABLE; 181 flags |= S_IMMUTABLE;
180 if (ip->i_diskflags & GFS2_DIF_APPENDONLY) 182 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1c1336e7b3b2..88e8a23d0026 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -409,6 +409,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
409 if (held1 && held2 && list_empty(&gl->gl_holders)) 409 if (held1 && held2 && list_empty(&gl->gl_holders))
410 clear_bit(GLF_QUEUED, &gl->gl_flags); 410 clear_bit(GLF_QUEUED, &gl->gl_flags);
411 411
412 if (new_state != gl->gl_target)
413 /* shorten our minimum hold time */
414 gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
415 GL_GLOCK_MIN_HOLD);
412 gl->gl_state = new_state; 416 gl->gl_state = new_state;
413 gl->gl_tchange = jiffies; 417 gl->gl_tchange = jiffies;
414} 418}
@@ -668,7 +672,7 @@ static void glock_work_func(struct work_struct *work)
668 gl->gl_demote_state != LM_ST_EXCLUSIVE) { 672 gl->gl_demote_state != LM_ST_EXCLUSIVE) {
669 unsigned long holdtime, now = jiffies; 673 unsigned long holdtime, now = jiffies;
670 674
671 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 675 holdtime = gl->gl_tchange + gl->gl_hold_time;
672 if (time_before(now, holdtime)) 676 if (time_before(now, holdtime))
673 delay = holdtime - now; 677 delay = holdtime - now;
674 678
@@ -679,9 +683,14 @@ static void glock_work_func(struct work_struct *work)
679 } 683 }
680 run_queue(gl, 0); 684 run_queue(gl, 0);
681 spin_unlock(&gl->gl_spin); 685 spin_unlock(&gl->gl_spin);
682 if (!delay || 686 if (!delay)
683 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
684 gfs2_glock_put(gl); 687 gfs2_glock_put(gl);
688 else {
689 if (gl->gl_name.ln_type != LM_TYPE_INODE)
690 delay = 0;
691 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
692 gfs2_glock_put(gl);
693 }
685 if (drop_ref) 694 if (drop_ref)
686 gfs2_glock_put(gl); 695 gfs2_glock_put(gl);
687} 696}
@@ -743,6 +752,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
743 gl->gl_tchange = jiffies; 752 gl->gl_tchange = jiffies;
744 gl->gl_object = NULL; 753 gl->gl_object = NULL;
745 gl->gl_sbd = sdp; 754 gl->gl_sbd = sdp;
755 gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
746 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 756 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
747 INIT_WORK(&gl->gl_delete, delete_work_func); 757 INIT_WORK(&gl->gl_delete, delete_work_func);
748 758
@@ -855,8 +865,15 @@ static int gfs2_glock_demote_wait(void *word)
855 865
856static void wait_on_holder(struct gfs2_holder *gh) 866static void wait_on_holder(struct gfs2_holder *gh)
857{ 867{
868 unsigned long time1 = jiffies;
869
858 might_sleep(); 870 might_sleep();
859 wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); 871 wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
872 if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
873 /* Lengthen the minimum hold time. */
874 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
875 GL_GLOCK_HOLD_INCR,
876 GL_GLOCK_MAX_HOLD);
860} 877}
861 878
862static void wait_on_demote(struct gfs2_glock *gl) 879static void wait_on_demote(struct gfs2_glock *gl)
@@ -1093,8 +1110,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1093 1110
1094 gfs2_glock_hold(gl); 1111 gfs2_glock_hold(gl);
1095 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1112 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1096 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1113 !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
1097 delay = gl->gl_ops->go_min_hold_time; 1114 gl->gl_name.ln_type == LM_TYPE_INODE)
1115 delay = gl->gl_hold_time;
1098 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 1116 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1099 gfs2_glock_put(gl); 1117 gfs2_glock_put(gl);
1100} 1118}
@@ -1273,12 +1291,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1273 unsigned long now = jiffies; 1291 unsigned long now = jiffies;
1274 1292
1275 gfs2_glock_hold(gl); 1293 gfs2_glock_hold(gl);
1276 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 1294 holdtime = gl->gl_tchange + gl->gl_hold_time;
1277 if (test_bit(GLF_QUEUED, &gl->gl_flags)) { 1295 if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
1296 gl->gl_name.ln_type == LM_TYPE_INODE) {
1278 if (time_before(now, holdtime)) 1297 if (time_before(now, holdtime))
1279 delay = holdtime - now; 1298 delay = holdtime - now;
1280 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) 1299 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
1281 delay = gl->gl_ops->go_min_hold_time; 1300 delay = gl->gl_hold_time;
1282 } 1301 }
1283 1302
1284 spin_lock(&gl->gl_spin); 1303 spin_lock(&gl->gl_spin);
@@ -1667,7 +1686,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1667 dtime *= 1000000/HZ; /* demote time in uSec */ 1686 dtime *= 1000000/HZ; /* demote time in uSec */
1668 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1687 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1669 dtime = 0; 1688 dtime = 0;
1670 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n", 1689 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
1671 state2str(gl->gl_state), 1690 state2str(gl->gl_state),
1672 gl->gl_name.ln_type, 1691 gl->gl_name.ln_type,
1673 (unsigned long long)gl->gl_name.ln_number, 1692 (unsigned long long)gl->gl_name.ln_number,
@@ -1676,7 +1695,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1676 state2str(gl->gl_demote_state), dtime, 1695 state2str(gl->gl_demote_state), dtime,
1677 atomic_read(&gl->gl_ail_count), 1696 atomic_read(&gl->gl_ail_count),
1678 atomic_read(&gl->gl_revokes), 1697 atomic_read(&gl->gl_revokes),
1679 atomic_read(&gl->gl_ref)); 1698 atomic_read(&gl->gl_ref), gl->gl_hold_time);
1680 1699
1681 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1700 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1682 error = dump_holder(seq, gh); 1701 error = dump_holder(seq, gh);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 6b2f757b9281..66707118af25 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -113,6 +113,12 @@ enum {
113 113
114#define GLR_TRYFAILED 13 114#define GLR_TRYFAILED 13
115 115
116#define GL_GLOCK_MAX_HOLD (long)(HZ / 5)
117#define GL_GLOCK_DFT_HOLD (long)(HZ / 5)
118#define GL_GLOCK_MIN_HOLD (long)(10)
119#define GL_GLOCK_HOLD_INCR (long)(HZ / 20)
120#define GL_GLOCK_HOLD_DECR (long)(HZ / 40)
121
116struct lm_lockops { 122struct lm_lockops {
117 const char *lm_proto_name; 123 const char *lm_proto_name;
118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 124 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 2cca29316bd6..da21ecaafcc2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -26,6 +26,7 @@
26#include "rgrp.h" 26#include "rgrp.h"
27#include "util.h" 27#include "util.h"
28#include "trans.h" 28#include "trans.h"
29#include "dir.h"
29 30
30/** 31/**
31 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL 32 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
@@ -218,6 +219,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
218 if (ip) { 219 if (ip) {
219 set_bit(GIF_INVALID, &ip->i_flags); 220 set_bit(GIF_INVALID, &ip->i_flags);
220 forget_all_cached_acls(&ip->i_inode); 221 forget_all_cached_acls(&ip->i_inode);
222 gfs2_dir_hash_inval(ip);
221 } 223 }
222 } 224 }
223 225
@@ -316,6 +318,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
316 ip->i_generation = be64_to_cpu(str->di_generation); 318 ip->i_generation = be64_to_cpu(str->di_generation);
317 319
318 ip->i_diskflags = be32_to_cpu(str->di_flags); 320 ip->i_diskflags = be32_to_cpu(str->di_flags);
321 ip->i_eattr = be64_to_cpu(str->di_eattr);
322 /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
319 gfs2_set_inode_flags(&ip->i_inode); 323 gfs2_set_inode_flags(&ip->i_inode);
320 height = be16_to_cpu(str->di_height); 324 height = be16_to_cpu(str->di_height);
321 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 325 if (unlikely(height > GFS2_MAX_META_HEIGHT))
@@ -328,7 +332,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
328 ip->i_depth = (u8)depth; 332 ip->i_depth = (u8)depth;
329 ip->i_entries = be32_to_cpu(str->di_entries); 333 ip->i_entries = be32_to_cpu(str->di_entries);
330 334
331 ip->i_eattr = be64_to_cpu(str->di_eattr);
332 if (S_ISREG(ip->i_inode.i_mode)) 335 if (S_ISREG(ip->i_inode.i_mode))
333 gfs2_set_aops(&ip->i_inode); 336 gfs2_set_aops(&ip->i_inode);
334 337
@@ -549,7 +552,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
549 .go_lock = inode_go_lock, 552 .go_lock = inode_go_lock,
550 .go_dump = inode_go_dump, 553 .go_dump = inode_go_dump,
551 .go_type = LM_TYPE_INODE, 554 .go_type = LM_TYPE_INODE,
552 .go_min_hold_time = HZ / 5,
553 .go_flags = GLOF_ASPACE, 555 .go_flags = GLOF_ASPACE,
554}; 556};
555 557
@@ -560,7 +562,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
560 .go_unlock = rgrp_go_unlock, 562 .go_unlock = rgrp_go_unlock,
561 .go_dump = gfs2_rgrp_dump, 563 .go_dump = gfs2_rgrp_dump,
562 .go_type = LM_TYPE_RGRP, 564 .go_type = LM_TYPE_RGRP,
563 .go_min_hold_time = HZ / 5,
564 .go_flags = GLOF_ASPACE, 565 .go_flags = GLOF_ASPACE,
565}; 566};
566 567
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 81206e70cbf6..892ac37de8ae 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -163,7 +163,6 @@ struct gfs2_glock_operations {
163 int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); 163 int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
164 void (*go_callback) (struct gfs2_glock *gl); 164 void (*go_callback) (struct gfs2_glock *gl);
165 const int go_type; 165 const int go_type;
166 const unsigned long go_min_hold_time;
167 const unsigned long go_flags; 166 const unsigned long go_flags;
168#define GLOF_ASPACE 1 167#define GLOF_ASPACE 1
169}; 168};
@@ -221,6 +220,7 @@ struct gfs2_glock {
221 220
222 unsigned int gl_hash; 221 unsigned int gl_hash;
223 unsigned long gl_demote_time; /* time of first demote request */ 222 unsigned long gl_demote_time; /* time of first demote request */
223 long gl_hold_time;
224 struct list_head gl_holders; 224 struct list_head gl_holders;
225 225
226 const struct gfs2_glock_operations *gl_ops; 226 const struct gfs2_glock_operations *gl_ops;
@@ -285,6 +285,7 @@ struct gfs2_inode {
285 u64 i_goal; /* goal block for allocations */ 285 u64 i_goal; /* goal block for allocations */
286 struct rw_semaphore i_rw_mutex; 286 struct rw_semaphore i_rw_mutex;
287 struct list_head i_trunc_list; 287 struct list_head i_trunc_list;
288 __be64 *i_hash_cache;
288 u32 i_entries; 289 u32 i_entries;
289 u32 i_diskflags; 290 u32 i_diskflags;
290 u8 i_height; 291 u8 i_height;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c2b34cd2abe0..29e1ace7953d 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -41,6 +41,7 @@ static void gfs2_init_inode_once(void *foo)
41 init_rwsem(&ip->i_rw_mutex); 41 init_rwsem(&ip->i_rw_mutex);
42 INIT_LIST_HEAD(&ip->i_trunc_list); 42 INIT_LIST_HEAD(&ip->i_trunc_list);
43 ip->i_alloc = NULL; 43 ip->i_alloc = NULL;
44 ip->i_hash_cache = NULL;
44} 45}
45 46
46static void gfs2_init_glock_once(void *foo) 47static void gfs2_init_glock_once(void *foo)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2a77071fb7b6..516516e0c2a2 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1094,6 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1094 if (sdp->sd_args.ar_nobarrier) 1094 if (sdp->sd_args.ar_nobarrier)
1095 set_bit(SDF_NOBARRIERS, &sdp->sd_flags); 1095 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1096 1096
1097 sb->s_flags |= MS_NOSEC;
1097 sb->s_magic = GFS2_MAGIC; 1098 sb->s_magic = GFS2_MAGIC;
1098 sb->s_op = &gfs2_super_ops; 1099 sb->s_op = &gfs2_super_ops;
1099 sb->s_d_op = &gfs2_dops; 1100 sb->s_d_op = &gfs2_dops;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b780df3fd54..7f8af1eb02de 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1607,14 +1607,15 @@ rgrp_error:
1607} 1607}
1608 1608
1609/** 1609/**
1610 * gfs2_free_data - free a contiguous run of data block(s) 1610 * __gfs2_free_blocks - free a contiguous run of block(s)
1611 * @ip: the inode these blocks are being freed from 1611 * @ip: the inode these blocks are being freed from
1612 * @bstart: first block of a run of contiguous blocks 1612 * @bstart: first block of a run of contiguous blocks
1613 * @blen: the length of the block run 1613 * @blen: the length of the block run
1614 * @meta: 1 if the blocks represent metadata
1614 * 1615 *
1615 */ 1616 */
1616 1617
1617void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) 1618void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1618{ 1619{
1619 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1620 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1620 struct gfs2_rgrpd *rgd; 1621 struct gfs2_rgrpd *rgd;
@@ -1631,54 +1632,11 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1631 gfs2_trans_add_rg(rgd); 1632 gfs2_trans_add_rg(rgd);
1632 1633
1633 /* Directories keep their data in the metadata address space */ 1634 /* Directories keep their data in the metadata address space */
1634 if (ip->i_depth) 1635 if (meta || ip->i_depth)
1635 gfs2_meta_wipe(ip, bstart, blen); 1636 gfs2_meta_wipe(ip, bstart, blen);
1636} 1637}
1637 1638
1638/** 1639/**
1639 * gfs2_free_data - free a contiguous run of data block(s)
1640 * @ip: the inode these blocks are being freed from
1641 * @bstart: first block of a run of contiguous blocks
1642 * @blen: the length of the block run
1643 *
1644 */
1645
1646void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1647{
1648 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1649
1650 __gfs2_free_data(ip, bstart, blen);
1651 gfs2_statfs_change(sdp, 0, +blen, 0);
1652 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1653}
1654
1655/**
1656 * gfs2_free_meta - free a contiguous run of data block(s)
1657 * @ip: the inode these blocks are being freed from
1658 * @bstart: first block of a run of contiguous blocks
1659 * @blen: the length of the block run
1660 *
1661 */
1662
1663void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1664{
1665 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1666 struct gfs2_rgrpd *rgd;
1667
1668 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1669 if (!rgd)
1670 return;
1671 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1672 rgd->rd_free += blen;
1673
1674 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1675 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1676
1677 gfs2_trans_add_rg(rgd);
1678 gfs2_meta_wipe(ip, bstart, blen);
1679}
1680
1681/**
1682 * gfs2_free_meta - free a contiguous run of data block(s) 1640 * gfs2_free_meta - free a contiguous run of data block(s)
1683 * @ip: the inode these blocks are being freed from 1641 * @ip: the inode these blocks are being freed from
1684 * @bstart: first block of a run of contiguous blocks 1642 * @bstart: first block of a run of contiguous blocks
@@ -1690,7 +1648,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1690{ 1648{
1691 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1649 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1692 1650
1693 __gfs2_free_meta(ip, bstart, blen); 1651 __gfs2_free_blocks(ip, bstart, blen, 1);
1694 gfs2_statfs_change(sdp, 0, +blen, 0); 1652 gfs2_statfs_change(sdp, 0, +blen, 0);
1695 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1653 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1696} 1654}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index a80e3034ac47..d253f9a8c70e 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,9 +52,7 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
54 54
55extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 55extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
56extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
58extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 56extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
59extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 57extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
60extern void gfs2_unlink_di(struct inode *inode); 58extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fb0edf735483..b7beadd9ba4c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1533,7 +1533,7 @@ out:
1533 /* Case 3 starts here */ 1533 /* Case 3 starts here */
1534 truncate_inode_pages(&inode->i_data, 0); 1534 truncate_inode_pages(&inode->i_data, 0);
1535 end_writeback(inode); 1535 end_writeback(inode);
1536 1536 gfs2_dir_hash_inval(ip);
1537 ip->i_gl->gl_object = NULL; 1537 ip->i_gl->gl_object = NULL;
1538 gfs2_glock_add_to_lru(ip->i_gl); 1538 gfs2_glock_add_to_lru(ip->i_gl);
1539 gfs2_glock_put(ip->i_gl); 1539 gfs2_glock_put(ip->i_gl);
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2312de34bd42..2a734cfccc92 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -43,6 +43,10 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
43 node->tree->node_size - (rec + 1) * 2); 43 node->tree->node_size - (rec + 1) * 2);
44 if (!recoff) 44 if (!recoff)
45 return 0; 45 return 0;
46 if (recoff > node->tree->node_size - 2) {
47 printk(KERN_ERR "hfs: recoff %d too large\n", recoff);
48 return 0;
49 }
46 50
47 retval = hfs_bnode_read_u16(node, recoff) + 2; 51 retval = hfs_bnode_read_u16(node, recoff) + 2;
48 if (retval > node->tree->max_key_len + 2) { 52 if (retval > node->tree->max_key_len + 2) {
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index b4ba1b319333..4dfbfec357e8 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -212,7 +212,9 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
212 212
213 dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", 213 dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n",
214 str->name, cnid, inode->i_nlink); 214 str->name, cnid, inode->i_nlink);
215 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 215 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
216 if (err)
217 return err;
216 218
217 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 219 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
218 entry_size = hfsplus_fill_cat_thread(sb, &entry, 220 entry_size = hfsplus_fill_cat_thread(sb, &entry,
@@ -269,7 +271,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
269 271
270 dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", 272 dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n",
271 str ? str->name : NULL, cnid); 273 str ? str->name : NULL, cnid);
272 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 274 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
275 if (err)
276 return err;
273 277
274 if (!str) { 278 if (!str) {
275 int len; 279 int len;
@@ -347,12 +351,14 @@ int hfsplus_rename_cat(u32 cnid,
347 struct hfs_find_data src_fd, dst_fd; 351 struct hfs_find_data src_fd, dst_fd;
348 hfsplus_cat_entry entry; 352 hfsplus_cat_entry entry;
349 int entry_size, type; 353 int entry_size, type;
350 int err = 0; 354 int err;
351 355
352 dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", 356 dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n",
353 cnid, src_dir->i_ino, src_name->name, 357 cnid, src_dir->i_ino, src_name->name,
354 dst_dir->i_ino, dst_name->name); 358 dst_dir->i_ino, dst_name->name);
355 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); 359 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
360 if (err)
361 return err;
356 dst_fd = src_fd; 362 dst_fd = src_fd;
357 363
358 /* find the old dir entry and read the data */ 364 /* find the old dir entry and read the data */
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4df5059c25da..25b2443a004c 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -38,7 +38,9 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
38 sb = dir->i_sb; 38 sb = dir->i_sb;
39 39
40 dentry->d_fsdata = NULL; 40 dentry->d_fsdata = NULL;
41 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 41 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
42 if (err)
43 return ERR_PTR(err);
42 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); 44 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
43again: 45again:
44 err = hfs_brec_read(&fd, &entry, sizeof(entry)); 46 err = hfs_brec_read(&fd, &entry, sizeof(entry));
@@ -132,7 +134,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
132 if (filp->f_pos >= inode->i_size) 134 if (filp->f_pos >= inode->i_size)
133 return 0; 135 return 0;
134 136
135 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 137 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
138 if (err)
139 return err;
136 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); 140 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
137 err = hfs_brec_find(&fd); 141 err = hfs_brec_find(&fd);
138 if (err) 142 if (err)
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index b1991a2a08e0..5849e3ef35cc 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -119,22 +119,31 @@ static void __hfsplus_ext_write_extent(struct inode *inode,
119 set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); 119 set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags);
120} 120}
121 121
122static void hfsplus_ext_write_extent_locked(struct inode *inode) 122static int hfsplus_ext_write_extent_locked(struct inode *inode)
123{ 123{
124 int res;
125
124 if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { 126 if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) {
125 struct hfs_find_data fd; 127 struct hfs_find_data fd;
126 128
127 hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); 129 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
130 if (res)
131 return res;
128 __hfsplus_ext_write_extent(inode, &fd); 132 __hfsplus_ext_write_extent(inode, &fd);
129 hfs_find_exit(&fd); 133 hfs_find_exit(&fd);
130 } 134 }
135 return 0;
131} 136}
132 137
133void hfsplus_ext_write_extent(struct inode *inode) 138int hfsplus_ext_write_extent(struct inode *inode)
134{ 139{
140 int res;
141
135 mutex_lock(&HFSPLUS_I(inode)->extents_lock); 142 mutex_lock(&HFSPLUS_I(inode)->extents_lock);
136 hfsplus_ext_write_extent_locked(inode); 143 res = hfsplus_ext_write_extent_locked(inode);
137 mutex_unlock(&HFSPLUS_I(inode)->extents_lock); 144 mutex_unlock(&HFSPLUS_I(inode)->extents_lock);
145
146 return res;
138} 147}
139 148
140static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, 149static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
@@ -194,9 +203,11 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block)
194 block < hip->cached_start + hip->cached_blocks) 203 block < hip->cached_start + hip->cached_blocks)
195 return 0; 204 return 0;
196 205
197 hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); 206 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
198 res = __hfsplus_ext_cache_extent(&fd, inode, block); 207 if (!res) {
199 hfs_find_exit(&fd); 208 res = __hfsplus_ext_cache_extent(&fd, inode, block);
209 hfs_find_exit(&fd);
210 }
200 return res; 211 return res;
201} 212}
202 213
@@ -209,6 +220,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
209 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 220 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
210 int res = -EIO; 221 int res = -EIO;
211 u32 ablock, dblock, mask; 222 u32 ablock, dblock, mask;
223 sector_t sector;
212 int was_dirty = 0; 224 int was_dirty = 0;
213 int shift; 225 int shift;
214 226
@@ -255,10 +267,12 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
255done: 267done:
256 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", 268 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n",
257 inode->i_ino, (long long)iblock, dblock); 269 inode->i_ino, (long long)iblock, dblock);
270
258 mask = (1 << sbi->fs_shift) - 1; 271 mask = (1 << sbi->fs_shift) - 1;
259 map_bh(bh_result, sb, 272 sector = ((sector_t)dblock << sbi->fs_shift) +
260 (dblock << sbi->fs_shift) + sbi->blockoffset + 273 sbi->blockoffset + (iblock & mask);
261 (iblock & mask)); 274 map_bh(bh_result, sb, sector);
275
262 if (create) { 276 if (create) {
263 set_buffer_new(bh_result); 277 set_buffer_new(bh_result);
264 hip->phys_size += sb->s_blocksize; 278 hip->phys_size += sb->s_blocksize;
@@ -371,7 +385,9 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid,
371 if (total_blocks == blocks) 385 if (total_blocks == blocks)
372 return 0; 386 return 0;
373 387
374 hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); 388 res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
389 if (res)
390 return res;
375 do { 391 do {
376 res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, 392 res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid,
377 total_blocks, type); 393 total_blocks, type);
@@ -469,7 +485,9 @@ out:
469 485
470insert_extent: 486insert_extent:
471 dprint(DBG_EXTENT, "insert new extent\n"); 487 dprint(DBG_EXTENT, "insert new extent\n");
472 hfsplus_ext_write_extent_locked(inode); 488 res = hfsplus_ext_write_extent_locked(inode);
489 if (res)
490 goto out;
473 491
474 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); 492 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
475 hip->cached_extents[0].start_block = cpu_to_be32(start); 493 hip->cached_extents[0].start_block = cpu_to_be32(start);
@@ -500,7 +518,6 @@ void hfsplus_file_truncate(struct inode *inode)
500 struct page *page; 518 struct page *page;
501 void *fsdata; 519 void *fsdata;
502 u32 size = inode->i_size; 520 u32 size = inode->i_size;
503 int res;
504 521
505 res = pagecache_write_begin(NULL, mapping, size, 0, 522 res = pagecache_write_begin(NULL, mapping, size, 0,
506 AOP_FLAG_UNINTERRUPTIBLE, 523 AOP_FLAG_UNINTERRUPTIBLE,
@@ -523,7 +540,12 @@ void hfsplus_file_truncate(struct inode *inode)
523 goto out; 540 goto out;
524 541
525 mutex_lock(&hip->extents_lock); 542 mutex_lock(&hip->extents_lock);
526 hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); 543 res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
544 if (res) {
545 mutex_unlock(&hip->extents_lock);
546 /* XXX: We lack error handling of hfsplus_file_truncate() */
547 return;
548 }
527 while (1) { 549 while (1) {
528 if (alloc_cnt == hip->first_blocks) { 550 if (alloc_cnt == hip->first_blocks) {
529 hfsplus_free_extents(sb, hip->first_extents, 551 hfsplus_free_extents(sb, hip->first_extents,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 38184e360932..d7674d051f52 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -13,6 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/blkdev.h>
16#include "hfsplus_raw.h" 17#include "hfsplus_raw.h"
17 18
18#define DBG_BNODE_REFS 0x00000001 19#define DBG_BNODE_REFS 0x00000001
@@ -110,7 +111,9 @@ struct hfsplus_vh;
110struct hfs_btree; 111struct hfs_btree;
111 112
112struct hfsplus_sb_info { 113struct hfsplus_sb_info {
114 void *s_vhdr_buf;
113 struct hfsplus_vh *s_vhdr; 115 struct hfsplus_vh *s_vhdr;
116 void *s_backup_vhdr_buf;
114 struct hfsplus_vh *s_backup_vhdr; 117 struct hfsplus_vh *s_backup_vhdr;
115 struct hfs_btree *ext_tree; 118 struct hfs_btree *ext_tree;
116 struct hfs_btree *cat_tree; 119 struct hfs_btree *cat_tree;
@@ -258,6 +261,15 @@ struct hfsplus_readdir_data {
258 struct hfsplus_cat_key key; 261 struct hfsplus_cat_key key;
259}; 262};
260 263
264/*
265 * Find minimum acceptible I/O size for an hfsplus sb.
266 */
267static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
268{
269 return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev),
270 HFSPLUS_SECTOR_SIZE);
271}
272
261#define hfs_btree_open hfsplus_btree_open 273#define hfs_btree_open hfsplus_btree_open
262#define hfs_btree_close hfsplus_btree_close 274#define hfs_btree_close hfsplus_btree_close
263#define hfs_btree_write hfsplus_btree_write 275#define hfs_btree_write hfsplus_btree_write
@@ -374,7 +386,7 @@ extern const struct file_operations hfsplus_dir_operations;
374 386
375/* extents.c */ 387/* extents.c */
376int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 388int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
377void hfsplus_ext_write_extent(struct inode *); 389int hfsplus_ext_write_extent(struct inode *);
378int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); 390int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
379int hfsplus_free_fork(struct super_block *, u32, 391int hfsplus_free_fork(struct super_block *, u32,
380 struct hfsplus_fork_raw *, int); 392 struct hfsplus_fork_raw *, int);
@@ -437,8 +449,8 @@ int hfsplus_compare_dentry(const struct dentry *parent,
437/* wrapper.c */ 449/* wrapper.c */
438int hfsplus_read_wrapper(struct super_block *); 450int hfsplus_read_wrapper(struct super_block *);
439int hfs_part_find(struct super_block *, sector_t *, sector_t *); 451int hfs_part_find(struct super_block *, sector_t *, sector_t *);
440int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, 452int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
441 void *data, int rw); 453 void *buf, void **data, int rw);
442 454
443/* time macros */ 455/* time macros */
444#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) 456#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 30486e01d003..4cc1e3a36ec7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -195,11 +195,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir,
195 hip->flags = 0; 195 hip->flags = 0;
196 set_bit(HFSPLUS_I_RSRC, &hip->flags); 196 set_bit(HFSPLUS_I_RSRC, &hip->flags);
197 197
198 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 198 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
199 err = hfsplus_find_cat(sb, dir->i_ino, &fd); 199 if (!err) {
200 if (!err) 200 err = hfsplus_find_cat(sb, dir->i_ino, &fd);
201 err = hfsplus_cat_read_inode(inode, &fd); 201 if (!err)
202 hfs_find_exit(&fd); 202 err = hfsplus_cat_read_inode(inode, &fd);
203 hfs_find_exit(&fd);
204 }
203 if (err) { 205 if (err) {
204 iput(inode); 206 iput(inode);
205 return ERR_PTR(err); 207 return ERR_PTR(err);
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index 40ad88c12c64..eb355d81e279 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -88,11 +88,12 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm,
88 return -ENOENT; 88 return -ENOENT;
89} 89}
90 90
91static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, 91static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
92 sector_t *part_start, sector_t *part_size) 92 struct new_pmap *pm, sector_t *part_start, sector_t *part_size)
93{ 93{
94 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 94 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
95 int size = be32_to_cpu(pm->pmMapBlkCnt); 95 int size = be32_to_cpu(pm->pmMapBlkCnt);
96 int buf_size = hfsplus_min_io_size(sb);
96 int res; 97 int res;
97 int i = 0; 98 int i = 0;
98 99
@@ -107,11 +108,14 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
107 if (++i >= size) 108 if (++i >= size)
108 return -ENOENT; 109 return -ENOENT;
109 110
110 res = hfsplus_submit_bio(sb->s_bdev, 111 pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE);
111 *part_start + HFS_PMAP_BLK + i, 112 if ((u8 *)pm - (u8 *)buf >= buf_size) {
112 pm, READ); 113 res = hfsplus_submit_bio(sb,
113 if (res) 114 *part_start + HFS_PMAP_BLK + i,
114 return res; 115 buf, (void **)&pm, READ);
116 if (res)
117 return res;
118 }
115 } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); 119 } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
116 120
117 return -ENOENT; 121 return -ENOENT;
@@ -124,15 +128,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
124int hfs_part_find(struct super_block *sb, 128int hfs_part_find(struct super_block *sb,
125 sector_t *part_start, sector_t *part_size) 129 sector_t *part_start, sector_t *part_size)
126{ 130{
127 void *data; 131 void *buf, *data;
128 int res; 132 int res;
129 133
130 data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 134 buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
131 if (!data) 135 if (!buf)
132 return -ENOMEM; 136 return -ENOMEM;
133 137
134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, 138 res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK,
135 data, READ); 139 buf, &data, READ);
136 if (res) 140 if (res)
137 goto out; 141 goto out;
138 142
@@ -141,13 +145,13 @@ int hfs_part_find(struct super_block *sb,
141 res = hfs_parse_old_pmap(sb, data, part_start, part_size); 145 res = hfs_parse_old_pmap(sb, data, part_start, part_size);
142 break; 146 break;
143 case HFS_NEW_PMAP_MAGIC: 147 case HFS_NEW_PMAP_MAGIC:
144 res = hfs_parse_new_pmap(sb, data, part_start, part_size); 148 res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size);
145 break; 149 break;
146 default: 150 default:
147 res = -ENOENT; 151 res = -ENOENT;
148 break; 152 break;
149 } 153 }
150out: 154out:
151 kfree(data); 155 kfree(buf);
152 return res; 156 return res;
153} 157}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 84a47b709f51..c106ca22e812 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -73,11 +73,13 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
73 73
74 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || 74 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
75 inode->i_ino == HFSPLUS_ROOT_CNID) { 75 inode->i_ino == HFSPLUS_ROOT_CNID) {
76 hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); 76 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
77 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); 77 if (!err) {
78 if (!err) 78 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
79 err = hfsplus_cat_read_inode(inode, &fd); 79 if (!err)
80 hfs_find_exit(&fd); 80 err = hfsplus_cat_read_inode(inode, &fd);
81 hfs_find_exit(&fd);
82 }
81 } else { 83 } else {
82 err = hfsplus_system_read_inode(inode); 84 err = hfsplus_system_read_inode(inode);
83 } 85 }
@@ -133,9 +135,13 @@ static int hfsplus_system_write_inode(struct inode *inode)
133static int hfsplus_write_inode(struct inode *inode, 135static int hfsplus_write_inode(struct inode *inode,
134 struct writeback_control *wbc) 136 struct writeback_control *wbc)
135{ 137{
138 int err;
139
136 dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); 140 dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino);
137 141
138 hfsplus_ext_write_extent(inode); 142 err = hfsplus_ext_write_extent(inode);
143 if (err)
144 return err;
139 145
140 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || 146 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
141 inode->i_ino == HFSPLUS_ROOT_CNID) 147 inode->i_ino == HFSPLUS_ROOT_CNID)
@@ -197,17 +203,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
197 write_backup = 1; 203 write_backup = 1;
198 } 204 }
199 205
200 error2 = hfsplus_submit_bio(sb->s_bdev, 206 error2 = hfsplus_submit_bio(sb,
201 sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, 207 sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
202 sbi->s_vhdr, WRITE_SYNC); 208 sbi->s_vhdr_buf, NULL, WRITE_SYNC);
203 if (!error) 209 if (!error)
204 error = error2; 210 error = error2;
205 if (!write_backup) 211 if (!write_backup)
206 goto out; 212 goto out;
207 213
208 error2 = hfsplus_submit_bio(sb->s_bdev, 214 error2 = hfsplus_submit_bio(sb,
209 sbi->part_start + sbi->sect_count - 2, 215 sbi->part_start + sbi->sect_count - 2,
210 sbi->s_backup_vhdr, WRITE_SYNC); 216 sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC);
211 if (!error) 217 if (!error)
212 error2 = error; 218 error2 = error;
213out: 219out:
@@ -251,8 +257,8 @@ static void hfsplus_put_super(struct super_block *sb)
251 hfs_btree_close(sbi->ext_tree); 257 hfs_btree_close(sbi->ext_tree);
252 iput(sbi->alloc_file); 258 iput(sbi->alloc_file);
253 iput(sbi->hidden_dir); 259 iput(sbi->hidden_dir);
254 kfree(sbi->s_vhdr); 260 kfree(sbi->s_vhdr_buf);
255 kfree(sbi->s_backup_vhdr); 261 kfree(sbi->s_backup_vhdr_buf);
256 unload_nls(sbi->nls); 262 unload_nls(sbi->nls);
257 kfree(sb->s_fs_info); 263 kfree(sb->s_fs_info);
258 sb->s_fs_info = NULL; 264 sb->s_fs_info = NULL;
@@ -393,6 +399,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
393 if (!sbi->rsrc_clump_blocks) 399 if (!sbi->rsrc_clump_blocks)
394 sbi->rsrc_clump_blocks = 1; 400 sbi->rsrc_clump_blocks = 1;
395 401
402 err = generic_check_addressable(sbi->alloc_blksz_shift,
403 sbi->total_blocks);
404 if (err) {
405 printk(KERN_ERR "hfs: filesystem size too large.\n");
406 goto out_free_vhdr;
407 }
408
396 /* Set up operations so we can load metadata */ 409 /* Set up operations so we can load metadata */
397 sb->s_op = &hfsplus_sops; 410 sb->s_op = &hfsplus_sops;
398 sb->s_maxbytes = MAX_LFS_FILESIZE; 411 sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -417,6 +430,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
417 sb->s_flags |= MS_RDONLY; 430 sb->s_flags |= MS_RDONLY;
418 } 431 }
419 432
433 err = -EINVAL;
434
420 /* Load metadata objects (B*Trees) */ 435 /* Load metadata objects (B*Trees) */
421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); 436 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
422 if (!sbi->ext_tree) { 437 if (!sbi->ext_tree) {
@@ -447,7 +462,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
447 462
448 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 463 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
449 str.name = HFSP_HIDDENDIR_NAME; 464 str.name = HFSP_HIDDENDIR_NAME;
450 hfs_find_init(sbi->cat_tree, &fd); 465 err = hfs_find_init(sbi->cat_tree, &fd);
466 if (err)
467 goto out_put_root;
451 hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); 468 hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
452 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { 469 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
453 hfs_find_exit(&fd); 470 hfs_find_exit(&fd);
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index a3f0bfcc881e..a32998f29f0b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -142,7 +142,11 @@ int hfsplus_uni2asc(struct super_block *sb,
142 /* search for single decomposed char */ 142 /* search for single decomposed char */
143 if (likely(compose)) 143 if (likely(compose))
144 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 144 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
145 if (ce1 && (cc = ce1[0])) { 145 if (ce1)
146 cc = ce1[0];
147 else
148 cc = 0;
149 if (cc) {
146 /* start of a possibly decomposed Hangul char */ 150 /* start of a possibly decomposed Hangul char */
147 if (cc != 0xffff) 151 if (cc != 0xffff)
148 goto done; 152 goto done;
@@ -209,7 +213,8 @@ int hfsplus_uni2asc(struct super_block *sb,
209 i++; 213 i++;
210 ce2 = ce1; 214 ce2 = ce1;
211 } 215 }
212 if ((cc = ce2[0])) { 216 cc = ce2[0];
217 if (cc) {
213 ip += i; 218 ip += i;
214 ustrlen -= i; 219 ustrlen -= i;
215 goto done; 220 goto done;
@@ -301,7 +306,11 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
301 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { 306 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
302 size = asc2unichar(sb, astr, len, &c); 307 size = asc2unichar(sb, astr, len, &c);
303 308
304 if (decompose && (dstr = decompose_unichar(c, &dsize))) { 309 if (decompose)
310 dstr = decompose_unichar(c, &dsize);
311 else
312 dstr = NULL;
313 if (dstr) {
305 if (outlen + dsize > HFSPLUS_MAX_STRLEN) 314 if (outlen + dsize > HFSPLUS_MAX_STRLEN)
306 break; 315 break;
307 do { 316 do {
@@ -346,15 +355,23 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
346 astr += size; 355 astr += size;
347 len -= size; 356 len -= size;
348 357
349 if (decompose && (dstr = decompose_unichar(c, &dsize))) { 358 if (decompose)
359 dstr = decompose_unichar(c, &dsize);
360 else
361 dstr = NULL;
362 if (dstr) {
350 do { 363 do {
351 c2 = *dstr++; 364 c2 = *dstr++;
352 if (!casefold || (c2 = case_fold(c2))) 365 if (casefold)
366 c2 = case_fold(c2);
367 if (!casefold || c2)
353 hash = partial_name_hash(c2, hash); 368 hash = partial_name_hash(c2, hash);
354 } while (--dsize > 0); 369 } while (--dsize > 0);
355 } else { 370 } else {
356 c2 = c; 371 c2 = c;
357 if (!casefold || (c2 = case_fold(c2))) 372 if (casefold)
373 c2 = case_fold(c2);
374 if (!casefold || c2)
358 hash = partial_name_hash(c2, hash); 375 hash = partial_name_hash(c2, hash);
359 } 376 }
360 } 377 }
@@ -422,12 +439,14 @@ int hfsplus_compare_dentry(const struct dentry *parent,
422 c1 = *dstr1; 439 c1 = *dstr1;
423 c2 = *dstr2; 440 c2 = *dstr2;
424 if (casefold) { 441 if (casefold) {
425 if (!(c1 = case_fold(c1))) { 442 c1 = case_fold(c1);
443 if (!c1) {
426 dstr1++; 444 dstr1++;
427 dsize1--; 445 dsize1--;
428 continue; 446 continue;
429 } 447 }
430 if (!(c2 = case_fold(c2))) { 448 c2 = case_fold(c2);
449 if (!c2) {
431 dstr2++; 450 dstr2++;
432 dsize2--; 451 dsize2--;
433 continue; 452 continue;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 4ac88ff79aa6..10e515a0d452 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -31,25 +31,67 @@ static void hfsplus_end_io_sync(struct bio *bio, int err)
31 complete(bio->bi_private); 31 complete(bio->bi_private);
32} 32}
33 33
34int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, 34/*
35 void *data, int rw) 35 * hfsplus_submit_bio - Perfrom block I/O
36 * @sb: super block of volume for I/O
37 * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
38 * @buf: buffer for I/O
39 * @data: output pointer for location of requested data
40 * @rw: direction of I/O
41 *
42 * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
43 * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
44 * @data will return a pointer to the start of the requested sector,
45 * which may not be the same location as @buf.
46 *
47 * If @sector is not aligned to the bdev logical block size it will
48 * be rounded down. For writes this means that @buf should contain data
49 * that starts at the rounded-down address. As long as the data was
50 * read using hfsplus_submit_bio() and the same buffer is used things
51 * will work correctly.
52 */
53int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
54 void *buf, void **data, int rw)
36{ 55{
37 DECLARE_COMPLETION_ONSTACK(wait); 56 DECLARE_COMPLETION_ONSTACK(wait);
38 struct bio *bio; 57 struct bio *bio;
39 int ret = 0; 58 int ret = 0;
59 unsigned int io_size;
60 loff_t start;
61 int offset;
62
63 /*
64 * Align sector to hardware sector size and find offset. We
65 * assume that io_size is a power of two, which _should_
66 * be true.
67 */
68 io_size = hfsplus_min_io_size(sb);
69 start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
70 offset = start & (io_size - 1);
71 sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
40 72
41 bio = bio_alloc(GFP_NOIO, 1); 73 bio = bio_alloc(GFP_NOIO, 1);
42 bio->bi_sector = sector; 74 bio->bi_sector = sector;
43 bio->bi_bdev = bdev; 75 bio->bi_bdev = sb->s_bdev;
44 bio->bi_end_io = hfsplus_end_io_sync; 76 bio->bi_end_io = hfsplus_end_io_sync;
45 bio->bi_private = &wait; 77 bio->bi_private = &wait;
46 78
47 /* 79 if (!(rw & WRITE) && data)
48 * We always submit one sector at a time, so bio_add_page must not fail. 80 *data = (u8 *)buf + offset;
49 */ 81
50 if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, 82 while (io_size > 0) {
51 offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) 83 unsigned int page_offset = offset_in_page(buf);
52 BUG(); 84 unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
85 io_size);
86
87 ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
88 if (ret != len) {
89 ret = -EIO;
90 goto out;
91 }
92 io_size -= len;
93 buf = (u8 *)buf + len;
94 }
53 95
54 submit_bio(rw, bio); 96 submit_bio(rw, bio);
55 wait_for_completion(&wait); 97 wait_for_completion(&wait);
@@ -57,8 +99,9 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
57 if (!bio_flagged(bio, BIO_UPTODATE)) 99 if (!bio_flagged(bio, BIO_UPTODATE))
58 ret = -EIO; 100 ret = -EIO;
59 101
102out:
60 bio_put(bio); 103 bio_put(bio);
61 return ret; 104 return ret < 0 ? ret : 0;
62} 105}
63 106
64static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) 107static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
@@ -141,23 +184,19 @@ int hfsplus_read_wrapper(struct super_block *sb)
141 184
142 if (hfsplus_get_last_session(sb, &part_start, &part_size)) 185 if (hfsplus_get_last_session(sb, &part_start, &part_size))
143 goto out; 186 goto out;
144 if ((u64)part_start + part_size > 0x100000000ULL) {
145 pr_err("hfs: volumes larger than 2TB are not supported yet\n");
146 goto out;
147 }
148 187
149 error = -ENOMEM; 188 error = -ENOMEM;
150 sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 189 sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
151 if (!sbi->s_vhdr) 190 if (!sbi->s_vhdr_buf)
152 goto out; 191 goto out;
153 sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 192 sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
154 if (!sbi->s_backup_vhdr) 193 if (!sbi->s_backup_vhdr_buf)
155 goto out_free_vhdr; 194 goto out_free_vhdr;
156 195
157reread: 196reread:
158 error = hfsplus_submit_bio(sb->s_bdev, 197 error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
159 part_start + HFSPLUS_VOLHEAD_SECTOR, 198 sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
160 sbi->s_vhdr, READ); 199 READ);
161 if (error) 200 if (error)
162 goto out_free_backup_vhdr; 201 goto out_free_backup_vhdr;
163 202
@@ -172,8 +211,9 @@ reread:
172 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) 211 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
173 goto out_free_backup_vhdr; 212 goto out_free_backup_vhdr;
174 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; 213 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
175 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; 214 part_start += (sector_t)wd.ablk_start +
176 part_size = wd.embed_count * wd.ablk_size; 215 (sector_t)wd.embed_start * wd.ablk_size;
216 part_size = (sector_t)wd.embed_count * wd.ablk_size;
177 goto reread; 217 goto reread;
178 default: 218 default:
179 /* 219 /*
@@ -186,9 +226,9 @@ reread:
186 goto reread; 226 goto reread;
187 } 227 }
188 228
189 error = hfsplus_submit_bio(sb->s_bdev, 229 error = hfsplus_submit_bio(sb, part_start + part_size - 2,
190 part_start + part_size - 2, 230 sbi->s_backup_vhdr_buf,
191 sbi->s_backup_vhdr, READ); 231 (void **)&sbi->s_backup_vhdr, READ);
192 if (error) 232 if (error)
193 goto out_free_backup_vhdr; 233 goto out_free_backup_vhdr;
194 234
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9b45ee84fbcc..3a1dafd228d1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -172,7 +172,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
172 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 172 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
173 tpid = 0; 173 tpid = 0;
174 if (pid_alive(p)) { 174 if (pid_alive(p)) {
175 struct task_struct *tracer = tracehook_tracer_task(p); 175 struct task_struct *tracer = ptrace_parent(p);
176 if (tracer) 176 if (tracer)
177 tpid = task_pid_nr_ns(tracer, ns); 177 tpid = task_pid_nr_ns(tracer, ns);
178 } 178 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3dc5e2a5cc38..91fb655a5cbf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -216,7 +216,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task)
216 if (task_is_stopped_or_traced(task)) { 216 if (task_is_stopped_or_traced(task)) {
217 int match; 217 int match;
218 rcu_read_lock(); 218 rcu_read_lock();
219 match = (tracehook_tracer_task(task) == current); 219 match = (ptrace_parent(task) == current);
220 rcu_read_unlock(); 220 rcu_read_unlock();
221 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 221 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
222 return mm; 222 return mm;
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 87cd0ead8633..fb3b5c813a30 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -78,7 +78,7 @@ static int nothing_to_commit(struct ubifs_info *c)
78 * If the root TNC node is dirty, we definitely have something to 78 * If the root TNC node is dirty, we definitely have something to
79 * commit. 79 * commit.
80 */ 80 */
81 if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) 81 if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode))
82 return 0; 82 return 0;
83 83
84 /* 84 /*
@@ -418,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c)
418 418
419 spin_lock(&c->cs_lock); 419 spin_lock(&c->cs_lock);
420 if (c->cmt_state == COMMIT_BROKEN) { 420 if (c->cmt_state == COMMIT_BROKEN) {
421 err = -EINVAL; 421 err = -EROFS;
422 goto out; 422 goto out;
423 } 423 }
424 424
@@ -444,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c)
444 * re-check it. 444 * re-check it.
445 */ 445 */
446 if (c->cmt_state == COMMIT_BROKEN) { 446 if (c->cmt_state == COMMIT_BROKEN) {
447 err = -EINVAL; 447 err = -EROFS;
448 goto out_cmt_unlock; 448 goto out_cmt_unlock;
449 } 449 }
450 450
@@ -576,7 +576,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
576 struct idx_node *i; 576 struct idx_node *i;
577 size_t sz; 577 size_t sz;
578 578
579 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) 579 if (!dbg_is_chk_index(c))
580 return 0; 580 return 0;
581 581
582 INIT_LIST_HEAD(&list); 582 INIT_LIST_HEAD(&list);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 0bb2bcef0de9..eef109a1a927 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -27,13 +27,12 @@
27 * various local functions of those subsystems. 27 * various local functions of those subsystems.
28 */ 28 */
29 29
30#define UBIFS_DBG_PRESERVE_UBI
31
32#include "ubifs.h"
33#include <linux/module.h> 30#include <linux/module.h>
34#include <linux/moduleparam.h>
35#include <linux/debugfs.h> 31#include <linux/debugfs.h>
36#include <linux/math64.h> 32#include <linux/math64.h>
33#include <linux/uaccess.h>
34#include <linux/random.h>
35#include "ubifs.h"
37 36
38#ifdef CONFIG_UBIFS_FS_DEBUG 37#ifdef CONFIG_UBIFS_FS_DEBUG
39 38
@@ -42,15 +41,6 @@ DEFINE_SPINLOCK(dbg_lock);
42static char dbg_key_buf0[128]; 41static char dbg_key_buf0[128];
43static char dbg_key_buf1[128]; 42static char dbg_key_buf1[128];
44 43
45unsigned int ubifs_chk_flags;
46unsigned int ubifs_tst_flags;
47
48module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
49module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
50
51MODULE_PARM_DESC(debug_chks, "Debug check flags");
52MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
53
54static const char *get_key_fmt(int fmt) 44static const char *get_key_fmt(int fmt)
55{ 45{
56 switch (fmt) { 46 switch (fmt) {
@@ -91,6 +81,28 @@ static const char *get_key_type(int type)
91 } 81 }
92} 82}
93 83
84static const char *get_dent_type(int type)
85{
86 switch (type) {
87 case UBIFS_ITYPE_REG:
88 return "file";
89 case UBIFS_ITYPE_DIR:
90 return "dir";
91 case UBIFS_ITYPE_LNK:
92 return "symlink";
93 case UBIFS_ITYPE_BLK:
94 return "blkdev";
95 case UBIFS_ITYPE_CHR:
96 return "char dev";
97 case UBIFS_ITYPE_FIFO:
98 return "fifo";
99 case UBIFS_ITYPE_SOCK:
100 return "socket";
101 default:
102 return "unknown/invalid type";
103 }
104}
105
94static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, 106static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
95 char *buffer) 107 char *buffer)
96{ 108{
@@ -234,9 +246,13 @@ static void dump_ch(const struct ubifs_ch *ch)
234 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); 246 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len));
235} 247}
236 248
237void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) 249void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
238{ 250{
239 const struct ubifs_inode *ui = ubifs_inode(inode); 251 const struct ubifs_inode *ui = ubifs_inode(inode);
252 struct qstr nm = { .name = NULL };
253 union ubifs_key key;
254 struct ubifs_dent_node *dent, *pdent = NULL;
255 int count = 2;
240 256
241 printk(KERN_DEBUG "Dump in-memory inode:"); 257 printk(KERN_DEBUG "Dump in-memory inode:");
242 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); 258 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino);
@@ -270,6 +286,32 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
270 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); 286 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
271 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); 287 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row);
272 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); 288 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len);
289
290 if (!S_ISDIR(inode->i_mode))
291 return;
292
293 printk(KERN_DEBUG "List of directory entries:\n");
294 ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
295
296 lowest_dent_key(c, &key, inode->i_ino);
297 while (1) {
298 dent = ubifs_tnc_next_ent(c, &key, &nm);
299 if (IS_ERR(dent)) {
300 if (PTR_ERR(dent) != -ENOENT)
301 printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent));
302 break;
303 }
304
305 printk(KERN_DEBUG "\t%d: %s (%s)\n",
306 count++, dent->name, get_dent_type(dent->type));
307
308 nm.name = dent->name;
309 nm.len = le16_to_cpu(dent->nlen);
310 kfree(pdent);
311 pdent = dent;
312 key_read(c, &dent->key, &key);
313 }
314 kfree(pdent);
273} 315}
274 316
275void dbg_dump_node(const struct ubifs_info *c, const void *node) 317void dbg_dump_node(const struct ubifs_info *c, const void *node)
@@ -278,7 +320,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
278 union ubifs_key key; 320 union ubifs_key key;
279 const struct ubifs_ch *ch = node; 321 const struct ubifs_ch *ch = node;
280 322
281 if (dbg_failure_mode) 323 if (dbg_is_tst_rcvry(c))
282 return; 324 return;
283 325
284 /* If the magic is incorrect, just hexdump the first bytes */ 326 /* If the magic is incorrect, just hexdump the first bytes */
@@ -834,7 +876,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
834 struct ubifs_scan_node *snod; 876 struct ubifs_scan_node *snod;
835 void *buf; 877 void *buf;
836 878
837 if (dbg_failure_mode) 879 if (dbg_is_tst_rcvry(c))
838 return; 880 return;
839 881
840 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 882 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
@@ -1080,6 +1122,7 @@ out:
1080 1122
1081/** 1123/**
1082 * dbg_check_synced_i_size - check synchronized inode size. 1124 * dbg_check_synced_i_size - check synchronized inode size.
1125 * @c: UBIFS file-system description object
1083 * @inode: inode to check 1126 * @inode: inode to check
1084 * 1127 *
1085 * If inode is clean, synchronized inode size has to be equivalent to current 1128 * If inode is clean, synchronized inode size has to be equivalent to current
@@ -1087,12 +1130,12 @@ out:
1087 * has to be locked). Returns %0 if synchronized inode size if correct, and 1130 * has to be locked). Returns %0 if synchronized inode size if correct, and
1088 * %-EINVAL if not. 1131 * %-EINVAL if not.
1089 */ 1132 */
1090int dbg_check_synced_i_size(struct inode *inode) 1133int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
1091{ 1134{
1092 int err = 0; 1135 int err = 0;
1093 struct ubifs_inode *ui = ubifs_inode(inode); 1136 struct ubifs_inode *ui = ubifs_inode(inode);
1094 1137
1095 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 1138 if (!dbg_is_chk_gen(c))
1096 return 0; 1139 return 0;
1097 if (!S_ISREG(inode->i_mode)) 1140 if (!S_ISREG(inode->i_mode))
1098 return 0; 1141 return 0;
@@ -1125,7 +1168,7 @@ int dbg_check_synced_i_size(struct inode *inode)
1125 * Note, it is good idea to make sure the @dir->i_mutex is locked before 1168 * Note, it is good idea to make sure the @dir->i_mutex is locked before
1126 * calling this function. 1169 * calling this function.
1127 */ 1170 */
1128int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) 1171int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
1129{ 1172{
1130 unsigned int nlink = 2; 1173 unsigned int nlink = 2;
1131 union ubifs_key key; 1174 union ubifs_key key;
@@ -1133,7 +1176,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
1133 struct qstr nm = { .name = NULL }; 1176 struct qstr nm = { .name = NULL };
1134 loff_t size = UBIFS_INO_NODE_SZ; 1177 loff_t size = UBIFS_INO_NODE_SZ;
1135 1178
1136 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 1179 if (!dbg_is_chk_gen(c))
1137 return 0; 1180 return 0;
1138 1181
1139 if (!S_ISDIR(dir->i_mode)) 1182 if (!S_ISDIR(dir->i_mode))
@@ -1167,12 +1210,14 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
1167 "but calculated size is %llu", dir->i_ino, 1210 "but calculated size is %llu", dir->i_ino,
1168 (unsigned long long)i_size_read(dir), 1211 (unsigned long long)i_size_read(dir),
1169 (unsigned long long)size); 1212 (unsigned long long)size);
1213 dbg_dump_inode(c, dir);
1170 dump_stack(); 1214 dump_stack();
1171 return -EINVAL; 1215 return -EINVAL;
1172 } 1216 }
1173 if (dir->i_nlink != nlink) { 1217 if (dir->i_nlink != nlink) {
1174 ubifs_err("directory inode %lu has nlink %u, but calculated " 1218 ubifs_err("directory inode %lu has nlink %u, but calculated "
1175 "nlink is %u", dir->i_ino, dir->i_nlink, nlink); 1219 "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
1220 dbg_dump_inode(c, dir);
1176 dump_stack(); 1221 dump_stack();
1177 return -EINVAL; 1222 return -EINVAL;
1178 } 1223 }
@@ -1489,7 +1534,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
1489 long clean_cnt = 0, dirty_cnt = 0; 1534 long clean_cnt = 0, dirty_cnt = 0;
1490 int err, last; 1535 int err, last;
1491 1536
1492 if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) 1537 if (!dbg_is_chk_index(c))
1493 return 0; 1538 return 0;
1494 1539
1495 ubifs_assert(mutex_is_locked(&c->tnc_mutex)); 1540 ubifs_assert(mutex_is_locked(&c->tnc_mutex));
@@ -1736,7 +1781,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
1736 int err; 1781 int err;
1737 long long calc = 0; 1782 long long calc = 0;
1738 1783
1739 if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) 1784 if (!dbg_is_chk_index(c))
1740 return 0; 1785 return 0;
1741 1786
1742 err = dbg_walk_index(c, NULL, add_size, &calc); 1787 err = dbg_walk_index(c, NULL, add_size, &calc);
@@ -2312,7 +2357,7 @@ int dbg_check_filesystem(struct ubifs_info *c)
2312 int err; 2357 int err;
2313 struct fsck_data fsckd; 2358 struct fsck_data fsckd;
2314 2359
2315 if (!(ubifs_chk_flags & UBIFS_CHK_FS)) 2360 if (!dbg_is_chk_fs(c))
2316 return 0; 2361 return 0;
2317 2362
2318 fsckd.inodes = RB_ROOT; 2363 fsckd.inodes = RB_ROOT;
@@ -2347,7 +2392,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
2347 struct list_head *cur; 2392 struct list_head *cur;
2348 struct ubifs_scan_node *sa, *sb; 2393 struct ubifs_scan_node *sa, *sb;
2349 2394
2350 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2395 if (!dbg_is_chk_gen(c))
2351 return 0; 2396 return 0;
2352 2397
2353 for (cur = head->next; cur->next != head; cur = cur->next) { 2398 for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2414,7 +2459,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2414 struct list_head *cur; 2459 struct list_head *cur;
2415 struct ubifs_scan_node *sa, *sb; 2460 struct ubifs_scan_node *sa, *sb;
2416 2461
2417 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2462 if (!dbg_is_chk_gen(c))
2418 return 0; 2463 return 0;
2419 2464
2420 for (cur = head->next; cur->next != head; cur = cur->next) { 2465 for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2491,214 +2536,141 @@ error_dump:
2491 return 0; 2536 return 0;
2492} 2537}
2493 2538
2494int dbg_force_in_the_gaps(void) 2539static inline int chance(unsigned int n, unsigned int out_of)
2495{ 2540{
2496 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2541 return !!((random32() % out_of) + 1 <= n);
2497 return 0;
2498 2542
2499 return !(random32() & 7);
2500} 2543}
2501 2544
2502/* Failure mode for recovery testing */ 2545static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2503
2504#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))
2505
2506struct failure_mode_info {
2507 struct list_head list;
2508 struct ubifs_info *c;
2509};
2510
2511static LIST_HEAD(fmi_list);
2512static DEFINE_SPINLOCK(fmi_lock);
2513
2514static unsigned int next;
2515
2516static int simple_rand(void)
2517{
2518 if (next == 0)
2519 next = current->pid;
2520 next = next * 1103515245 + 12345;
2521 return (next >> 16) & 32767;
2522}
2523
2524static void failure_mode_init(struct ubifs_info *c)
2525{
2526 struct failure_mode_info *fmi;
2527
2528 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
2529 if (!fmi) {
2530 ubifs_err("Failed to register failure mode - no memory");
2531 return;
2532 }
2533 fmi->c = c;
2534 spin_lock(&fmi_lock);
2535 list_add_tail(&fmi->list, &fmi_list);
2536 spin_unlock(&fmi_lock);
2537}
2538
2539static void failure_mode_exit(struct ubifs_info *c)
2540{ 2546{
2541 struct failure_mode_info *fmi, *tmp; 2547 struct ubifs_debug_info *d = c->dbg;
2542
2543 spin_lock(&fmi_lock);
2544 list_for_each_entry_safe(fmi, tmp, &fmi_list, list)
2545 if (fmi->c == c) {
2546 list_del(&fmi->list);
2547 kfree(fmi);
2548 }
2549 spin_unlock(&fmi_lock);
2550}
2551
2552static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
2553{
2554 struct failure_mode_info *fmi;
2555
2556 spin_lock(&fmi_lock);
2557 list_for_each_entry(fmi, &fmi_list, list)
2558 if (fmi->c->ubi == desc) {
2559 struct ubifs_info *c = fmi->c;
2560
2561 spin_unlock(&fmi_lock);
2562 return c;
2563 }
2564 spin_unlock(&fmi_lock);
2565 return NULL;
2566}
2567
2568static int in_failure_mode(struct ubi_volume_desc *desc)
2569{
2570 struct ubifs_info *c = dbg_find_info(desc);
2571
2572 if (c && dbg_failure_mode)
2573 return c->dbg->failure_mode;
2574 return 0;
2575}
2576 2548
2577static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) 2549 ubifs_assert(dbg_is_tst_rcvry(c));
2578{
2579 struct ubifs_info *c = dbg_find_info(desc);
2580 struct ubifs_debug_info *d;
2581 2550
2582 if (!c || !dbg_failure_mode) 2551 if (!d->pc_cnt) {
2583 return 0; 2552 /* First call - decide delay to the power cut */
2584 d = c->dbg;
2585 if (d->failure_mode)
2586 return 1;
2587 if (!d->fail_cnt) {
2588 /* First call - decide delay to failure */
2589 if (chance(1, 2)) { 2553 if (chance(1, 2)) {
2590 unsigned int delay = 1 << (simple_rand() >> 11); 2554 unsigned long delay;
2591 2555
2592 if (chance(1, 2)) { 2556 if (chance(1, 2)) {
2593 d->fail_delay = 1; 2557 d->pc_delay = 1;
2594 d->fail_timeout = jiffies + 2558 /* Fail withing 1 minute */
2595 msecs_to_jiffies(delay); 2559 delay = random32() % 60000;
2596 dbg_rcvry("failing after %ums", delay); 2560 d->pc_timeout = jiffies;
2561 d->pc_timeout += msecs_to_jiffies(delay);
2562 ubifs_warn("failing after %lums", delay);
2597 } else { 2563 } else {
2598 d->fail_delay = 2; 2564 d->pc_delay = 2;
2599 d->fail_cnt_max = delay; 2565 delay = random32() % 10000;
2600 dbg_rcvry("failing after %u calls", delay); 2566 /* Fail within 10000 operations */
2567 d->pc_cnt_max = delay;
2568 ubifs_warn("failing after %lu calls", delay);
2601 } 2569 }
2602 } 2570 }
2603 d->fail_cnt += 1; 2571
2572 d->pc_cnt += 1;
2604 } 2573 }
2574
2605 /* Determine if failure delay has expired */ 2575 /* Determine if failure delay has expired */
2606 if (d->fail_delay == 1) { 2576 if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout))
2607 if (time_before(jiffies, d->fail_timeout))
2608 return 0; 2577 return 0;
2609 } else if (d->fail_delay == 2) 2578 if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max)
2610 if (d->fail_cnt++ < d->fail_cnt_max)
2611 return 0; 2579 return 0;
2580
2612 if (lnum == UBIFS_SB_LNUM) { 2581 if (lnum == UBIFS_SB_LNUM) {
2613 if (write) { 2582 if (write && chance(1, 2))
2614 if (chance(1, 2)) 2583 return 0;
2615 return 0; 2584 if (chance(19, 20))
2616 } else if (chance(19, 20))
2617 return 0; 2585 return 0;
2618 dbg_rcvry("failing in super block LEB %d", lnum); 2586 ubifs_warn("failing in super block LEB %d", lnum);
2619 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { 2587 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
2620 if (chance(19, 20)) 2588 if (chance(19, 20))
2621 return 0; 2589 return 0;
2622 dbg_rcvry("failing in master LEB %d", lnum); 2590 ubifs_warn("failing in master LEB %d", lnum);
2623 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { 2591 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
2624 if (write) { 2592 if (write && chance(99, 100))
2625 if (chance(99, 100))
2626 return 0;
2627 } else if (chance(399, 400))
2628 return 0; 2593 return 0;
2629 dbg_rcvry("failing in log LEB %d", lnum); 2594 if (chance(399, 400))
2595 return 0;
2596 ubifs_warn("failing in log LEB %d", lnum);
2630 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { 2597 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
2631 if (write) { 2598 if (write && chance(7, 8))
2632 if (chance(7, 8))
2633 return 0;
2634 } else if (chance(19, 20))
2635 return 0; 2599 return 0;
2636 dbg_rcvry("failing in LPT LEB %d", lnum); 2600 if (chance(19, 20))
2601 return 0;
2602 ubifs_warn("failing in LPT LEB %d", lnum);
2637 } else if (lnum >= c->orph_first && lnum <= c->orph_last) { 2603 } else if (lnum >= c->orph_first && lnum <= c->orph_last) {
2638 if (write) { 2604 if (write && chance(1, 2))
2639 if (chance(1, 2)) 2605 return 0;
2640 return 0; 2606 if (chance(9, 10))
2641 } else if (chance(9, 10))
2642 return 0; 2607 return 0;
2643 dbg_rcvry("failing in orphan LEB %d", lnum); 2608 ubifs_warn("failing in orphan LEB %d", lnum);
2644 } else if (lnum == c->ihead_lnum) { 2609 } else if (lnum == c->ihead_lnum) {
2645 if (chance(99, 100)) 2610 if (chance(99, 100))
2646 return 0; 2611 return 0;
2647 dbg_rcvry("failing in index head LEB %d", lnum); 2612 ubifs_warn("failing in index head LEB %d", lnum);
2648 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { 2613 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
2649 if (chance(9, 10)) 2614 if (chance(9, 10))
2650 return 0; 2615 return 0;
2651 dbg_rcvry("failing in GC head LEB %d", lnum); 2616 ubifs_warn("failing in GC head LEB %d", lnum);
2652 } else if (write && !RB_EMPTY_ROOT(&c->buds) && 2617 } else if (write && !RB_EMPTY_ROOT(&c->buds) &&
2653 !ubifs_search_bud(c, lnum)) { 2618 !ubifs_search_bud(c, lnum)) {
2654 if (chance(19, 20)) 2619 if (chance(19, 20))
2655 return 0; 2620 return 0;
2656 dbg_rcvry("failing in non-bud LEB %d", lnum); 2621 ubifs_warn("failing in non-bud LEB %d", lnum);
2657 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || 2622 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
2658 c->cmt_state == COMMIT_RUNNING_REQUIRED) { 2623 c->cmt_state == COMMIT_RUNNING_REQUIRED) {
2659 if (chance(999, 1000)) 2624 if (chance(999, 1000))
2660 return 0; 2625 return 0;
2661 dbg_rcvry("failing in bud LEB %d commit running", lnum); 2626 ubifs_warn("failing in bud LEB %d commit running", lnum);
2662 } else { 2627 } else {
2663 if (chance(9999, 10000)) 2628 if (chance(9999, 10000))
2664 return 0; 2629 return 0;
2665 dbg_rcvry("failing in bud LEB %d commit not running", lnum); 2630 ubifs_warn("failing in bud LEB %d commit not running", lnum);
2666 } 2631 }
2667 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); 2632
2668 d->failure_mode = 1; 2633 d->pc_happened = 1;
2634 ubifs_warn("========== Power cut emulated ==========");
2669 dump_stack(); 2635 dump_stack();
2670 return 1; 2636 return 1;
2671} 2637}
2672 2638
2673static void cut_data(const void *buf, int len) 2639static void cut_data(const void *buf, unsigned int len)
2674{ 2640{
2675 int flen, i; 2641 unsigned int from, to, i, ffs = chance(1, 2);
2676 unsigned char *p = (void *)buf; 2642 unsigned char *p = (void *)buf;
2677 2643
2678 flen = (len * (long long)simple_rand()) >> 15; 2644 from = random32() % (len + 1);
2679 for (i = flen; i < len; i++) 2645 if (chance(1, 2))
2680 p[i] = 0xff; 2646 to = random32() % (len - from + 1);
2681} 2647 else
2648 to = len;
2682 2649
2683int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, 2650 if (from < to)
2684 int len, int check) 2651 ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
2685{ 2652 ffs ? "0xFFs" : "random data");
2686 if (in_failure_mode(desc)) 2653
2687 return -EROFS; 2654 if (ffs)
2688 return ubi_leb_read(desc, lnum, buf, offset, len, check); 2655 for (i = from; i < to; i++)
2656 p[i] = 0xFF;
2657 else
2658 for (i = from; i < to; i++)
2659 p[i] = random32() % 0x100;
2689} 2660}
2690 2661
2691int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, 2662int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
2692 int offset, int len, int dtype) 2663 int offs, int len, int dtype)
2693{ 2664{
2694 int err, failing; 2665 int err, failing;
2695 2666
2696 if (in_failure_mode(desc)) 2667 if (c->dbg->pc_happened)
2697 return -EROFS; 2668 return -EROFS;
2698 failing = do_fail(desc, lnum, 1); 2669
2670 failing = power_cut_emulated(c, lnum, 1);
2699 if (failing) 2671 if (failing)
2700 cut_data(buf, len); 2672 cut_data(buf, len);
2701 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); 2673 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
2702 if (err) 2674 if (err)
2703 return err; 2675 return err;
2704 if (failing) 2676 if (failing)
@@ -2706,162 +2678,207 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2706 return 0; 2678 return 0;
2707} 2679}
2708 2680
2709int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, 2681int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf,
2710 int len, int dtype) 2682 int len, int dtype)
2711{ 2683{
2712 int err; 2684 int err;
2713 2685
2714 if (do_fail(desc, lnum, 1)) 2686 if (c->dbg->pc_happened)
2715 return -EROFS; 2687 return -EROFS;
2716 err = ubi_leb_change(desc, lnum, buf, len, dtype); 2688 if (power_cut_emulated(c, lnum, 1))
2689 return -EROFS;
2690 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
2717 if (err) 2691 if (err)
2718 return err; 2692 return err;
2719 if (do_fail(desc, lnum, 1)) 2693 if (power_cut_emulated(c, lnum, 1))
2720 return -EROFS; 2694 return -EROFS;
2721 return 0; 2695 return 0;
2722} 2696}
2723 2697
2724int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) 2698int dbg_leb_unmap(struct ubifs_info *c, int lnum)
2725{ 2699{
2726 int err; 2700 int err;
2727 2701
2728 if (do_fail(desc, lnum, 0)) 2702 if (c->dbg->pc_happened)
2703 return -EROFS;
2704 if (power_cut_emulated(c, lnum, 0))
2729 return -EROFS; 2705 return -EROFS;
2730 err = ubi_leb_erase(desc, lnum); 2706 err = ubi_leb_unmap(c->ubi, lnum);
2731 if (err) 2707 if (err)
2732 return err; 2708 return err;
2733 if (do_fail(desc, lnum, 0)) 2709 if (power_cut_emulated(c, lnum, 0))
2734 return -EROFS; 2710 return -EROFS;
2735 return 0; 2711 return 0;
2736} 2712}
2737 2713
2738int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) 2714int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype)
2739{ 2715{
2740 int err; 2716 int err;
2741 2717
2742 if (do_fail(desc, lnum, 0)) 2718 if (c->dbg->pc_happened)
2743 return -EROFS; 2719 return -EROFS;
2744 err = ubi_leb_unmap(desc, lnum); 2720 if (power_cut_emulated(c, lnum, 0))
2721 return -EROFS;
2722 err = ubi_leb_map(c->ubi, lnum, dtype);
2745 if (err) 2723 if (err)
2746 return err; 2724 return err;
2747 if (do_fail(desc, lnum, 0)) 2725 if (power_cut_emulated(c, lnum, 0))
2748 return -EROFS; 2726 return -EROFS;
2749 return 0; 2727 return 0;
2750} 2728}
2751 2729
2752int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) 2730/*
2753{ 2731 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
2754 if (in_failure_mode(desc)) 2732 * contain the stuff specific to particular file-system mounts.
2755 return -EROFS; 2733 */
2756 return ubi_is_mapped(desc, lnum); 2734static struct dentry *dfs_rootdir;
2757}
2758 2735
2759int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) 2736static int dfs_file_open(struct inode *inode, struct file *file)
2760{ 2737{
2761 int err; 2738 file->private_data = inode->i_private;
2762 2739 return nonseekable_open(inode, file);
2763 if (do_fail(desc, lnum, 0))
2764 return -EROFS;
2765 err = ubi_leb_map(desc, lnum, dtype);
2766 if (err)
2767 return err;
2768 if (do_fail(desc, lnum, 0))
2769 return -EROFS;
2770 return 0;
2771} 2740}
2772 2741
2773/** 2742/**
2774 * ubifs_debugging_init - initialize UBIFS debugging. 2743 * provide_user_output - provide output to the user reading a debugfs file.
2775 * @c: UBIFS file-system description object 2744 * @val: boolean value for the answer
2745 * @u: the buffer to store the answer at
2746 * @count: size of the buffer
2747 * @ppos: position in the @u output buffer
2776 * 2748 *
2777 * This function initializes debugging-related data for the file system. 2749 * This is a simple helper function which stores @val boolean value in the user
2778 * Returns zero in case of success and a negative error code in case of 2750 * buffer when the user reads one of UBIFS debugfs files. Returns amount of
2751 * bytes written to @u in case of success and a negative error code in case of
2779 * failure. 2752 * failure.
2780 */ 2753 */
2781int ubifs_debugging_init(struct ubifs_info *c) 2754static int provide_user_output(int val, char __user *u, size_t count,
2755 loff_t *ppos)
2782{ 2756{
2783 c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); 2757 char buf[3];
2784 if (!c->dbg)
2785 return -ENOMEM;
2786 2758
2787 failure_mode_init(c); 2759 if (val)
2788 return 0; 2760 buf[0] = '1';
2761 else
2762 buf[0] = '0';
2763 buf[1] = '\n';
2764 buf[2] = 0x00;
2765
2766 return simple_read_from_buffer(u, count, ppos, buf, 2);
2789} 2767}
2790 2768
2791/** 2769static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
2792 * ubifs_debugging_exit - free debugging data. 2770 loff_t *ppos)
2793 * @c: UBIFS file-system description object
2794 */
2795void ubifs_debugging_exit(struct ubifs_info *c)
2796{ 2771{
2797 failure_mode_exit(c); 2772 struct dentry *dent = file->f_path.dentry;
2798 kfree(c->dbg); 2773 struct ubifs_info *c = file->private_data;
2799} 2774 struct ubifs_debug_info *d = c->dbg;
2775 int val;
2776
2777 if (dent == d->dfs_chk_gen)
2778 val = d->chk_gen;
2779 else if (dent == d->dfs_chk_index)
2780 val = d->chk_index;
2781 else if (dent == d->dfs_chk_orph)
2782 val = d->chk_orph;
2783 else if (dent == d->dfs_chk_lprops)
2784 val = d->chk_lprops;
2785 else if (dent == d->dfs_chk_fs)
2786 val = d->chk_fs;
2787 else if (dent == d->dfs_tst_rcvry)
2788 val = d->tst_rcvry;
2789 else
2790 return -EINVAL;
2800 2791
2801/* 2792 return provide_user_output(val, u, count, ppos);
2802 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which 2793}
2803 * contain the stuff specific to particular file-system mounts.
2804 */
2805static struct dentry *dfs_rootdir;
2806 2794
2807/** 2795/**
2808 * dbg_debugfs_init - initialize debugfs file-system. 2796 * interpret_user_input - interpret user debugfs file input.
2797 * @u: user-provided buffer with the input
2798 * @count: buffer size
2809 * 2799 *
2810 * UBIFS uses debugfs file-system to expose various debugging knobs to 2800 * This is a helper function which interpret user input to a boolean UBIFS
2811 * user-space. This function creates "ubifs" directory in the debugfs 2801 * debugfs file. Returns %0 or %1 in case of success and a negative error code
2812 * file-system. Returns zero in case of success and a negative error code in 2802 * in case of failure.
2813 * case of failure.
2814 */ 2803 */
2815int dbg_debugfs_init(void) 2804static int interpret_user_input(const char __user *u, size_t count)
2816{ 2805{
2817 dfs_rootdir = debugfs_create_dir("ubifs", NULL); 2806 size_t buf_size;
2818 if (IS_ERR(dfs_rootdir)) { 2807 char buf[8];
2819 int err = PTR_ERR(dfs_rootdir);
2820 ubifs_err("cannot create \"ubifs\" debugfs directory, "
2821 "error %d\n", err);
2822 return err;
2823 }
2824 2808
2825 return 0; 2809 buf_size = min_t(size_t, count, (sizeof(buf) - 1));
2826} 2810 if (copy_from_user(buf, u, buf_size))
2811 return -EFAULT;
2827 2812
2828/** 2813 if (buf[0] == '1')
2829 * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. 2814 return 1;
2830 */ 2815 else if (buf[0] == '0')
2831void dbg_debugfs_exit(void) 2816 return 0;
2832{
2833 debugfs_remove(dfs_rootdir);
2834}
2835 2817
2836static int open_debugfs_file(struct inode *inode, struct file *file) 2818 return -EINVAL;
2837{
2838 file->private_data = inode->i_private;
2839 return nonseekable_open(inode, file);
2840} 2819}
2841 2820
2842static ssize_t write_debugfs_file(struct file *file, const char __user *buf, 2821static ssize_t dfs_file_write(struct file *file, const char __user *u,
2843 size_t count, loff_t *ppos) 2822 size_t count, loff_t *ppos)
2844{ 2823{
2845 struct ubifs_info *c = file->private_data; 2824 struct ubifs_info *c = file->private_data;
2846 struct ubifs_debug_info *d = c->dbg; 2825 struct ubifs_debug_info *d = c->dbg;
2826 struct dentry *dent = file->f_path.dentry;
2827 int val;
2847 2828
2848 if (file->f_path.dentry == d->dfs_dump_lprops) 2829 /*
2830 * TODO: this is racy - the file-system might have already been
2831 * unmounted and we'd oops in this case. The plan is to fix it with
2832 * help of 'iterate_supers_type()' which we should have in v3.0: when
2833 * a debugfs opened, we rember FS's UUID in file->private_data. Then
2834 * whenever we access the FS via a debugfs file, we iterate all UBIFS
2835 * superblocks and fine the one with the same UUID, and take the
2836 * locking right.
2837 *
2838 * The other way to go suggested by Al Viro is to create a separate
2839 * 'ubifs-debug' file-system instead.
2840 */
2841 if (file->f_path.dentry == d->dfs_dump_lprops) {
2849 dbg_dump_lprops(c); 2842 dbg_dump_lprops(c);
2850 else if (file->f_path.dentry == d->dfs_dump_budg) 2843 return count;
2844 }
2845 if (file->f_path.dentry == d->dfs_dump_budg) {
2851 dbg_dump_budg(c, &c->bi); 2846 dbg_dump_budg(c, &c->bi);
2852 else if (file->f_path.dentry == d->dfs_dump_tnc) { 2847 return count;
2848 }
2849 if (file->f_path.dentry == d->dfs_dump_tnc) {
2853 mutex_lock(&c->tnc_mutex); 2850 mutex_lock(&c->tnc_mutex);
2854 dbg_dump_tnc(c); 2851 dbg_dump_tnc(c);
2855 mutex_unlock(&c->tnc_mutex); 2852 mutex_unlock(&c->tnc_mutex);
2856 } else 2853 return count;
2854 }
2855
2856 val = interpret_user_input(u, count);
2857 if (val < 0)
2858 return val;
2859
2860 if (dent == d->dfs_chk_gen)
2861 d->chk_gen = val;
2862 else if (dent == d->dfs_chk_index)
2863 d->chk_index = val;
2864 else if (dent == d->dfs_chk_orph)
2865 d->chk_orph = val;
2866 else if (dent == d->dfs_chk_lprops)
2867 d->chk_lprops = val;
2868 else if (dent == d->dfs_chk_fs)
2869 d->chk_fs = val;
2870 else if (dent == d->dfs_tst_rcvry)
2871 d->tst_rcvry = val;
2872 else
2857 return -EINVAL; 2873 return -EINVAL;
2858 2874
2859 return count; 2875 return count;
2860} 2876}
2861 2877
2862static const struct file_operations dfs_fops = { 2878static const struct file_operations dfs_fops = {
2863 .open = open_debugfs_file, 2879 .open = dfs_file_open,
2864 .write = write_debugfs_file, 2880 .read = dfs_file_read,
2881 .write = dfs_file_write,
2865 .owner = THIS_MODULE, 2882 .owner = THIS_MODULE,
2866 .llseek = no_llseek, 2883 .llseek = no_llseek,
2867}; 2884};
@@ -2880,12 +2897,20 @@ static const struct file_operations dfs_fops = {
2880 */ 2897 */
2881int dbg_debugfs_init_fs(struct ubifs_info *c) 2898int dbg_debugfs_init_fs(struct ubifs_info *c)
2882{ 2899{
2883 int err; 2900 int err, n;
2884 const char *fname; 2901 const char *fname;
2885 struct dentry *dent; 2902 struct dentry *dent;
2886 struct ubifs_debug_info *d = c->dbg; 2903 struct ubifs_debug_info *d = c->dbg;
2887 2904
2888 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); 2905 n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
2906 c->vi.ubi_num, c->vi.vol_id);
2907 if (n == UBIFS_DFS_DIR_LEN) {
2908 /* The array size is too small */
2909 fname = UBIFS_DFS_DIR_NAME;
2910 dent = ERR_PTR(-EINVAL);
2911 goto out;
2912 }
2913
2889 fname = d->dfs_dir_name; 2914 fname = d->dfs_dir_name;
2890 dent = debugfs_create_dir(fname, dfs_rootdir); 2915 dent = debugfs_create_dir(fname, dfs_rootdir);
2891 if (IS_ERR_OR_NULL(dent)) 2916 if (IS_ERR_OR_NULL(dent))
@@ -2910,13 +2935,55 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2910 goto out_remove; 2935 goto out_remove;
2911 d->dfs_dump_tnc = dent; 2936 d->dfs_dump_tnc = dent;
2912 2937
2938 fname = "chk_general";
2939 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2940 &dfs_fops);
2941 if (IS_ERR_OR_NULL(dent))
2942 goto out_remove;
2943 d->dfs_chk_gen = dent;
2944
2945 fname = "chk_index";
2946 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2947 &dfs_fops);
2948 if (IS_ERR_OR_NULL(dent))
2949 goto out_remove;
2950 d->dfs_chk_index = dent;
2951
2952 fname = "chk_orphans";
2953 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2954 &dfs_fops);
2955 if (IS_ERR_OR_NULL(dent))
2956 goto out_remove;
2957 d->dfs_chk_orph = dent;
2958
2959 fname = "chk_lprops";
2960 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2961 &dfs_fops);
2962 if (IS_ERR_OR_NULL(dent))
2963 goto out_remove;
2964 d->dfs_chk_lprops = dent;
2965
2966 fname = "chk_fs";
2967 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2968 &dfs_fops);
2969 if (IS_ERR_OR_NULL(dent))
2970 goto out_remove;
2971 d->dfs_chk_fs = dent;
2972
2973 fname = "tst_recovery";
2974 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2975 &dfs_fops);
2976 if (IS_ERR_OR_NULL(dent))
2977 goto out_remove;
2978 d->dfs_tst_rcvry = dent;
2979
2913 return 0; 2980 return 0;
2914 2981
2915out_remove: 2982out_remove:
2916 debugfs_remove_recursive(d->dfs_dir); 2983 debugfs_remove_recursive(d->dfs_dir);
2917out: 2984out:
2918 err = dent ? PTR_ERR(dent) : -ENODEV; 2985 err = dent ? PTR_ERR(dent) : -ENODEV;
2919 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", 2986 ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
2920 fname, err); 2987 fname, err);
2921 return err; 2988 return err;
2922} 2989}
@@ -2930,4 +2997,179 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c)
2930 debugfs_remove_recursive(c->dbg->dfs_dir); 2997 debugfs_remove_recursive(c->dbg->dfs_dir);
2931} 2998}
2932 2999
3000struct ubifs_global_debug_info ubifs_dbg;
3001
3002static struct dentry *dfs_chk_gen;
3003static struct dentry *dfs_chk_index;
3004static struct dentry *dfs_chk_orph;
3005static struct dentry *dfs_chk_lprops;
3006static struct dentry *dfs_chk_fs;
3007static struct dentry *dfs_tst_rcvry;
3008
3009static ssize_t dfs_global_file_read(struct file *file, char __user *u,
3010 size_t count, loff_t *ppos)
3011{
3012 struct dentry *dent = file->f_path.dentry;
3013 int val;
3014
3015 if (dent == dfs_chk_gen)
3016 val = ubifs_dbg.chk_gen;
3017 else if (dent == dfs_chk_index)
3018 val = ubifs_dbg.chk_index;
3019 else if (dent == dfs_chk_orph)
3020 val = ubifs_dbg.chk_orph;
3021 else if (dent == dfs_chk_lprops)
3022 val = ubifs_dbg.chk_lprops;
3023 else if (dent == dfs_chk_fs)
3024 val = ubifs_dbg.chk_fs;
3025 else if (dent == dfs_tst_rcvry)
3026 val = ubifs_dbg.tst_rcvry;
3027 else
3028 return -EINVAL;
3029
3030 return provide_user_output(val, u, count, ppos);
3031}
3032
3033static ssize_t dfs_global_file_write(struct file *file, const char __user *u,
3034 size_t count, loff_t *ppos)
3035{
3036 struct dentry *dent = file->f_path.dentry;
3037 int val;
3038
3039 val = interpret_user_input(u, count);
3040 if (val < 0)
3041 return val;
3042
3043 if (dent == dfs_chk_gen)
3044 ubifs_dbg.chk_gen = val;
3045 else if (dent == dfs_chk_index)
3046 ubifs_dbg.chk_index = val;
3047 else if (dent == dfs_chk_orph)
3048 ubifs_dbg.chk_orph = val;
3049 else if (dent == dfs_chk_lprops)
3050 ubifs_dbg.chk_lprops = val;
3051 else if (dent == dfs_chk_fs)
3052 ubifs_dbg.chk_fs = val;
3053 else if (dent == dfs_tst_rcvry)
3054 ubifs_dbg.tst_rcvry = val;
3055 else
3056 return -EINVAL;
3057
3058 return count;
3059}
3060
3061static const struct file_operations dfs_global_fops = {
3062 .read = dfs_global_file_read,
3063 .write = dfs_global_file_write,
3064 .owner = THIS_MODULE,
3065 .llseek = no_llseek,
3066};
3067
3068/**
3069 * dbg_debugfs_init - initialize debugfs file-system.
3070 *
3071 * UBIFS uses debugfs file-system to expose various debugging knobs to
3072 * user-space. This function creates "ubifs" directory in the debugfs
3073 * file-system. Returns zero in case of success and a negative error code in
3074 * case of failure.
3075 */
3076int dbg_debugfs_init(void)
3077{
3078 int err;
3079 const char *fname;
3080 struct dentry *dent;
3081
3082 fname = "ubifs";
3083 dent = debugfs_create_dir(fname, NULL);
3084 if (IS_ERR_OR_NULL(dent))
3085 goto out;
3086 dfs_rootdir = dent;
3087
3088 fname = "chk_general";
3089 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3090 &dfs_global_fops);
3091 if (IS_ERR_OR_NULL(dent))
3092 goto out_remove;
3093 dfs_chk_gen = dent;
3094
3095 fname = "chk_index";
3096 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3097 &dfs_global_fops);
3098 if (IS_ERR_OR_NULL(dent))
3099 goto out_remove;
3100 dfs_chk_index = dent;
3101
3102 fname = "chk_orphans";
3103 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3104 &dfs_global_fops);
3105 if (IS_ERR_OR_NULL(dent))
3106 goto out_remove;
3107 dfs_chk_orph = dent;
3108
3109 fname = "chk_lprops";
3110 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3111 &dfs_global_fops);
3112 if (IS_ERR_OR_NULL(dent))
3113 goto out_remove;
3114 dfs_chk_lprops = dent;
3115
3116 fname = "chk_fs";
3117 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3118 &dfs_global_fops);
3119 if (IS_ERR_OR_NULL(dent))
3120 goto out_remove;
3121 dfs_chk_fs = dent;
3122
3123 fname = "tst_recovery";
3124 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3125 &dfs_global_fops);
3126 if (IS_ERR_OR_NULL(dent))
3127 goto out_remove;
3128 dfs_tst_rcvry = dent;
3129
3130 return 0;
3131
3132out_remove:
3133 debugfs_remove_recursive(dfs_rootdir);
3134out:
3135 err = dent ? PTR_ERR(dent) : -ENODEV;
3136 ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
3137 fname, err);
3138 return err;
3139}
3140
3141/**
3142 * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
3143 */
3144void dbg_debugfs_exit(void)
3145{
3146 debugfs_remove_recursive(dfs_rootdir);
3147}
3148
3149/**
3150 * ubifs_debugging_init - initialize UBIFS debugging.
3151 * @c: UBIFS file-system description object
3152 *
3153 * This function initializes debugging-related data for the file system.
3154 * Returns zero in case of success and a negative error code in case of
3155 * failure.
3156 */
3157int ubifs_debugging_init(struct ubifs_info *c)
3158{
3159 c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
3160 if (!c->dbg)
3161 return -ENOMEM;
3162
3163 return 0;
3164}
3165
3166/**
3167 * ubifs_debugging_exit - free debugging data.
3168 * @c: UBIFS file-system description object
3169 */
3170void ubifs_debugging_exit(struct ubifs_info *c)
3171{
3172 kfree(c->dbg);
3173}
3174
2933#endif /* CONFIG_UBIFS_FS_DEBUG */ 3175#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index a811ac4a26bb..45174b534377 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,18 +31,25 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG 32#ifdef CONFIG_UBIFS_FS_DEBUG
33 33
34#include <linux/random.h> 34/*
35 * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi"
36 * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte.
37 */
38#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
39#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
35 40
36/** 41/**
37 * ubifs_debug_info - per-FS debugging information. 42 * ubifs_debug_info - per-FS debugging information.
38 * @old_zroot: old index root - used by 'dbg_check_old_index()' 43 * @old_zroot: old index root - used by 'dbg_check_old_index()'
39 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' 44 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
40 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' 45 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
41 * @failure_mode: failure mode for recovery testing 46 *
42 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls 47 * @pc_happened: non-zero if an emulated power cut happened
43 * @fail_timeout: time in jiffies when delay of failure mode expires 48 * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
44 * @fail_cnt: current number of calls to failure mode I/O functions 49 * @pc_timeout: time in jiffies when delay of failure mode expires
45 * @fail_cnt_max: number of calls by which to delay failure mode 50 * @pc_cnt: current number of calls to failure mode I/O functions
51 * @pc_cnt_max: number of calls by which to delay failure mode
52 *
46 * @chk_lpt_sz: used by LPT tree size checker 53 * @chk_lpt_sz: used by LPT tree size checker
47 * @chk_lpt_sz2: used by LPT tree size checker 54 * @chk_lpt_sz2: used by LPT tree size checker
48 * @chk_lpt_wastage: used by LPT tree size checker 55 * @chk_lpt_wastage: used by LPT tree size checker
@@ -56,21 +63,36 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
56 * @saved_free: saved amount of free space 63 * @saved_free: saved amount of free space
57 * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt 64 * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
58 * 65 *
66 * @chk_gen: if general extra checks are enabled
67 * @chk_index: if index xtra checks are enabled
68 * @chk_orph: if orphans extra checks are enabled
69 * @chk_lprops: if lprops extra checks are enabled
70 * @chk_fs: if UBIFS contents extra checks are enabled
71 * @tst_rcvry: if UBIFS recovery testing mode enabled
72 *
59 * @dfs_dir_name: name of debugfs directory containing this file-system's files 73 * @dfs_dir_name: name of debugfs directory containing this file-system's files
60 * @dfs_dir: direntry object of the file-system debugfs directory 74 * @dfs_dir: direntry object of the file-system debugfs directory
61 * @dfs_dump_lprops: "dump lprops" debugfs knob 75 * @dfs_dump_lprops: "dump lprops" debugfs knob
62 * @dfs_dump_budg: "dump budgeting information" debugfs knob 76 * @dfs_dump_budg: "dump budgeting information" debugfs knob
63 * @dfs_dump_tnc: "dump TNC" debugfs knob 77 * @dfs_dump_tnc: "dump TNC" debugfs knob
78 * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks
79 * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks
80 * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks
81 * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks
82 * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
83 * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
64 */ 84 */
65struct ubifs_debug_info { 85struct ubifs_debug_info {
66 struct ubifs_zbranch old_zroot; 86 struct ubifs_zbranch old_zroot;
67 int old_zroot_level; 87 int old_zroot_level;
68 unsigned long long old_zroot_sqnum; 88 unsigned long long old_zroot_sqnum;
69 int failure_mode; 89
70 int fail_delay; 90 int pc_happened;
71 unsigned long fail_timeout; 91 int pc_delay;
72 unsigned int fail_cnt; 92 unsigned long pc_timeout;
73 unsigned int fail_cnt_max; 93 unsigned int pc_cnt;
94 unsigned int pc_cnt_max;
95
74 long long chk_lpt_sz; 96 long long chk_lpt_sz;
75 long long chk_lpt_sz2; 97 long long chk_lpt_sz2;
76 long long chk_lpt_wastage; 98 long long chk_lpt_wastage;
@@ -84,11 +106,43 @@ struct ubifs_debug_info {
84 long long saved_free; 106 long long saved_free;
85 int saved_idx_gc_cnt; 107 int saved_idx_gc_cnt;
86 108
87 char dfs_dir_name[100]; 109 unsigned int chk_gen:1;
110 unsigned int chk_index:1;
111 unsigned int chk_orph:1;
112 unsigned int chk_lprops:1;
113 unsigned int chk_fs:1;
114 unsigned int tst_rcvry:1;
115
116 char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1];
88 struct dentry *dfs_dir; 117 struct dentry *dfs_dir;
89 struct dentry *dfs_dump_lprops; 118 struct dentry *dfs_dump_lprops;
90 struct dentry *dfs_dump_budg; 119 struct dentry *dfs_dump_budg;
91 struct dentry *dfs_dump_tnc; 120 struct dentry *dfs_dump_tnc;
121 struct dentry *dfs_chk_gen;
122 struct dentry *dfs_chk_index;
123 struct dentry *dfs_chk_orph;
124 struct dentry *dfs_chk_lprops;
125 struct dentry *dfs_chk_fs;
126 struct dentry *dfs_tst_rcvry;
127};
128
129/**
130 * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information.
131 *
132 * @chk_gen: if general extra checks are enabled
133 * @chk_index: if index xtra checks are enabled
134 * @chk_orph: if orphans extra checks are enabled
135 * @chk_lprops: if lprops extra checks are enabled
136 * @chk_fs: if UBIFS contents extra checks are enabled
137 * @tst_rcvry: if UBIFS recovery testing mode enabled
138 */
139struct ubifs_global_debug_info {
140 unsigned int chk_gen:1;
141 unsigned int chk_index:1;
142 unsigned int chk_orph:1;
143 unsigned int chk_lprops:1;
144 unsigned int chk_fs:1;
145 unsigned int tst_rcvry:1;
92}; 146};
93 147
94#define ubifs_assert(expr) do { \ 148#define ubifs_assert(expr) do { \
@@ -127,6 +181,8 @@ const char *dbg_key_str1(const struct ubifs_info *c,
127#define DBGKEY(key) dbg_key_str0(c, (key)) 181#define DBGKEY(key) dbg_key_str0(c, (key))
128#define DBGKEY1(key) dbg_key_str1(c, (key)) 182#define DBGKEY1(key) dbg_key_str1(c, (key))
129 183
184extern spinlock_t dbg_lock;
185
130#define ubifs_dbg_msg(type, fmt, ...) do { \ 186#define ubifs_dbg_msg(type, fmt, ...) do { \
131 spin_lock(&dbg_lock); \ 187 spin_lock(&dbg_lock); \
132 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ 188 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
@@ -162,41 +218,36 @@ const char *dbg_key_str1(const struct ubifs_info *c,
162/* Additional recovery messages */ 218/* Additional recovery messages */
163#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) 219#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
164 220
165/* 221extern struct ubifs_global_debug_info ubifs_dbg;
166 * Debugging check flags.
167 *
168 * UBIFS_CHK_GEN: general checks
169 * UBIFS_CHK_TNC: check TNC
170 * UBIFS_CHK_IDX_SZ: check index size
171 * UBIFS_CHK_ORPH: check orphans
172 * UBIFS_CHK_OLD_IDX: check the old index
173 * UBIFS_CHK_LPROPS: check lprops
174 * UBIFS_CHK_FS: check the file-system
175 */
176enum {
177 UBIFS_CHK_GEN = 0x1,
178 UBIFS_CHK_TNC = 0x2,
179 UBIFS_CHK_IDX_SZ = 0x4,
180 UBIFS_CHK_ORPH = 0x8,
181 UBIFS_CHK_OLD_IDX = 0x10,
182 UBIFS_CHK_LPROPS = 0x20,
183 UBIFS_CHK_FS = 0x40,
184};
185
186/*
187 * Special testing flags.
188 *
189 * UBIFS_TST_RCVRY: failure mode for recovery testing
190 */
191enum {
192 UBIFS_TST_RCVRY = 0x4,
193};
194
195extern spinlock_t dbg_lock;
196 222
197extern unsigned int ubifs_msg_flags; 223static inline int dbg_is_chk_gen(const struct ubifs_info *c)
198extern unsigned int ubifs_chk_flags; 224{
199extern unsigned int ubifs_tst_flags; 225 return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen);
226}
227static inline int dbg_is_chk_index(const struct ubifs_info *c)
228{
229 return !!(ubifs_dbg.chk_index || c->dbg->chk_index);
230}
231static inline int dbg_is_chk_orph(const struct ubifs_info *c)
232{
233 return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph);
234}
235static inline int dbg_is_chk_lprops(const struct ubifs_info *c)
236{
237 return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops);
238}
239static inline int dbg_is_chk_fs(const struct ubifs_info *c)
240{
241 return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs);
242}
243static inline int dbg_is_tst_rcvry(const struct ubifs_info *c)
244{
245 return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry);
246}
247static inline int dbg_is_power_cut(const struct ubifs_info *c)
248{
249 return !!c->dbg->pc_happened;
250}
200 251
201int ubifs_debugging_init(struct ubifs_info *c); 252int ubifs_debugging_init(struct ubifs_info *c);
202void ubifs_debugging_exit(struct ubifs_info *c); 253void ubifs_debugging_exit(struct ubifs_info *c);
@@ -207,7 +258,7 @@ const char *dbg_cstate(int cmt_state);
207const char *dbg_jhead(int jhead); 258const char *dbg_jhead(int jhead);
208const char *dbg_get_key_dump(const struct ubifs_info *c, 259const char *dbg_get_key_dump(const struct ubifs_info *c,
209 const union ubifs_key *key); 260 const union ubifs_key *key);
210void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); 261void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode);
211void dbg_dump_node(const struct ubifs_info *c, const void *node); 262void dbg_dump_node(const struct ubifs_info *c, const void *node);
212void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, 263void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
213 int offs); 264 int offs);
@@ -240,8 +291,8 @@ int dbg_check_cats(struct ubifs_info *c);
240int dbg_check_ltab(struct ubifs_info *c); 291int dbg_check_ltab(struct ubifs_info *c);
241int dbg_chk_lpt_free_spc(struct ubifs_info *c); 292int dbg_chk_lpt_free_spc(struct ubifs_info *c);
242int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); 293int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
243int dbg_check_synced_i_size(struct inode *inode); 294int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode);
244int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); 295int dbg_check_dir(struct ubifs_info *c, const struct inode *dir);
245int dbg_check_tnc(struct ubifs_info *c, int extra); 296int dbg_check_tnc(struct ubifs_info *c, int extra);
246int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); 297int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
247int dbg_check_filesystem(struct ubifs_info *c); 298int dbg_check_filesystem(struct ubifs_info *c);
@@ -254,54 +305,12 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
254int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); 305int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
255int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); 306int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
256 307
257/* Force the use of in-the-gaps method for testing */ 308int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
258static inline int dbg_force_in_the_gaps_enabled(void) 309 int len, int dtype);
259{ 310int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
260 return ubifs_chk_flags & UBIFS_CHK_GEN; 311 int dtype);
261} 312int dbg_leb_unmap(struct ubifs_info *c, int lnum);
262int dbg_force_in_the_gaps(void); 313int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype);
263
264/* Failure mode for recovery testing */
265#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
266
267#ifndef UBIFS_DBG_PRESERVE_UBI
268#define ubi_leb_read dbg_leb_read
269#define ubi_leb_write dbg_leb_write
270#define ubi_leb_change dbg_leb_change
271#define ubi_leb_erase dbg_leb_erase
272#define ubi_leb_unmap dbg_leb_unmap
273#define ubi_is_mapped dbg_is_mapped
274#define ubi_leb_map dbg_leb_map
275#endif
276
277int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
278 int len, int check);
279int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
280 int offset, int len, int dtype);
281int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
282 int len, int dtype);
283int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
284int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
285int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
286int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
287
288static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
289 int offset, int len)
290{
291 return dbg_leb_read(desc, lnum, buf, offset, len, 0);
292}
293
294static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
295 const void *buf, int offset, int len)
296{
297 return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN);
298}
299
300static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
301 const void *buf, int len)
302{
303 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
304}
305 314
306/* Debugfs-related stuff */ 315/* Debugfs-related stuff */
307int dbg_debugfs_init(void); 316int dbg_debugfs_init(void);
@@ -313,7 +322,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
313 322
314/* Use "if (0)" to make compiler check arguments even if debugging is off */ 323/* Use "if (0)" to make compiler check arguments even if debugging is off */
315#define ubifs_assert(expr) do { \ 324#define ubifs_assert(expr) do { \
316 if (0 && (expr)) \ 325 if (0) \
317 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ 326 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
318 __func__, __LINE__, current->pid); \ 327 __func__, __LINE__, current->pid); \
319} while (0) 328} while (0)
@@ -323,6 +332,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
323 ubifs_err(fmt, ##__VA_ARGS__); \ 332 ubifs_err(fmt, ##__VA_ARGS__); \
324} while (0) 333} while (0)
325 334
335#define DBGKEY(key) ((char *)(key))
336#define DBGKEY1(key) ((char *)(key))
337
326#define ubifs_dbg_msg(fmt, ...) do { \ 338#define ubifs_dbg_msg(fmt, ...) do { \
327 if (0) \ 339 if (0) \
328 pr_debug(fmt "\n", ##__VA_ARGS__); \ 340 pr_debug(fmt "\n", ##__VA_ARGS__); \
@@ -346,9 +358,6 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
346#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 358#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 359#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
348 360
349#define DBGKEY(key) ((char *)(key))
350#define DBGKEY1(key) ((char *)(key))
351
352static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } 361static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
353static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } 362static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
354static inline const char *dbg_ntype(int type) { return ""; } 363static inline const char *dbg_ntype(int type) { return ""; }
@@ -357,7 +366,7 @@ static inline const char *dbg_jhead(int jhead) { return ""; }
357static inline const char * 366static inline const char *
358dbg_get_key_dump(const struct ubifs_info *c, 367dbg_get_key_dump(const struct ubifs_info *c,
359 const union ubifs_key *key) { return ""; } 368 const union ubifs_key *key) { return ""; }
360static inline void dbg_dump_inode(const struct ubifs_info *c, 369static inline void dbg_dump_inode(struct ubifs_info *c,
361 const struct inode *inode) { return; } 370 const struct inode *inode) { return; }
362static inline void dbg_dump_node(const struct ubifs_info *c, 371static inline void dbg_dump_node(const struct ubifs_info *c,
363 const void *node) { return; } 372 const void *node) { return; }
@@ -409,9 +418,11 @@ static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; }
409static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } 418static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
410static inline int dbg_chk_lpt_sz(struct ubifs_info *c, 419static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
411 int action, int len) { return 0; } 420 int action, int len) { return 0; }
412static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } 421static inline int
413static inline int dbg_check_dir_size(struct ubifs_info *c, 422dbg_check_synced_i_size(const struct ubifs_info *c,
414 const struct inode *dir) { return 0; } 423 struct inode *inode) { return 0; }
424static inline int dbg_check_dir(struct ubifs_info *c,
425 const struct inode *dir) { return 0; }
415static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } 426static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
416static inline int dbg_check_idx_size(struct ubifs_info *c, 427static inline int dbg_check_idx_size(struct ubifs_info *c,
417 long long idx_size) { return 0; } 428 long long idx_size) { return 0; }
@@ -431,9 +442,23 @@ static inline int
431dbg_check_nondata_nodes_order(struct ubifs_info *c, 442dbg_check_nondata_nodes_order(struct ubifs_info *c,
432 struct list_head *head) { return 0; } 443 struct list_head *head) { return 0; }
433 444
434static inline int dbg_force_in_the_gaps(void) { return 0; } 445static inline int dbg_leb_write(struct ubifs_info *c, int lnum,
435#define dbg_force_in_the_gaps_enabled() 0 446 const void *buf, int offset,
436#define dbg_failure_mode 0 447 int len, int dtype) { return 0; }
448static inline int dbg_leb_change(struct ubifs_info *c, int lnum,
449 const void *buf, int len,
450 int dtype) { return 0; }
451static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; }
452static inline int dbg_leb_map(struct ubifs_info *c, int lnum,
453 int dtype) { return 0; }
454
455static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; }
456static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; }
457static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; }
458static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; }
459static inline int dbg_is_chk_fs(const struct ubifs_info *c) { return 0; }
460static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; }
461static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; }
437 462
438static inline int dbg_debugfs_init(void) { return 0; } 463static inline int dbg_debugfs_init(void) { return 0; }
439static inline void dbg_debugfs_exit(void) { return; } 464static inline void dbg_debugfs_exit(void) { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ef5abd38f0bf..683492043317 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
102 * UBIFS has to fully control "clean <-> dirty" transitions of inodes 102 * UBIFS has to fully control "clean <-> dirty" transitions of inodes
103 * to make budgeting work. 103 * to make budgeting work.
104 */ 104 */
105 inode->i_flags |= (S_NOCMTIME); 105 inode->i_flags |= S_NOCMTIME;
106 106
107 inode_init_owner(inode, dir, mode); 107 inode_init_owner(inode, dir, mode);
108 inode->i_mtime = inode->i_atime = inode->i_ctime = 108 inode->i_mtime = inode->i_atime = inode->i_ctime =
@@ -172,9 +172,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
172 172
173#ifdef CONFIG_UBIFS_FS_DEBUG 173#ifdef CONFIG_UBIFS_FS_DEBUG
174 174
175static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) 175static int dbg_check_name(const struct ubifs_info *c,
176 const struct ubifs_dent_node *dent,
177 const struct qstr *nm)
176{ 178{
177 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 179 if (!dbg_is_chk_gen(c))
178 return 0; 180 return 0;
179 if (le16_to_cpu(dent->nlen) != nm->len) 181 if (le16_to_cpu(dent->nlen) != nm->len)
180 return -EINVAL; 182 return -EINVAL;
@@ -185,7 +187,7 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
185 187
186#else 188#else
187 189
188#define dbg_check_name(dent, nm) 0 190#define dbg_check_name(c, dent, nm) 0
189 191
190#endif 192#endif
191 193
@@ -219,7 +221,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
219 goto out; 221 goto out;
220 } 222 }
221 223
222 if (dbg_check_name(dent, &dentry->d_name)) { 224 if (dbg_check_name(c, dent, &dentry->d_name)) {
223 err = -EINVAL; 225 err = -EINVAL;
224 goto out; 226 goto out;
225 } 227 }
@@ -522,7 +524,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 524 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 525 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 526
525 err = dbg_check_synced_i_size(inode); 527 err = dbg_check_synced_i_size(c, inode);
526 if (err) 528 if (err)
527 return err; 529 return err;
528 530
@@ -577,7 +579,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
577 inode->i_nlink, dir->i_ino); 579 inode->i_nlink, dir->i_ino);
578 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 580 ubifs_assert(mutex_is_locked(&dir->i_mutex));
579 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 581 ubifs_assert(mutex_is_locked(&inode->i_mutex));
580 err = dbg_check_synced_i_size(inode); 582 err = dbg_check_synced_i_size(c, inode);
581 if (err) 583 if (err)
582 return err; 584 return err;
583 585
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 89ef9a2f7837..f9c234bf33d3 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1263,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
1263 if (err) 1263 if (err)
1264 return err; 1264 return err;
1265 1265
1266 err = dbg_check_synced_i_size(inode); 1266 err = dbg_check_synced_i_size(c, inode);
1267 if (err) 1267 if (err)
1268 return err; 1268 return err;
1269 1269
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3be645e012c9..9228950a658f 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -86,8 +86,125 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
86 c->no_chk_data_crc = 0; 86 c->no_chk_data_crc = 0;
87 c->vfs_sb->s_flags |= MS_RDONLY; 87 c->vfs_sb->s_flags |= MS_RDONLY;
88 ubifs_warn("switched to read-only mode, error %d", err); 88 ubifs_warn("switched to read-only mode, error %d", err);
89 dump_stack();
90 }
91}
92
93/*
94 * Below are simple wrappers over UBI I/O functions which include some
95 * additional checks and UBIFS debugging stuff. See corresponding UBI function
96 * for more information.
97 */
98
99int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
100 int len, int even_ebadmsg)
101{
102 int err;
103
104 err = ubi_read(c->ubi, lnum, buf, offs, len);
105 /*
106 * In case of %-EBADMSG print the error message only if the
107 * @even_ebadmsg is true.
108 */
109 if (err && (err != -EBADMSG || even_ebadmsg)) {
110 ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
111 len, lnum, offs, err);
112 dbg_dump_stack();
113 }
114 return err;
115}
116
117int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
118 int len, int dtype)
119{
120 int err;
121
122 ubifs_assert(!c->ro_media && !c->ro_mount);
123 if (c->ro_error)
124 return -EROFS;
125 if (!dbg_is_tst_rcvry(c))
126 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
127 else
128 err = dbg_leb_write(c, lnum, buf, offs, len, dtype);
129 if (err) {
130 ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
131 len, lnum, offs, err);
132 ubifs_ro_mode(c, err);
133 dbg_dump_stack();
134 }
135 return err;
136}
137
138int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
139 int dtype)
140{
141 int err;
142
143 ubifs_assert(!c->ro_media && !c->ro_mount);
144 if (c->ro_error)
145 return -EROFS;
146 if (!dbg_is_tst_rcvry(c))
147 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
148 else
149 err = dbg_leb_change(c, lnum, buf, len, dtype);
150 if (err) {
151 ubifs_err("changing %d bytes in LEB %d failed, error %d",
152 len, lnum, err);
153 ubifs_ro_mode(c, err);
154 dbg_dump_stack();
155 }
156 return err;
157}
158
159int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
160{
161 int err;
162
163 ubifs_assert(!c->ro_media && !c->ro_mount);
164 if (c->ro_error)
165 return -EROFS;
166 if (!dbg_is_tst_rcvry(c))
167 err = ubi_leb_unmap(c->ubi, lnum);
168 else
169 err = dbg_leb_unmap(c, lnum);
170 if (err) {
171 ubifs_err("unmap LEB %d failed, error %d", lnum, err);
172 ubifs_ro_mode(c, err);
173 dbg_dump_stack();
174 }
175 return err;
176}
177
178int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype)
179{
180 int err;
181
182 ubifs_assert(!c->ro_media && !c->ro_mount);
183 if (c->ro_error)
184 return -EROFS;
185 if (!dbg_is_tst_rcvry(c))
186 err = ubi_leb_map(c->ubi, lnum, dtype);
187 else
188 err = dbg_leb_map(c, lnum, dtype);
189 if (err) {
190 ubifs_err("mapping LEB %d failed, error %d", lnum, err);
191 ubifs_ro_mode(c, err);
192 dbg_dump_stack();
193 }
194 return err;
195}
196
197int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
198{
199 int err;
200
201 err = ubi_is_mapped(c->ubi, lnum);
202 if (err < 0) {
203 ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
204 lnum, err);
89 dbg_dump_stack(); 205 dbg_dump_stack();
90 } 206 }
207 return err;
91} 208}
92 209
93/** 210/**
@@ -406,14 +523,10 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
406 dirt = sync_len - wbuf->used; 523 dirt = sync_len - wbuf->used;
407 if (dirt) 524 if (dirt)
408 ubifs_pad(c, wbuf->buf + wbuf->used, dirt); 525 ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
409 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 526 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len,
410 sync_len, wbuf->dtype); 527 wbuf->dtype);
411 if (err) { 528 if (err)
412 ubifs_err("cannot write %d bytes to LEB %d:%d",
413 sync_len, wbuf->lnum, wbuf->offs);
414 dbg_dump_stack();
415 return err; 529 return err;
416 }
417 530
418 spin_lock(&wbuf->lock); 531 spin_lock(&wbuf->lock);
419 wbuf->offs += sync_len; 532 wbuf->offs += sync_len;
@@ -605,9 +718,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
605 if (aligned_len == wbuf->avail) { 718 if (aligned_len == wbuf->avail) {
606 dbg_io("flush jhead %s wbuf to LEB %d:%d", 719 dbg_io("flush jhead %s wbuf to LEB %d:%d",
607 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 720 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
608 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 721 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf,
609 wbuf->offs, wbuf->size, 722 wbuf->offs, wbuf->size,
610 wbuf->dtype); 723 wbuf->dtype);
611 if (err) 724 if (err)
612 goto out; 725 goto out;
613 726
@@ -642,8 +755,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
642 dbg_io("flush jhead %s wbuf to LEB %d:%d", 755 dbg_io("flush jhead %s wbuf to LEB %d:%d",
643 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 756 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
644 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 757 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
645 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 758 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs,
646 wbuf->size, wbuf->dtype); 759 wbuf->size, wbuf->dtype);
647 if (err) 760 if (err)
648 goto out; 761 goto out;
649 762
@@ -661,8 +774,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
661 */ 774 */
662 dbg_io("write %d bytes to LEB %d:%d", 775 dbg_io("write %d bytes to LEB %d:%d",
663 wbuf->size, wbuf->lnum, wbuf->offs); 776 wbuf->size, wbuf->lnum, wbuf->offs);
664 err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, 777 err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs,
665 wbuf->size, wbuf->dtype); 778 wbuf->size, wbuf->dtype);
666 if (err) 779 if (err)
667 goto out; 780 goto out;
668 781
@@ -683,8 +796,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
683 n <<= c->max_write_shift; 796 n <<= c->max_write_shift;
684 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, 797 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
685 wbuf->offs); 798 wbuf->offs);
686 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, 799 err = ubifs_leb_write(c, wbuf->lnum, buf + written,
687 wbuf->offs, n, wbuf->dtype); 800 wbuf->offs, n, wbuf->dtype);
688 if (err) 801 if (err)
689 goto out; 802 goto out;
690 wbuf->offs += n; 803 wbuf->offs += n;
@@ -766,13 +879,9 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
766 return -EROFS; 879 return -EROFS;
767 880
768 ubifs_prepare_node(c, buf, len, 1); 881 ubifs_prepare_node(c, buf, len, 1);
769 err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); 882 err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype);
770 if (err) { 883 if (err)
771 ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
772 buf_len, lnum, offs, err);
773 dbg_dump_node(c, buf); 884 dbg_dump_node(c, buf);
774 dbg_dump_stack();
775 }
776 885
777 return err; 886 return err;
778} 887}
@@ -824,13 +933,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
824 933
825 if (rlen > 0) { 934 if (rlen > 0) {
826 /* Read everything that goes before write-buffer */ 935 /* Read everything that goes before write-buffer */
827 err = ubi_read(c->ubi, lnum, buf, offs, rlen); 936 err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
828 if (err && err != -EBADMSG) { 937 if (err && err != -EBADMSG)
829 ubifs_err("failed to read node %d from LEB %d:%d, "
830 "error %d", type, lnum, offs, err);
831 dbg_dump_stack();
832 return err; 938 return err;
833 }
834 } 939 }
835 940
836 if (type != ch->node_type) { 941 if (type != ch->node_type) {
@@ -885,12 +990,9 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
885 ubifs_assert(!(offs & 7) && offs < c->leb_size); 990 ubifs_assert(!(offs & 7) && offs < c->leb_size);
886 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); 991 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
887 992
888 err = ubi_read(c->ubi, lnum, buf, offs, len); 993 err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
889 if (err && err != -EBADMSG) { 994 if (err && err != -EBADMSG)
890 ubifs_err("cannot read node %d from LEB %d:%d, error %d",
891 type, lnum, offs, err);
892 return err; 995 return err;
893 }
894 996
895 if (type != ch->node_type) { 997 if (type != ch->node_type) {
896 ubifs_err("bad node type (%d but expected %d)", 998 ubifs_err("bad node type (%d but expected %d)",
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index affea9494ae2..f9fd068d1ae0 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -262,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
262 * an unclean reboot, because the target LEB might have been 262 * an unclean reboot, because the target LEB might have been
263 * unmapped, but not yet physically erased. 263 * unmapped, but not yet physically erased.
264 */ 264 */
265 err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); 265 err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM);
266 if (err) 266 if (err)
267 goto out_unlock; 267 goto out_unlock;
268 } 268 }
@@ -283,8 +283,6 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
283 return 0; 283 return 0;
284 284
285out_unlock: 285out_unlock:
286 if (err != -EAGAIN)
287 ubifs_ro_mode(c, err);
288 mutex_unlock(&c->log_mutex); 286 mutex_unlock(&c->log_mutex);
289 kfree(ref); 287 kfree(ref);
290 kfree(bud); 288 kfree(bud);
@@ -752,7 +750,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
752 struct ubifs_bud *bud; 750 struct ubifs_bud *bud;
753 long long bud_bytes = 0; 751 long long bud_bytes = 0;
754 752
755 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 753 if (!dbg_is_chk_gen(c))
756 return 0; 754 return 0;
757 755
758 spin_lock(&c->buds_lock); 756 spin_lock(&c->buds_lock);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 667884f4a615..f8a181e647cc 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
504 pnode = (struct ubifs_pnode *)container_of(lprops - pos, 504 pnode = (struct ubifs_pnode *)container_of(lprops - pos,
505 struct ubifs_pnode, 505 struct ubifs_pnode,
506 lprops[0]); 506 lprops[0]);
507 return !test_bit(COW_ZNODE, &pnode->flags) && 507 return !test_bit(COW_CNODE, &pnode->flags) &&
508 test_bit(DIRTY_CNODE, &pnode->flags); 508 test_bit(DIRTY_CNODE, &pnode->flags);
509} 509}
510 510
@@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c)
860 struct list_head *pos; 860 struct list_head *pos;
861 int i, cat; 861 int i, cat;
862 862
863 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) 863 if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
864 return 0; 864 return 0;
865 865
866 list_for_each_entry(lprops, &c->empty_list, list) { 866 list_for_each_entry(lprops, &c->empty_list, list) {
@@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
958{ 958{
959 int i = 0, j, err = 0; 959 int i = 0, j, err = 0;
960 960
961 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) 961 if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
962 return; 962 return;
963 963
964 for (i = 0; i < heap->cnt; i++) { 964 for (i = 0; i < heap->cnt; i++) {
@@ -1262,7 +1262,7 @@ int dbg_check_lprops(struct ubifs_info *c)
1262 int i, err; 1262 int i, err;
1263 struct ubifs_lp_stats lst; 1263 struct ubifs_lp_stats lst;
1264 1264
1265 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1265 if (!dbg_is_chk_lprops(c))
1266 return 0; 1266 return 0;
1267 1267
1268 /* 1268 /*
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ef5155e109a2..6189c74d97f0 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -701,8 +701,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
701 alen = ALIGN(len, c->min_io_size); 701 alen = ALIGN(len, c->min_io_size);
702 set_ltab(c, lnum, c->leb_size - alen, alen - len); 702 set_ltab(c, lnum, c->leb_size - alen, alen - len);
703 memset(p, 0xff, alen - len); 703 memset(p, 0xff, alen - len);
704 err = ubi_leb_change(c->ubi, lnum++, buf, alen, 704 err = ubifs_leb_change(c, lnum++, buf, alen,
705 UBI_SHORTTERM); 705 UBI_SHORTTERM);
706 if (err) 706 if (err)
707 goto out; 707 goto out;
708 p = buf; 708 p = buf;
@@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
732 set_ltab(c, lnum, c->leb_size - alen, 732 set_ltab(c, lnum, c->leb_size - alen,
733 alen - len); 733 alen - len);
734 memset(p, 0xff, alen - len); 734 memset(p, 0xff, alen - len);
735 err = ubi_leb_change(c->ubi, lnum++, buf, alen, 735 err = ubifs_leb_change(c, lnum++, buf, alen,
736 UBI_SHORTTERM); 736 UBI_SHORTTERM);
737 if (err) 737 if (err)
738 goto out; 738 goto out;
739 p = buf; 739 p = buf;
@@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
780 alen = ALIGN(len, c->min_io_size); 780 alen = ALIGN(len, c->min_io_size);
781 set_ltab(c, lnum, c->leb_size - alen, alen - len); 781 set_ltab(c, lnum, c->leb_size - alen, alen - len);
782 memset(p, 0xff, alen - len); 782 memset(p, 0xff, alen - len);
783 err = ubi_leb_change(c->ubi, lnum++, buf, alen, 783 err = ubifs_leb_change(c, lnum++, buf, alen,
784 UBI_SHORTTERM); 784 UBI_SHORTTERM);
785 if (err) 785 if (err)
786 goto out; 786 goto out;
787 p = buf; 787 p = buf;
@@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
806 alen = ALIGN(len, c->min_io_size); 806 alen = ALIGN(len, c->min_io_size);
807 set_ltab(c, lnum, c->leb_size - alen, alen - len); 807 set_ltab(c, lnum, c->leb_size - alen, alen - len);
808 memset(p, 0xff, alen - len); 808 memset(p, 0xff, alen - len);
809 err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); 809 err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM);
810 if (err) 810 if (err)
811 goto out; 811 goto out;
812 p = buf; 812 p = buf;
@@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
826 826
827 /* Write remaining buffer */ 827 /* Write remaining buffer */
828 memset(p, 0xff, alen - len); 828 memset(p, 0xff, alen - len);
829 err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); 829 err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM);
830 if (err) 830 if (err)
831 goto out; 831 goto out;
832 832
@@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1222 if (c->big_lpt) 1222 if (c->big_lpt)
1223 nnode->num = calc_nnode_num_from_parent(c, parent, iip); 1223 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1224 } else { 1224 } else {
1225 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); 1225 err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1);
1226 if (err) 1226 if (err)
1227 goto out; 1227 goto out;
1228 err = ubifs_unpack_nnode(c, buf, nnode); 1228 err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1247,6 +1247,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1247 1247
1248out: 1248out:
1249 ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); 1249 ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
1250 dbg_dump_stack();
1250 kfree(nnode); 1251 kfree(nnode);
1251 return err; 1252 return err;
1252} 1253}
@@ -1290,7 +1291,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1290 lprops->flags = ubifs_categorize_lprops(c, lprops); 1291 lprops->flags = ubifs_categorize_lprops(c, lprops);
1291 } 1292 }
1292 } else { 1293 } else {
1293 err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); 1294 err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1);
1294 if (err) 1295 if (err)
1295 goto out; 1296 goto out;
1296 err = unpack_pnode(c, buf, pnode); 1297 err = unpack_pnode(c, buf, pnode);
@@ -1312,6 +1313,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1312out: 1313out:
1313 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); 1314 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
1314 dbg_dump_pnode(c, pnode, parent, iip); 1315 dbg_dump_pnode(c, pnode, parent, iip);
1316 dbg_dump_stack();
1315 dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); 1317 dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
1316 kfree(pnode); 1318 kfree(pnode);
1317 return err; 1319 return err;
@@ -1331,7 +1333,7 @@ static int read_ltab(struct ubifs_info *c)
1331 buf = vmalloc(c->ltab_sz); 1333 buf = vmalloc(c->ltab_sz);
1332 if (!buf) 1334 if (!buf)
1333 return -ENOMEM; 1335 return -ENOMEM;
1334 err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); 1336 err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1);
1335 if (err) 1337 if (err)
1336 goto out; 1338 goto out;
1337 err = unpack_ltab(c, buf); 1339 err = unpack_ltab(c, buf);
@@ -1354,7 +1356,8 @@ static int read_lsave(struct ubifs_info *c)
1354 buf = vmalloc(c->lsave_sz); 1356 buf = vmalloc(c->lsave_sz);
1355 if (!buf) 1357 if (!buf)
1356 return -ENOMEM; 1358 return -ENOMEM;
1357 err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); 1359 err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs,
1360 c->lsave_sz, 1);
1358 if (err) 1361 if (err)
1359 goto out; 1362 goto out;
1360 err = unpack_lsave(c, buf); 1363 err = unpack_lsave(c, buf);
@@ -1814,8 +1817,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
1814 if (c->big_lpt) 1817 if (c->big_lpt)
1815 nnode->num = calc_nnode_num_from_parent(c, parent, iip); 1818 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1816 } else { 1819 } else {
1817 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, 1820 err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
1818 c->nnode_sz); 1821 c->nnode_sz, 1);
1819 if (err) 1822 if (err)
1820 return ERR_PTR(err); 1823 return ERR_PTR(err);
1821 err = ubifs_unpack_nnode(c, buf, nnode); 1824 err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1883,8 +1886,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
1883 ubifs_assert(branch->lnum >= c->lpt_first && 1886 ubifs_assert(branch->lnum >= c->lpt_first &&
1884 branch->lnum <= c->lpt_last); 1887 branch->lnum <= c->lpt_last);
1885 ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); 1888 ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
1886 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, 1889 err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
1887 c->pnode_sz); 1890 c->pnode_sz, 1);
1888 if (err) 1891 if (err)
1889 return ERR_PTR(err); 1892 return ERR_PTR(err);
1890 err = unpack_pnode(c, buf, pnode); 1893 err = unpack_pnode(c, buf, pnode);
@@ -2224,7 +2227,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
2224 struct ubifs_cnode *cn; 2227 struct ubifs_cnode *cn;
2225 int num, iip = 0, err; 2228 int num, iip = 0, err;
2226 2229
2227 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 2230 if (!dbg_is_chk_lprops(c))
2228 return 0; 2231 return 0;
2229 2232
2230 while (cnode) { 2233 while (cnode) {
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index dfcb5748a7dc..cddd6bd214f4 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -27,6 +27,7 @@
27 27
28#include <linux/crc16.h> 28#include <linux/crc16.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/random.h>
30#include "ubifs.h" 31#include "ubifs.h"
31 32
32#ifdef CONFIG_UBIFS_FS_DEBUG 33#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -116,8 +117,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c)
116 return 0; 117 return 0;
117 cnt += 1; 118 cnt += 1;
118 while (1) { 119 while (1) {
119 ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); 120 ubifs_assert(!test_bit(COW_CNODE, &cnode->flags));
120 __set_bit(COW_ZNODE, &cnode->flags); 121 __set_bit(COW_CNODE, &cnode->flags);
121 cnext = next_dirty_cnode(cnode); 122 cnext = next_dirty_cnode(cnode);
122 if (!cnext) { 123 if (!cnext) {
123 cnode->cnext = c->lpt_cnext; 124 cnode->cnext = c->lpt_cnext;
@@ -465,7 +466,7 @@ static int write_cnodes(struct ubifs_info *c)
465 */ 466 */
466 clear_bit(DIRTY_CNODE, &cnode->flags); 467 clear_bit(DIRTY_CNODE, &cnode->flags);
467 smp_mb__before_clear_bit(); 468 smp_mb__before_clear_bit();
468 clear_bit(COW_ZNODE, &cnode->flags); 469 clear_bit(COW_CNODE, &cnode->flags);
469 smp_mb__after_clear_bit(); 470 smp_mb__after_clear_bit();
470 offs += len; 471 offs += len;
471 dbg_chk_lpt_sz(c, 1, len); 472 dbg_chk_lpt_sz(c, 1, len);
@@ -1160,11 +1161,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
1160 void *buf = c->lpt_buf; 1161 void *buf = c->lpt_buf;
1161 1162
1162 dbg_lp("LEB %d", lnum); 1163 dbg_lp("LEB %d", lnum);
1163 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1164
1164 if (err) { 1165 err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
1165 ubifs_err("cannot read LEB %d, error %d", lnum, err); 1166 if (err)
1166 return err; 1167 return err;
1167 } 1168
1168 while (1) { 1169 while (1) {
1169 if (!is_a_node(c, buf, len)) { 1170 if (!is_a_node(c, buf, len)) {
1170 int pad_len; 1171 int pad_len;
@@ -1640,7 +1641,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1640 int ret; 1641 int ret;
1641 void *buf, *p; 1642 void *buf, *p;
1642 1643
1643 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1644 if (!dbg_is_chk_lprops(c))
1644 return 0; 1645 return 0;
1645 1646
1646 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1647 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
@@ -1650,11 +1651,11 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1650 } 1651 }
1651 1652
1652 dbg_lp("LEB %d", lnum); 1653 dbg_lp("LEB %d", lnum);
1653 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1654
1654 if (err) { 1655 err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
1655 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); 1656 if (err)
1656 goto out; 1657 goto out;
1657 } 1658
1658 while (1) { 1659 while (1) {
1659 if (!is_a_node(c, p, len)) { 1660 if (!is_a_node(c, p, len)) {
1660 int i, pad_len; 1661 int i, pad_len;
@@ -1711,7 +1712,7 @@ int dbg_check_ltab(struct ubifs_info *c)
1711{ 1712{
1712 int lnum, err, i, cnt; 1713 int lnum, err, i, cnt;
1713 1714
1714 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1715 if (!dbg_is_chk_lprops(c))
1715 return 0; 1716 return 0;
1716 1717
1717 /* Bring the entire tree into memory */ 1718 /* Bring the entire tree into memory */
@@ -1754,7 +1755,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1754 long long free = 0; 1755 long long free = 0;
1755 int i; 1756 int i;
1756 1757
1757 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1758 if (!dbg_is_chk_lprops(c))
1758 return 0; 1759 return 0;
1759 1760
1760 for (i = 0; i < c->lpt_lebs; i++) { 1761 for (i = 0; i < c->lpt_lebs; i++) {
@@ -1796,7 +1797,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1796 long long chk_lpt_sz, lpt_sz; 1797 long long chk_lpt_sz, lpt_sz;
1797 int err = 0; 1798 int err = 0;
1798 1799
1799 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1800 if (!dbg_is_chk_lprops(c))
1800 return 0; 1801 return 0;
1801 1802
1802 switch (action) { 1803 switch (action) {
@@ -1901,11 +1902,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1901 return; 1902 return;
1902 } 1903 }
1903 1904
1904 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1905 err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
1905 if (err) { 1906 if (err)
1906 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1907 goto out; 1907 goto out;
1908 } 1908
1909 while (1) { 1909 while (1) {
1910 offs = c->leb_size - len; 1910 offs = c->leb_size - len;
1911 if (!is_a_node(c, p, len)) { 1911 if (!is_a_node(c, p, len)) {
@@ -2019,7 +2019,7 @@ static int dbg_populate_lsave(struct ubifs_info *c)
2019 struct ubifs_lpt_heap *heap; 2019 struct ubifs_lpt_heap *heap;
2020 int i; 2020 int i;
2021 2021
2022 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2022 if (!dbg_is_chk_gen(c))
2023 return 0; 2023 return 0;
2024 if (random32() & 3) 2024 if (random32() & 3)
2025 return 0; 2025 return 0;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 0b5296a9a4c5..ee7cb5ebb6e8 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
39} 39}
40 40
41/** 41/**
42 * ubifs_zn_obsolete - check if znode is obsolete.
43 * @znode: znode to check
44 *
45 * This helper function returns %1 if @znode is obsolete and %0 otherwise.
46 */
47static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode)
48{
49 return !!test_bit(OBSOLETE_ZNODE, &znode->flags);
50}
51
52/**
53 * ubifs_zn_cow - check if znode has to be copied on write.
54 * @znode: znode to check
55 *
56 * This helper function returns %1 if @znode is has COW flag set and %0
57 * otherwise.
58 */
59static inline int ubifs_zn_cow(const struct ubifs_znode *znode)
60{
61 return !!test_bit(COW_ZNODE, &znode->flags);
62}
63
64/**
42 * ubifs_wake_up_bgt - wake up background thread. 65 * ubifs_wake_up_bgt - wake up background thread.
43 * @c: UBIFS file-system description object 66 * @c: UBIFS file-system description object
44 */ 67 */
@@ -122,86 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
122} 145}
123 146
124/** 147/**
125 * ubifs_leb_unmap - unmap an LEB.
126 * @c: UBIFS file-system description object
127 * @lnum: LEB number to unmap
128 *
129 * This function returns %0 on success and a negative error code on failure.
130 */
131static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
132{
133 int err;
134
135 ubifs_assert(!c->ro_media && !c->ro_mount);
136 if (c->ro_error)
137 return -EROFS;
138 err = ubi_leb_unmap(c->ubi, lnum);
139 if (err) {
140 ubifs_err("unmap LEB %d failed, error %d", lnum, err);
141 return err;
142 }
143
144 return 0;
145}
146
147/**
148 * ubifs_leb_write - write to a LEB.
149 * @c: UBIFS file-system description object
150 * @lnum: LEB number to write
151 * @buf: buffer to write from
152 * @offs: offset within LEB to write to
153 * @len: length to write
154 * @dtype: data type
155 *
156 * This function returns %0 on success and a negative error code on failure.
157 */
158static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
159 const void *buf, int offs, int len, int dtype)
160{
161 int err;
162
163 ubifs_assert(!c->ro_media && !c->ro_mount);
164 if (c->ro_error)
165 return -EROFS;
166 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
167 if (err) {
168 ubifs_err("writing %d bytes at %d:%d, error %d",
169 len, lnum, offs, err);
170 return err;
171 }
172
173 return 0;
174}
175
176/**
177 * ubifs_leb_change - atomic LEB change.
178 * @c: UBIFS file-system description object
179 * @lnum: LEB number to write
180 * @buf: buffer to write from
181 * @len: length to write
182 * @dtype: data type
183 *
184 * This function returns %0 on success and a negative error code on failure.
185 */
186static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
187 const void *buf, int len, int dtype)
188{
189 int err;
190
191 ubifs_assert(!c->ro_media && !c->ro_mount);
192 if (c->ro_error)
193 return -EROFS;
194 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
195 if (err) {
196 ubifs_err("changing %d bytes in LEB %d, error %d",
197 len, lnum, err);
198 return err;
199 }
200
201 return 0;
202}
203
204/**
205 * ubifs_encode_dev - encode device node IDs. 148 * ubifs_encode_dev - encode device node IDs.
206 * @dev: UBIFS device node information 149 * @dev: UBIFS device node information
207 * @rdev: device IDs to encode 150 * @rdev: device IDs to encode
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index a5422fffbd69..c542c73cfa3c 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -929,7 +929,7 @@ static int dbg_check_orphans(struct ubifs_info *c)
929 struct check_info ci; 929 struct check_info ci;
930 int err; 930 int err;
931 931
932 if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) 932 if (!dbg_is_chk_orph(c))
933 return 0; 933 return 0;
934 934
935 ci.last_ino = 0; 935 ci.last_ino = 0;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 783d8e0beb76..af02790d9328 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -117,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
117 if (!sbuf) 117 if (!sbuf)
118 return -ENOMEM; 118 return -ENOMEM;
119 119
120 err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); 120 err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
121 if (err && err != -EBADMSG) 121 if (err && err != -EBADMSG)
122 goto out_free; 122 goto out_free;
123 123
@@ -213,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
213 mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); 213 mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
214 214
215 ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); 215 ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
216 err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); 216 err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM);
217 if (err) 217 if (err)
218 goto out; 218 goto out;
219 err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); 219 err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM);
220 if (err) 220 if (err)
221 goto out; 221 goto out;
222out: 222out:
@@ -274,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c)
274 if (cor1) 274 if (cor1)
275 goto out_err; 275 goto out_err;
276 mst = mst1; 276 mst = mst1;
277 } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { 277 } else if (offs1 == 0 &&
278 c->leb_size - offs2 - sz < sz) {
278 /* 1st LEB was unmapped and written, 2nd not */ 279 /* 1st LEB was unmapped and written, 2nd not */
279 if (cor1) 280 if (cor1)
280 goto out_err; 281 goto out_err;
@@ -539,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
539 int len = ALIGN(endpt, c->min_io_size); 540 int len = ALIGN(endpt, c->min_io_size);
540 541
541 if (start) { 542 if (start) {
542 err = ubi_read(c->ubi, lnum, sleb->buf, 0, 543 err = ubifs_leb_read(c, lnum, sleb->buf, 0,
543 start); 544 start, 1);
544 if (err) 545 if (err)
545 return err; 546 return err;
546 } 547 }
@@ -554,8 +555,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
554 ubifs_pad(c, buf, pad_len); 555 ubifs_pad(c, buf, pad_len);
555 } 556 }
556 } 557 }
557 err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, 558 err = ubifs_leb_change(c, lnum, sleb->buf, len,
558 UBI_UNKNOWN); 559 UBI_UNKNOWN);
559 if (err) 560 if (err)
560 return err; 561 return err;
561 } 562 }
@@ -819,7 +820,8 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
819 return -ENOMEM; 820 return -ENOMEM;
820 if (c->leb_size - offs < UBIFS_CS_NODE_SZ) 821 if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
821 goto out_err; 822 goto out_err;
822 err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); 823 err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
824 UBIFS_CS_NODE_SZ, 0);
823 if (err && err != -EBADMSG) 825 if (err && err != -EBADMSG)
824 goto out_free; 826 goto out_free;
825 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); 827 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
@@ -919,8 +921,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
919 * 921 *
920 * This function returns %0 on success and a negative error code on failure. 922 * This function returns %0 on success and a negative error code on failure.
921 */ 923 */
922static int recover_head(const struct ubifs_info *c, int lnum, int offs, 924static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
923 void *sbuf)
924{ 925{
925 int len = c->max_write_size, err; 926 int len = c->max_write_size, err;
926 927
@@ -931,15 +932,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
931 return 0; 932 return 0;
932 933
933 /* Read at the head location and check it is empty flash */ 934 /* Read at the head location and check it is empty flash */
934 err = ubi_read(c->ubi, lnum, sbuf, offs, len); 935 err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
935 if (err || !is_empty(sbuf, len)) { 936 if (err || !is_empty(sbuf, len)) {
936 dbg_rcvry("cleaning head at %d:%d", lnum, offs); 937 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
937 if (offs == 0) 938 if (offs == 0)
938 return ubifs_leb_unmap(c, lnum); 939 return ubifs_leb_unmap(c, lnum);
939 err = ubi_read(c->ubi, lnum, sbuf, 0, offs); 940 err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
940 if (err) 941 if (err)
941 return err; 942 return err;
942 return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); 943 return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN);
943 } 944 }
944 945
945 return 0; 946 return 0;
@@ -962,7 +963,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
962 * 963 *
963 * This function returns %0 on success and a negative error code on failure. 964 * This function returns %0 on success and a negative error code on failure.
964 */ 965 */
965int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) 966int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
966{ 967{
967 int err; 968 int err;
968 969
@@ -993,7 +994,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
993 * 994 *
994 * This function returns %0 on success and a negative error code on failure. 995 * This function returns %0 on success and a negative error code on failure.
995 */ 996 */
996static int clean_an_unclean_leb(const struct ubifs_info *c, 997static int clean_an_unclean_leb(struct ubifs_info *c,
997 struct ubifs_unclean_leb *ucleb, void *sbuf) 998 struct ubifs_unclean_leb *ucleb, void *sbuf)
998{ 999{
999 int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; 1000 int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
@@ -1009,7 +1010,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
1009 return 0; 1010 return 0;
1010 } 1011 }
1011 1012
1012 err = ubi_read(c->ubi, lnum, buf, offs, len); 1013 err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
1013 if (err && err != -EBADMSG) 1014 if (err && err != -EBADMSG)
1014 return err; 1015 return err;
1015 1016
@@ -1069,7 +1070,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
1069 } 1070 }
1070 1071
1071 /* Write back the LEB atomically */ 1072 /* Write back the LEB atomically */
1072 err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); 1073 err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN);
1073 if (err) 1074 if (err)
1074 return err; 1075 return err;
1075 1076
@@ -1089,7 +1090,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
1089 * 1090 *
1090 * This function returns %0 on success and a negative error code on failure. 1091 * This function returns %0 on success and a negative error code on failure.
1091 */ 1092 */
1092int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) 1093int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
1093{ 1094{
1094 dbg_rcvry("recovery"); 1095 dbg_rcvry("recovery");
1095 while (!list_empty(&c->unclean_leb_list)) { 1096 while (!list_empty(&c->unclean_leb_list)) {
@@ -1454,7 +1455,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1454 if (i_size >= e->d_size) 1455 if (i_size >= e->d_size)
1455 return 0; 1456 return 0;
1456 /* Read the LEB */ 1457 /* Read the LEB */
1457 err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); 1458 err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
1458 if (err) 1459 if (err)
1459 goto out; 1460 goto out;
1460 /* Change the size field and recalculate the CRC */ 1461 /* Change the size field and recalculate the CRC */
@@ -1470,7 +1471,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1470 len -= 1; 1471 len -= 1;
1471 len = ALIGN(len + 1, c->min_io_size); 1472 len = ALIGN(len + 1, c->min_io_size);
1472 /* Atomically write the fixed LEB back again */ 1473 /* Atomically write the fixed LEB back again */
1473 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1474 err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
1474 if (err) 1475 if (err)
1475 goto out; 1476 goto out;
1476 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", 1477 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 5e97161ce4d3..ccabaf1164b3 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -523,8 +523,7 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
523 if (!list_is_last(&next->list, &jh->buds_list)) 523 if (!list_is_last(&next->list, &jh->buds_list))
524 return 0; 524 return 0;
525 525
526 err = ubi_read(c->ubi, next->lnum, (char *)&data, 526 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
527 next->start, 4);
528 if (err) 527 if (err)
529 return 0; 528 return 0;
530 529
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index c606f010e8df..93d938ad3d2a 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -674,15 +674,15 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len)
674 674
675 if (len == 0) { 675 if (len == 0) {
676 dbg_mnt("unmap empty LEB %d", lnum); 676 dbg_mnt("unmap empty LEB %d", lnum);
677 return ubi_leb_unmap(c->ubi, lnum); 677 return ubifs_leb_unmap(c, lnum);
678 } 678 }
679 679
680 dbg_mnt("fixup LEB %d, data len %d", lnum, len); 680 dbg_mnt("fixup LEB %d, data len %d", lnum, len);
681 err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); 681 err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1);
682 if (err) 682 if (err)
683 return err; 683 return err;
684 684
685 return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 685 return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
686} 686}
687 687
688/** 688/**
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 36216b46f772..37383e8011b1 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
148 INIT_LIST_HEAD(&sleb->nodes); 148 INIT_LIST_HEAD(&sleb->nodes);
149 sleb->buf = sbuf; 149 sleb->buf = sbuf;
150 150
151 err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); 151 err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
152 if (err && err != -EBADMSG) { 152 if (err && err != -EBADMSG) {
153 ubifs_err("cannot read %d bytes from LEB %d:%d," 153 ubifs_err("cannot read %d bytes from LEB %d:%d,"
154 " error %d", c->leb_size - offs, lnum, offs, err); 154 " error %d", c->leb_size - offs, lnum, offs, err);
@@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
240 int len; 240 int len;
241 241
242 ubifs_err("corruption at LEB %d:%d", lnum, offs); 242 ubifs_err("corruption at LEB %d:%d", lnum, offs);
243 if (dbg_failure_mode) 243 if (dbg_is_tst_rcvry(c))
244 return; 244 return;
245 len = c->leb_size - offs; 245 len = c->leb_size - offs;
246 if (len > 8192) 246 if (len > 8192)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 529be0582029..b28121278d46 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
85 if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) 85 if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
86 return 4; 86 return 4;
87 87
88 if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) 88 if (ui->xattr && !S_ISREG(inode->i_mode))
89 return 5; 89 return 5;
90 90
91 if (!ubifs_compr_present(ui->compr_type)) { 91 if (!ubifs_compr_present(ui->compr_type)) {
@@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
94 ubifs_compr_name(ui->compr_type)); 94 ubifs_compr_name(ui->compr_type));
95 } 95 }
96 96
97 err = dbg_check_dir_size(c, inode); 97 err = dbg_check_dir(c, inode);
98 return err; 98 return err;
99} 99}
100 100
@@ -914,7 +914,7 @@ static int check_volume_empty(struct ubifs_info *c)
914 914
915 c->empty = 1; 915 c->empty = 1;
916 for (lnum = 0; lnum < c->leb_cnt; lnum++) { 916 for (lnum = 0; lnum < c->leb_cnt; lnum++) {
917 err = ubi_is_mapped(c->ubi, lnum); 917 err = ubifs_is_mapped(c, lnum);
918 if (unlikely(err < 0)) 918 if (unlikely(err < 0))
919 return err; 919 return err;
920 if (err == 1) { 920 if (err == 1) {
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 91b4213dde84..066738647685 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c,
223 __set_bit(DIRTY_ZNODE, &zn->flags); 223 __set_bit(DIRTY_ZNODE, &zn->flags);
224 __clear_bit(COW_ZNODE, &zn->flags); 224 __clear_bit(COW_ZNODE, &zn->flags);
225 225
226 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); 226 ubifs_assert(!ubifs_zn_obsolete(znode));
227 __set_bit(OBSOLETE_ZNODE, &znode->flags); 227 __set_bit(OBSOLETE_ZNODE, &znode->flags);
228 228
229 if (znode->level != 0) { 229 if (znode->level != 0) {
@@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
271 struct ubifs_znode *zn; 271 struct ubifs_znode *zn;
272 int err; 272 int err;
273 273
274 if (!test_bit(COW_ZNODE, &znode->flags)) { 274 if (!ubifs_zn_cow(znode)) {
275 /* znode is not being committed */ 275 /* znode is not being committed */
276 if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { 276 if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
277 atomic_long_inc(&c->dirty_zn_cnt); 277 atomic_long_inc(&c->dirty_zn_cnt);
@@ -462,7 +462,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
462 462
463 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); 463 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
464 464
465 err = ubi_read(c->ubi, lnum, buf, offs, len); 465 err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
466 if (err) { 466 if (err) {
467 ubifs_err("cannot read node type %d from LEB %d:%d, error %d", 467 ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
468 type, lnum, offs, err); 468 type, lnum, offs, err);
@@ -1666,7 +1666,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
1666 if (!overlap) { 1666 if (!overlap) {
1667 /* We may safely unlock the write-buffer and read the data */ 1667 /* We may safely unlock the write-buffer and read the data */
1668 spin_unlock(&wbuf->lock); 1668 spin_unlock(&wbuf->lock);
1669 return ubi_read(c->ubi, lnum, buf, offs, len); 1669 return ubifs_leb_read(c, lnum, buf, offs, len, 0);
1670 } 1670 }
1671 1671
1672 /* Don't read under wbuf */ 1672 /* Don't read under wbuf */
@@ -1680,7 +1680,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
1680 1680
1681 if (rlen > 0) 1681 if (rlen > 0)
1682 /* Read everything that goes before write-buffer */ 1682 /* Read everything that goes before write-buffer */
1683 return ubi_read(c->ubi, lnum, buf, offs, rlen); 1683 return ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
1684 1684
1685 return 0; 1685 return 0;
1686} 1686}
@@ -1767,7 +1767,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1767 if (wbuf) 1767 if (wbuf)
1768 err = read_wbuf(wbuf, bu->buf, len, lnum, offs); 1768 err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
1769 else 1769 else
1770 err = ubi_read(c->ubi, lnum, bu->buf, offs, len); 1770 err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0);
1771 1771
1772 /* Check for a race with GC */ 1772 /* Check for a race with GC */
1773 if (maybe_leb_gced(c, lnum, bu->gc_seq)) 1773 if (maybe_leb_gced(c, lnum, bu->gc_seq))
@@ -2423,7 +2423,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2423 */ 2423 */
2424 2424
2425 do { 2425 do {
2426 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); 2426 ubifs_assert(!ubifs_zn_obsolete(znode));
2427 ubifs_assert(ubifs_zn_dirty(znode)); 2427 ubifs_assert(ubifs_zn_dirty(znode));
2428 2428
2429 zp = znode->parent; 2429 zp = znode->parent;
@@ -2479,9 +2479,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2479 c->zroot.offs = zbr->offs; 2479 c->zroot.offs = zbr->offs;
2480 c->zroot.len = zbr->len; 2480 c->zroot.len = zbr->len;
2481 c->zroot.znode = znode; 2481 c->zroot.znode = znode;
2482 ubifs_assert(!test_bit(OBSOLETE_ZNODE, 2482 ubifs_assert(!ubifs_zn_obsolete(zp));
2483 &zp->flags)); 2483 ubifs_assert(ubifs_zn_dirty(zp));
2484 ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
2485 atomic_long_dec(&c->dirty_zn_cnt); 2484 atomic_long_dec(&c->dirty_zn_cnt);
2486 2485
2487 if (zp->cnext) { 2486 if (zp->cnext) {
@@ -2865,7 +2864,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
2865 struct ubifs_znode *znode = cnext; 2864 struct ubifs_znode *znode = cnext;
2866 2865
2867 cnext = cnext->cnext; 2866 cnext = cnext->cnext;
2868 if (test_bit(OBSOLETE_ZNODE, &znode->flags)) 2867 if (ubifs_zn_obsolete(znode))
2869 kfree(znode); 2868 kfree(znode);
2870 } while (cnext && cnext != c->cnext); 2869 } while (cnext && cnext != c->cnext);
2871} 2870}
@@ -3301,7 +3300,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3301 3300
3302 if (!S_ISREG(inode->i_mode)) 3301 if (!S_ISREG(inode->i_mode))
3303 return 0; 3302 return 0;
3304 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 3303 if (!dbg_is_chk_gen(c))
3305 return 0; 3304 return 0;
3306 3305
3307 block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; 3306 block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
@@ -3337,9 +3336,10 @@ out_dump:
3337 ubifs_err("inode %lu has size %lld, but there are data at offset %lld " 3336 ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
3338 "(data key %s)", (unsigned long)inode->i_ino, size, 3337 "(data key %s)", (unsigned long)inode->i_ino, size,
3339 ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); 3338 ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
3339 mutex_unlock(&c->tnc_mutex);
3340 dbg_dump_inode(c, inode); 3340 dbg_dump_inode(c, inode);
3341 dbg_dump_stack(); 3341 dbg_dump_stack();
3342 err = -EINVAL; 3342 return -EINVAL;
3343 3343
3344out_unlock: 3344out_unlock:
3345 mutex_unlock(&c->tnc_mutex); 3345 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 41920f357bbf..4c15f07a8bb2 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -22,6 +22,7 @@
22 22
23/* This file implements TNC functions for committing */ 23/* This file implements TNC functions for committing */
24 24
25#include <linux/random.h>
25#include "ubifs.h" 26#include "ubifs.h"
26 27
27/** 28/**
@@ -87,8 +88,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
87 atomic_long_dec(&c->dirty_zn_cnt); 88 atomic_long_dec(&c->dirty_zn_cnt);
88 89
89 ubifs_assert(ubifs_zn_dirty(znode)); 90 ubifs_assert(ubifs_zn_dirty(znode));
90 ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); 91 ubifs_assert(ubifs_zn_cow(znode));
91 92
93 /*
94 * Note, unlike 'write_index()' we do not add memory barriers here
95 * because this function is called with @c->tnc_mutex locked.
96 */
92 __clear_bit(DIRTY_ZNODE, &znode->flags); 97 __clear_bit(DIRTY_ZNODE, &znode->flags);
93 __clear_bit(COW_ZNODE, &znode->flags); 98 __clear_bit(COW_ZNODE, &znode->flags);
94 99
@@ -377,7 +382,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
377 c->gap_lebs = NULL; 382 c->gap_lebs = NULL;
378 return err; 383 return err;
379 } 384 }
380 if (dbg_force_in_the_gaps_enabled()) { 385 if (!dbg_is_chk_index(c)) {
381 /* 386 /*
382 * Do not print scary warnings if the debugging 387 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled. 388 * option which forces in-the-gaps is enabled.
@@ -491,25 +496,6 @@ static int layout_in_empty_space(struct ubifs_info *c)
491 else 496 else
492 next_len = ubifs_idx_node_sz(c, cnext->child_cnt); 497 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
493 498
494 if (c->min_io_size == 1) {
495 buf_offs += ALIGN(len, 8);
496 if (next_len) {
497 if (buf_offs + next_len <= c->leb_size)
498 continue;
499 err = ubifs_update_one_lp(c, lnum, 0,
500 c->leb_size - buf_offs, 0, 0);
501 if (err)
502 return err;
503 lnum = -1;
504 continue;
505 }
506 err = ubifs_update_one_lp(c, lnum,
507 c->leb_size - buf_offs, 0, 0, 0);
508 if (err)
509 return err;
510 break;
511 }
512
513 /* Update buffer positions */ 499 /* Update buffer positions */
514 wlen = used + len; 500 wlen = used + len;
515 used += ALIGN(len, 8); 501 used += ALIGN(len, 8);
@@ -658,7 +644,7 @@ static int get_znodes_to_commit(struct ubifs_info *c)
658 } 644 }
659 cnt += 1; 645 cnt += 1;
660 while (1) { 646 while (1) {
661 ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); 647 ubifs_assert(!ubifs_zn_cow(znode));
662 __set_bit(COW_ZNODE, &znode->flags); 648 __set_bit(COW_ZNODE, &znode->flags);
663 znode->alt = 0; 649 znode->alt = 0;
664 cnext = find_next_dirty(znode); 650 cnext = find_next_dirty(znode);
@@ -704,7 +690,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
704 c->ilebs[c->ileb_cnt++] = lnum; 690 c->ilebs[c->ileb_cnt++] = lnum;
705 dbg_cmt("LEB %d", lnum); 691 dbg_cmt("LEB %d", lnum);
706 } 692 }
707 if (dbg_force_in_the_gaps()) 693 if (dbg_is_chk_index(c) && !(random32() & 7))
708 return -ENOSPC; 694 return -ENOSPC;
709 return 0; 695 return 0;
710} 696}
@@ -830,7 +816,7 @@ static int write_index(struct ubifs_info *c)
830 struct ubifs_idx_node *idx; 816 struct ubifs_idx_node *idx;
831 struct ubifs_znode *znode, *cnext; 817 struct ubifs_znode *znode, *cnext;
832 int i, lnum, offs, len, next_len, buf_len, buf_offs, used; 818 int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
833 int avail, wlen, err, lnum_pos = 0; 819 int avail, wlen, err, lnum_pos = 0, blen, nxt_offs;
834 820
835 cnext = c->enext; 821 cnext = c->enext;
836 if (!cnext) 822 if (!cnext)
@@ -907,7 +893,7 @@ static int write_index(struct ubifs_info *c)
907 cnext = znode->cnext; 893 cnext = znode->cnext;
908 894
909 ubifs_assert(ubifs_zn_dirty(znode)); 895 ubifs_assert(ubifs_zn_dirty(znode));
910 ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); 896 ubifs_assert(ubifs_zn_cow(znode));
911 897
912 /* 898 /*
913 * It is important that other threads should see %DIRTY_ZNODE 899 * It is important that other threads should see %DIRTY_ZNODE
@@ -922,6 +908,28 @@ static int write_index(struct ubifs_info *c)
922 clear_bit(COW_ZNODE, &znode->flags); 908 clear_bit(COW_ZNODE, &znode->flags);
923 smp_mb__after_clear_bit(); 909 smp_mb__after_clear_bit();
924 910
911 /*
912 * We have marked the znode as clean but have not updated the
913 * @c->clean_zn_cnt counter. If this znode becomes dirty again
914 * before 'free_obsolete_znodes()' is called, then
915 * @c->clean_zn_cnt will be decremented before it gets
916 * incremented (resulting in 2 decrements for the same znode).
917 * This means that @c->clean_zn_cnt may become negative for a
918 * while.
919 *
920 * Q: why we cannot increment @c->clean_zn_cnt?
921 * A: because we do not have the @c->tnc_mutex locked, and the
922 * following code would be racy and buggy:
923 *
924 * if (!ubifs_zn_obsolete(znode)) {
925 * atomic_long_inc(&c->clean_zn_cnt);
926 * atomic_long_inc(&ubifs_clean_zn_cnt);
927 * }
928 *
929 * Thus, we just delay the @c->clean_zn_cnt update until we
930 * have the mutex locked.
931 */
932
925 /* Do not access znode from this point on */ 933 /* Do not access znode from this point on */
926 934
927 /* Update buffer positions */ 935 /* Update buffer positions */
@@ -938,65 +946,38 @@ static int write_index(struct ubifs_info *c)
938 else 946 else
939 next_len = ubifs_idx_node_sz(c, cnext->child_cnt); 947 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
940 948
941 if (c->min_io_size == 1) { 949 nxt_offs = buf_offs + used + next_len;
942 /* 950 if (next_len && nxt_offs <= c->leb_size) {
943 * Write the prepared index node immediately if there is 951 if (avail > 0)
944 * no minimum IO size
945 */
946 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
947 wlen, UBI_SHORTTERM);
948 if (err)
949 return err;
950 buf_offs += ALIGN(wlen, 8);
951 if (next_len) {
952 used = 0;
953 avail = buf_len;
954 if (buf_offs + next_len > c->leb_size) {
955 err = ubifs_update_one_lp(c, lnum,
956 LPROPS_NC, 0, 0, LPROPS_TAKEN);
957 if (err)
958 return err;
959 lnum = -1;
960 }
961 continue; 952 continue;
962 } 953 else
954 blen = buf_len;
963 } else { 955 } else {
964 int blen, nxt_offs = buf_offs + used + next_len; 956 wlen = ALIGN(wlen, 8);
965 957 blen = ALIGN(wlen, c->min_io_size);
966 if (next_len && nxt_offs <= c->leb_size) { 958 ubifs_pad(c, c->cbuf + wlen, blen - wlen);
967 if (avail > 0) 959 }
968 continue; 960
969 else 961 /* The buffer is full or there are no more znodes to do */
970 blen = buf_len; 962 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen,
971 } else { 963 UBI_SHORTTERM);
972 wlen = ALIGN(wlen, 8); 964 if (err)
973 blen = ALIGN(wlen, c->min_io_size); 965 return err;
974 ubifs_pad(c, c->cbuf + wlen, blen - wlen); 966 buf_offs += blen;
975 } 967 if (next_len) {
976 /* 968 if (nxt_offs > c->leb_size) {
977 * The buffer is full or there are no more znodes 969 err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0,
978 * to do 970 0, LPROPS_TAKEN);
979 */ 971 if (err)
980 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, 972 return err;
981 blen, UBI_SHORTTERM); 973 lnum = -1;
982 if (err)
983 return err;
984 buf_offs += blen;
985 if (next_len) {
986 if (nxt_offs > c->leb_size) {
987 err = ubifs_update_one_lp(c, lnum,
988 LPROPS_NC, 0, 0, LPROPS_TAKEN);
989 if (err)
990 return err;
991 lnum = -1;
992 }
993 used -= blen;
994 if (used < 0)
995 used = 0;
996 avail = buf_len - used;
997 memmove(c->cbuf, c->cbuf + blen, used);
998 continue;
999 } 974 }
975 used -= blen;
976 if (used < 0)
977 used = 0;
978 avail = buf_len - used;
979 memmove(c->cbuf, c->cbuf + blen, used);
980 continue;
1000 } 981 }
1001 break; 982 break;
1002 } 983 }
@@ -1029,7 +1010,7 @@ static void free_obsolete_znodes(struct ubifs_info *c)
1029 do { 1010 do {
1030 znode = cnext; 1011 znode = cnext;
1031 cnext = znode->cnext; 1012 cnext = znode->cnext;
1032 if (test_bit(OBSOLETE_ZNODE, &znode->flags)) 1013 if (ubifs_zn_obsolete(znode))
1033 kfree(znode); 1014 kfree(znode);
1034 else { 1015 else {
1035 znode->cnext = NULL; 1016 znode->cnext = NULL;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4cd648501fa4..27f22551f805 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -230,14 +230,14 @@ enum {
230 * LPT cnode flag bits. 230 * LPT cnode flag bits.
231 * 231 *
232 * DIRTY_CNODE: cnode is dirty 232 * DIRTY_CNODE: cnode is dirty
233 * COW_CNODE: cnode is being committed and must be copied before writing
234 * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), 233 * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
235 * so it can (and must) be freed when the commit is finished 234 * so it can (and must) be freed when the commit is finished
235 * COW_CNODE: cnode is being committed and must be copied before writing
236 */ 236 */
237enum { 237enum {
238 DIRTY_CNODE = 0, 238 DIRTY_CNODE = 0,
239 COW_CNODE = 1, 239 OBSOLETE_CNODE = 1,
240 OBSOLETE_CNODE = 2, 240 COW_CNODE = 2,
241}; 241};
242 242
243/* 243/*
@@ -1468,6 +1468,15 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
1468 1468
1469/* io.c */ 1469/* io.c */
1470void ubifs_ro_mode(struct ubifs_info *c, int err); 1470void ubifs_ro_mode(struct ubifs_info *c, int err);
1471int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
1472 int len, int even_ebadmsg);
1473int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
1474 int len, int dtype);
1475int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
1476 int dtype);
1477int ubifs_leb_unmap(struct ubifs_info *c, int lnum);
1478int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype);
1479int ubifs_is_mapped(const struct ubifs_info *c, int lnum);
1471int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); 1480int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
1472int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 1481int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
1473 int dtype); 1482 int dtype);
@@ -1747,8 +1756,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1747 int offs, void *sbuf, int jhead); 1756 int offs, void *sbuf, int jhead);
1748struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 1757struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1749 int offs, void *sbuf); 1758 int offs, void *sbuf);
1750int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); 1759int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf);
1751int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); 1760int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
1752int ubifs_rcvry_gc_commit(struct ubifs_info *c); 1761int ubifs_rcvry_gc_commit(struct ubifs_info *c);
1753int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, 1762int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1754 int deletion, loff_t new_size); 1763 int deletion, loff_t new_size);
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 284a7c89697e..75bb316529dd 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -88,8 +88,6 @@ xfs-y += xfs_alloc.o \
88 xfs_vnodeops.o \ 88 xfs_vnodeops.o \
89 xfs_rw.o 89 xfs_rw.o
90 90
91xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
92
93# Objects in linux/ 91# Objects in linux/
94xfs-y += $(addprefix $(XFS_LINUX)/, \ 92xfs-y += $(addprefix $(XFS_LINUX)/, \
95 kmem.o \ 93 kmem.o \
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index a5dcd6a0f1b5..cac48fe22ad5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -264,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
264 iattr.ia_mode = mode; 264 iattr.ia_mode = mode;
265 iattr.ia_ctime = current_fs_time(inode->i_sb); 265 iattr.ia_ctime = current_fs_time(inode->i_sb);
266 266
267 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 267 error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
268 } 268 }
269 269
270 return error; 270 return error;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b3b418f519f3..63e971e2b837 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
181 181
182 isize = xfs_ioend_new_eof(ioend); 182 isize = xfs_ioend_new_eof(ioend);
183 if (isize) { 183 if (isize) {
184 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
184 ip->i_d.di_size = isize; 185 ip->i_d.di_size = isize;
185 xfs_mark_inode_dirty(ip); 186 xfs_mark_inode_dirty(ip);
186 } 187 }
@@ -894,11 +895,6 @@ out_invalidate:
894 * For unwritten space on the page we need to start the conversion to 895 * For unwritten space on the page we need to start the conversion to
895 * regular allocated space. 896 * regular allocated space.
896 * For any other dirty buffer heads on the page we should flush them. 897 * For any other dirty buffer heads on the page we should flush them.
897 *
898 * If we detect that a transaction would be required to flush the page, we
899 * have to check the process flags first, if we are already in a transaction
900 * or disk I/O during allocations is off, we need to fail the writepage and
901 * redirty the page.
902 */ 898 */
903STATIC int 899STATIC int
904xfs_vm_writepage( 900xfs_vm_writepage(
@@ -906,7 +902,6 @@ xfs_vm_writepage(
906 struct writeback_control *wbc) 902 struct writeback_control *wbc)
907{ 903{
908 struct inode *inode = page->mapping->host; 904 struct inode *inode = page->mapping->host;
909 int delalloc, unwritten;
910 struct buffer_head *bh, *head; 905 struct buffer_head *bh, *head;
911 struct xfs_bmbt_irec imap; 906 struct xfs_bmbt_irec imap;
912 xfs_ioend_t *ioend = NULL, *iohead = NULL; 907 xfs_ioend_t *ioend = NULL, *iohead = NULL;
@@ -938,15 +933,10 @@ xfs_vm_writepage(
938 goto redirty; 933 goto redirty;
939 934
940 /* 935 /*
941 * We need a transaction if there are delalloc or unwritten buffers 936 * Given that we do not allow direct reclaim to call us, we should
942 * on the page. 937 * never be called while in a filesystem transaction.
943 *
944 * If we need a transaction and the process flags say we are already
945 * in a transaction, or no IO is allowed then mark the page dirty
946 * again and leave the page as is.
947 */ 938 */
948 xfs_count_page_state(page, &delalloc, &unwritten); 939 if (WARN_ON(current->flags & PF_FSTRANS))
949 if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
950 goto redirty; 940 goto redirty;
951 941
952 /* Is this page beyond the end of the file? */ 942 /* Is this page beyond the end of the file? */
@@ -970,7 +960,7 @@ xfs_vm_writepage(
970 offset = page_offset(page); 960 offset = page_offset(page);
971 type = IO_OVERWRITE; 961 type = IO_OVERWRITE;
972 962
973 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) 963 if (wbc->sync_mode == WB_SYNC_NONE)
974 nonblocking = 1; 964 nonblocking = 1;
975 965
976 do { 966 do {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5e68099db2a5..b2b411985591 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -499,16 +499,14 @@ found:
499 spin_unlock(&pag->pag_buf_lock); 499 spin_unlock(&pag->pag_buf_lock);
500 xfs_perag_put(pag); 500 xfs_perag_put(pag);
501 501
502 if (xfs_buf_cond_lock(bp)) { 502 if (!xfs_buf_trylock(bp)) {
503 /* failed, so wait for the lock if requested. */ 503 if (flags & XBF_TRYLOCK) {
504 if (!(flags & XBF_TRYLOCK)) {
505 xfs_buf_lock(bp);
506 XFS_STATS_INC(xb_get_locked_waited);
507 } else {
508 xfs_buf_rele(bp); 504 xfs_buf_rele(bp);
509 XFS_STATS_INC(xb_busy_locked); 505 XFS_STATS_INC(xb_busy_locked);
510 return NULL; 506 return NULL;
511 } 507 }
508 xfs_buf_lock(bp);
509 XFS_STATS_INC(xb_get_locked_waited);
512 } 510 }
513 511
514 /* 512 /*
@@ -594,10 +592,8 @@ _xfs_buf_read(
594 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); 592 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
595 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 593 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
596 594
597 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ 595 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
598 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 596 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
599 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
600 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
601 597
602 status = xfs_buf_iorequest(bp); 598 status = xfs_buf_iorequest(bp);
603 if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) 599 if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
@@ -681,7 +677,6 @@ xfs_buf_read_uncached(
681 return NULL; 677 return NULL;
682 678
683 /* set up the buffer for a read IO */ 679 /* set up the buffer for a read IO */
684 xfs_buf_lock(bp);
685 XFS_BUF_SET_ADDR(bp, daddr); 680 XFS_BUF_SET_ADDR(bp, daddr);
686 XFS_BUF_READ(bp); 681 XFS_BUF_READ(bp);
687 XFS_BUF_BUSY(bp); 682 XFS_BUF_BUSY(bp);
@@ -816,8 +811,6 @@ xfs_buf_get_uncached(
816 goto fail_free_mem; 811 goto fail_free_mem;
817 } 812 }
818 813
819 xfs_buf_unlock(bp);
820
821 trace_xfs_buf_get_uncached(bp, _RET_IP_); 814 trace_xfs_buf_get_uncached(bp, _RET_IP_);
822 return bp; 815 return bp;
823 816
@@ -896,8 +889,8 @@ xfs_buf_rele(
896 * to push on stale inode buffers. 889 * to push on stale inode buffers.
897 */ 890 */
898int 891int
899xfs_buf_cond_lock( 892xfs_buf_trylock(
900 xfs_buf_t *bp) 893 struct xfs_buf *bp)
901{ 894{
902 int locked; 895 int locked;
903 896
@@ -907,15 +900,8 @@ xfs_buf_cond_lock(
907 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 900 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
908 xfs_log_force(bp->b_target->bt_mount, 0); 901 xfs_log_force(bp->b_target->bt_mount, 0);
909 902
910 trace_xfs_buf_cond_lock(bp, _RET_IP_); 903 trace_xfs_buf_trylock(bp, _RET_IP_);
911 return locked ? 0 : -EBUSY; 904 return locked;
912}
913
914int
915xfs_buf_lock_value(
916 xfs_buf_t *bp)
917{
918 return bp->b_sema.count;
919} 905}
920 906
921/* 907/*
@@ -929,7 +915,7 @@ xfs_buf_lock_value(
929 */ 915 */
930void 916void
931xfs_buf_lock( 917xfs_buf_lock(
932 xfs_buf_t *bp) 918 struct xfs_buf *bp)
933{ 919{
934 trace_xfs_buf_lock(bp, _RET_IP_); 920 trace_xfs_buf_lock(bp, _RET_IP_);
935 921
@@ -950,7 +936,7 @@ xfs_buf_lock(
950 */ 936 */
951void 937void
952xfs_buf_unlock( 938xfs_buf_unlock(
953 xfs_buf_t *bp) 939 struct xfs_buf *bp)
954{ 940{
955 if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { 941 if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
956 atomic_inc(&bp->b_hold); 942 atomic_inc(&bp->b_hold);
@@ -1121,7 +1107,7 @@ xfs_bioerror_relse(
1121 XFS_BUF_UNDELAYWRITE(bp); 1107 XFS_BUF_UNDELAYWRITE(bp);
1122 XFS_BUF_DONE(bp); 1108 XFS_BUF_DONE(bp);
1123 XFS_BUF_STALE(bp); 1109 XFS_BUF_STALE(bp);
1124 XFS_BUF_CLR_IODONE_FUNC(bp); 1110 bp->b_iodone = NULL;
1125 if (!(fl & XBF_ASYNC)) { 1111 if (!(fl & XBF_ASYNC)) {
1126 /* 1112 /*
1127 * Mark b_error and B_ERROR _both_. 1113 * Mark b_error and B_ERROR _both_.
@@ -1223,23 +1209,21 @@ _xfs_buf_ioapply(
1223 total_nr_pages = bp->b_page_count; 1209 total_nr_pages = bp->b_page_count;
1224 map_i = 0; 1210 map_i = 0;
1225 1211
1226 if (bp->b_flags & XBF_ORDERED) { 1212 if (bp->b_flags & XBF_WRITE) {
1227 ASSERT(!(bp->b_flags & XBF_READ)); 1213 if (bp->b_flags & XBF_SYNCIO)
1228 rw = WRITE_FLUSH_FUA; 1214 rw = WRITE_SYNC;
1229 } else if (bp->b_flags & XBF_LOG_BUFFER) { 1215 else
1230 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1216 rw = WRITE;
1231 bp->b_flags &= ~_XBF_RUN_QUEUES; 1217 if (bp->b_flags & XBF_FUA)
1232 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1218 rw |= REQ_FUA;
1233 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1219 if (bp->b_flags & XBF_FLUSH)
1234 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1220 rw |= REQ_FLUSH;
1235 bp->b_flags &= ~_XBF_RUN_QUEUES; 1221 } else if (bp->b_flags & XBF_READ_AHEAD) {
1236 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; 1222 rw = READA;
1237 } else { 1223 } else {
1238 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1224 rw = READ;
1239 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
1240 } 1225 }
1241 1226
1242
1243next_chunk: 1227next_chunk:
1244 atomic_inc(&bp->b_io_remaining); 1228 atomic_inc(&bp->b_io_remaining);
1245 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); 1229 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
@@ -1694,15 +1678,14 @@ xfs_buf_delwri_split(
1694 list_for_each_entry_safe(bp, n, dwq, b_list) { 1678 list_for_each_entry_safe(bp, n, dwq, b_list) {
1695 ASSERT(bp->b_flags & XBF_DELWRI); 1679 ASSERT(bp->b_flags & XBF_DELWRI);
1696 1680
1697 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { 1681 if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) {
1698 if (!force && 1682 if (!force &&
1699 time_before(jiffies, bp->b_queuetime + age)) { 1683 time_before(jiffies, bp->b_queuetime + age)) {
1700 xfs_buf_unlock(bp); 1684 xfs_buf_unlock(bp);
1701 break; 1685 break;
1702 } 1686 }
1703 1687
1704 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| 1688 bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
1705 _XBF_RUN_QUEUES);
1706 bp->b_flags |= XBF_WRITE; 1689 bp->b_flags |= XBF_WRITE;
1707 list_move_tail(&bp->b_list, list); 1690 list_move_tail(&bp->b_list, list);
1708 trace_xfs_buf_delwri_split(bp, _RET_IP_); 1691 trace_xfs_buf_delwri_split(bp, _RET_IP_);
@@ -1738,14 +1721,6 @@ xfs_buf_cmp(
1738 return 0; 1721 return 0;
1739} 1722}
1740 1723
1741void
1742xfs_buf_delwri_sort(
1743 xfs_buftarg_t *target,
1744 struct list_head *list)
1745{
1746 list_sort(NULL, list, xfs_buf_cmp);
1747}
1748
1749STATIC int 1724STATIC int
1750xfsbufd( 1725xfsbufd(
1751 void *data) 1726 void *data)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 50a7d5fb3b73..6a83b46b4bcf 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -46,43 +46,46 @@ typedef enum {
46 46
47#define XBF_READ (1 << 0) /* buffer intended for reading from device */ 47#define XBF_READ (1 << 0) /* buffer intended for reading from device */
48#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ 48#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
49#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */ 49#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
50#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */
50#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 51#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
51#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 52#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
52#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ 53#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
53#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ 54#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
54#define XBF_ORDERED (1 << 11)/* use ordered writes */ 55
55#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ 56/* I/O hints for the BIO layer */
56#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ 57#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
58#define XBF_FUA (1 << 11)/* force cache write through mode */
59#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
57 60
58/* flags used only as arguments to access routines */ 61/* flags used only as arguments to access routines */
59#define XBF_LOCK (1 << 14)/* lock requested */ 62#define XBF_LOCK (1 << 15)/* lock requested */
60#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */ 63#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ 64#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
62 65
63/* flags used only internally */ 66/* flags used only internally */
64#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ 67#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
65#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ 68#define _XBF_KMEM (1 << 21)/* backed by heap memory */
66#define _XBF_KMEM (1 << 20)/* backed by heap memory */ 69#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */
67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
68 70
69typedef unsigned int xfs_buf_flags_t; 71typedef unsigned int xfs_buf_flags_t;
70 72
71#define XFS_BUF_FLAGS \ 73#define XFS_BUF_FLAGS \
72 { XBF_READ, "READ" }, \ 74 { XBF_READ, "READ" }, \
73 { XBF_WRITE, "WRITE" }, \ 75 { XBF_WRITE, "WRITE" }, \
76 { XBF_READ_AHEAD, "READ_AHEAD" }, \
74 { XBF_MAPPED, "MAPPED" }, \ 77 { XBF_MAPPED, "MAPPED" }, \
75 { XBF_ASYNC, "ASYNC" }, \ 78 { XBF_ASYNC, "ASYNC" }, \
76 { XBF_DONE, "DONE" }, \ 79 { XBF_DONE, "DONE" }, \
77 { XBF_DELWRI, "DELWRI" }, \ 80 { XBF_DELWRI, "DELWRI" }, \
78 { XBF_STALE, "STALE" }, \ 81 { XBF_STALE, "STALE" }, \
79 { XBF_ORDERED, "ORDERED" }, \ 82 { XBF_SYNCIO, "SYNCIO" }, \
80 { XBF_READ_AHEAD, "READ_AHEAD" }, \ 83 { XBF_FUA, "FUA" }, \
84 { XBF_FLUSH, "FLUSH" }, \
81 { XBF_LOCK, "LOCK" }, /* should never be set */\ 85 { XBF_LOCK, "LOCK" }, /* should never be set */\
82 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ 86 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
83 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ 87 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
84 { _XBF_PAGES, "PAGES" }, \ 88 { _XBF_PAGES, "PAGES" }, \
85 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
86 { _XBF_KMEM, "KMEM" }, \ 89 { _XBF_KMEM, "KMEM" }, \
87 { _XBF_DELWRI_Q, "DELWRI_Q" } 90 { _XBF_DELWRI_Q, "DELWRI_Q" }
88 91
@@ -91,11 +94,6 @@ typedef enum {
91 XBT_FORCE_FLUSH = 1, 94 XBT_FORCE_FLUSH = 1,
92} xfs_buftarg_flags_t; 95} xfs_buftarg_flags_t;
93 96
94typedef struct xfs_bufhash {
95 struct list_head bh_list;
96 spinlock_t bh_lock;
97} xfs_bufhash_t;
98
99typedef struct xfs_buftarg { 97typedef struct xfs_buftarg {
100 dev_t bt_dev; 98 dev_t bt_dev;
101 struct block_device *bt_bdev; 99 struct block_device *bt_bdev;
@@ -151,7 +149,7 @@ typedef struct xfs_buf {
151 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 149 xfs_buf_iodone_t b_iodone; /* I/O completion function */
152 struct completion b_iowait; /* queue for I/O waiters */ 150 struct completion b_iowait; /* queue for I/O waiters */
153 void *b_fspriv; 151 void *b_fspriv;
154 void *b_fspriv2; 152 struct xfs_trans *b_transp;
155 struct page **b_pages; /* array of page pointers */ 153 struct page **b_pages; /* array of page pointers */
156 struct page *b_page_array[XB_PAGES]; /* inline pages */ 154 struct page *b_page_array[XB_PAGES]; /* inline pages */
157 unsigned long b_queuetime; /* time buffer was queued */ 155 unsigned long b_queuetime; /* time buffer was queued */
@@ -192,10 +190,11 @@ extern void xfs_buf_free(xfs_buf_t *);
192extern void xfs_buf_rele(xfs_buf_t *); 190extern void xfs_buf_rele(xfs_buf_t *);
193 191
194/* Locking and Unlocking Buffers */ 192/* Locking and Unlocking Buffers */
195extern int xfs_buf_cond_lock(xfs_buf_t *); 193extern int xfs_buf_trylock(xfs_buf_t *);
196extern int xfs_buf_lock_value(xfs_buf_t *);
197extern void xfs_buf_lock(xfs_buf_t *); 194extern void xfs_buf_lock(xfs_buf_t *);
198extern void xfs_buf_unlock(xfs_buf_t *); 195extern void xfs_buf_unlock(xfs_buf_t *);
196#define xfs_buf_islocked(bp) \
197 ((bp)->b_sema.count <= 0)
199 198
200/* Buffer Read and Write Routines */ 199/* Buffer Read and Write Routines */
201extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); 200extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
@@ -234,8 +233,9 @@ extern void xfs_buf_terminate(void);
234 233
235 234
236#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) 235#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
237#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 236#define XFS_BUF_ZEROFLAGS(bp) \
238 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 237 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
238 XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
239 239
240void xfs_buf_stale(struct xfs_buf *bp); 240void xfs_buf_stale(struct xfs_buf *bp);
241#define XFS_BUF_STALE(bp) xfs_buf_stale(bp); 241#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
@@ -267,10 +267,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
267#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) 267#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
268#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) 268#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
269 269
270#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
271#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
272#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
273
274#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) 270#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
275#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) 271#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
276#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) 272#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
@@ -280,14 +276,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
280#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) 276#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
281#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) 277#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
282 278
283#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
284#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
285#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
286
287#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
288#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
289#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
290#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
291#define XFS_BUF_SET_START(bp) do { } while (0) 279#define XFS_BUF_SET_START(bp) do { } while (0)
292 280
293#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) 281#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
@@ -313,10 +301,6 @@ xfs_buf_set_ref(
313 301
314#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) 302#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
315 303
316#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
317#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
318#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
319#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
320#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); 304#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
321 305
322#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) 306#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index f4f878fc0083..75e5d322e48f 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -151,14 +151,14 @@ xfs_nfs_get_inode(
151 * We don't use ESTALE directly down the chain to not 151 * We don't use ESTALE directly down the chain to not
152 * confuse applications using bulkstat that expect EINVAL. 152 * confuse applications using bulkstat that expect EINVAL.
153 */ 153 */
154 if (error == EINVAL) 154 if (error == EINVAL || error == ENOENT)
155 error = ESTALE; 155 error = ESTALE;
156 return ERR_PTR(-error); 156 return ERR_PTR(-error);
157 } 157 }
158 158
159 if (ip->i_d.di_gen != generation) { 159 if (ip->i_d.di_gen != generation) {
160 IRELE(ip); 160 IRELE(ip);
161 return ERR_PTR(-ENOENT); 161 return ERR_PTR(-ESTALE);
162 } 162 }
163 163
164 return VFS_I(ip); 164 return VFS_I(ip);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index fbbf657df0cd..cca00f49e092 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -943,7 +943,7 @@ xfs_file_fallocate(
943 943
944 iattr.ia_valid = ATTR_SIZE; 944 iattr.ia_valid = ATTR_SIZE;
945 iattr.ia_size = new_size; 945 iattr.ia_size = new_size;
946 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 946 error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
947 } 947 }
948 948
949out_unlock: 949out_unlock:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d44d92cd12b1..501e4f630548 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -39,6 +39,7 @@
39#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
40#include "xfs_utils.h" 40#include "xfs_utils.h"
41#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
42#include "xfs_inode_item.h"
42#include "xfs_trace.h" 43#include "xfs_trace.h"
43 44
44#include <linux/capability.h> 45#include <linux/capability.h>
@@ -497,12 +498,442 @@ xfs_vn_getattr(
497 return 0; 498 return 0;
498} 499}
499 500
501int
502xfs_setattr_nonsize(
503 struct xfs_inode *ip,
504 struct iattr *iattr,
505 int flags)
506{
507 xfs_mount_t *mp = ip->i_mount;
508 struct inode *inode = VFS_I(ip);
509 int mask = iattr->ia_valid;
510 xfs_trans_t *tp;
511 int error;
512 uid_t uid = 0, iuid = 0;
513 gid_t gid = 0, igid = 0;
514 struct xfs_dquot *udqp = NULL, *gdqp = NULL;
515 struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL;
516
517 trace_xfs_setattr(ip);
518
519 if (mp->m_flags & XFS_MOUNT_RDONLY)
520 return XFS_ERROR(EROFS);
521
522 if (XFS_FORCED_SHUTDOWN(mp))
523 return XFS_ERROR(EIO);
524
525 error = -inode_change_ok(inode, iattr);
526 if (error)
527 return XFS_ERROR(error);
528
529 ASSERT((mask & ATTR_SIZE) == 0);
530
531 /*
532 * If disk quotas is on, we make sure that the dquots do exist on disk,
533 * before we start any other transactions. Trying to do this later
534 * is messy. We don't care to take a readlock to look at the ids
535 * in inode here, because we can't hold it across the trans_reserve.
536 * If the IDs do change before we take the ilock, we're covered
537 * because the i_*dquot fields will get updated anyway.
538 */
539 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
540 uint qflags = 0;
541
542 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
543 uid = iattr->ia_uid;
544 qflags |= XFS_QMOPT_UQUOTA;
545 } else {
546 uid = ip->i_d.di_uid;
547 }
548 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
549 gid = iattr->ia_gid;
550 qflags |= XFS_QMOPT_GQUOTA;
551 } else {
552 gid = ip->i_d.di_gid;
553 }
554
555 /*
556 * We take a reference when we initialize udqp and gdqp,
557 * so it is important that we never blindly double trip on
558 * the same variable. See xfs_create() for an example.
559 */
560 ASSERT(udqp == NULL);
561 ASSERT(gdqp == NULL);
562 error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
563 qflags, &udqp, &gdqp);
564 if (error)
565 return error;
566 }
567
568 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
569 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
570 if (error)
571 goto out_dqrele;
572
573 xfs_ilock(ip, XFS_ILOCK_EXCL);
574
575 /*
576 * Change file ownership. Must be the owner or privileged.
577 */
578 if (mask & (ATTR_UID|ATTR_GID)) {
579 /*
580 * These IDs could have changed since we last looked at them.
581 * But, we're assured that if the ownership did change
582 * while we didn't have the inode locked, inode's dquot(s)
583 * would have changed also.
584 */
585 iuid = ip->i_d.di_uid;
586 igid = ip->i_d.di_gid;
587 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
588 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
589
590 /*
591 * Do a quota reservation only if uid/gid is actually
592 * going to change.
593 */
594 if (XFS_IS_QUOTA_RUNNING(mp) &&
595 ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
596 (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
597 ASSERT(tp);
598 error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
599 capable(CAP_FOWNER) ?
600 XFS_QMOPT_FORCE_RES : 0);
601 if (error) /* out of quota */
602 goto out_trans_cancel;
603 }
604 }
605
606 xfs_trans_ijoin(tp, ip);
607
608 /*
609 * Change file ownership. Must be the owner or privileged.
610 */
611 if (mask & (ATTR_UID|ATTR_GID)) {
612 /*
613 * CAP_FSETID overrides the following restrictions:
614 *
615 * The set-user-ID and set-group-ID bits of a file will be
616 * cleared upon successful return from chown()
617 */
618 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
619 !capable(CAP_FSETID))
620 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
621
622 /*
623 * Change the ownerships and register quota modifications
624 * in the transaction.
625 */
626 if (iuid != uid) {
627 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
628 ASSERT(mask & ATTR_UID);
629 ASSERT(udqp);
630 olddquot1 = xfs_qm_vop_chown(tp, ip,
631 &ip->i_udquot, udqp);
632 }
633 ip->i_d.di_uid = uid;
634 inode->i_uid = uid;
635 }
636 if (igid != gid) {
637 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
638 ASSERT(!XFS_IS_PQUOTA_ON(mp));
639 ASSERT(mask & ATTR_GID);
640 ASSERT(gdqp);
641 olddquot2 = xfs_qm_vop_chown(tp, ip,
642 &ip->i_gdquot, gdqp);
643 }
644 ip->i_d.di_gid = gid;
645 inode->i_gid = gid;
646 }
647 }
648
649 /*
650 * Change file access modes.
651 */
652 if (mask & ATTR_MODE) {
653 umode_t mode = iattr->ia_mode;
654
655 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
656 mode &= ~S_ISGID;
657
658 ip->i_d.di_mode &= S_IFMT;
659 ip->i_d.di_mode |= mode & ~S_IFMT;
660
661 inode->i_mode &= S_IFMT;
662 inode->i_mode |= mode & ~S_IFMT;
663 }
664
665 /*
666 * Change file access or modified times.
667 */
668 if (mask & ATTR_ATIME) {
669 inode->i_atime = iattr->ia_atime;
670 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
671 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
672 ip->i_update_core = 1;
673 }
674 if (mask & ATTR_CTIME) {
675 inode->i_ctime = iattr->ia_ctime;
676 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
677 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
678 ip->i_update_core = 1;
679 }
680 if (mask & ATTR_MTIME) {
681 inode->i_mtime = iattr->ia_mtime;
682 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
683 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
684 ip->i_update_core = 1;
685 }
686
687 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
688
689 XFS_STATS_INC(xs_ig_attrchg);
690
691 if (mp->m_flags & XFS_MOUNT_WSYNC)
692 xfs_trans_set_sync(tp);
693 error = xfs_trans_commit(tp, 0);
694
695 xfs_iunlock(ip, XFS_ILOCK_EXCL);
696
697 /*
698 * Release any dquot(s) the inode had kept before chown.
699 */
700 xfs_qm_dqrele(olddquot1);
701 xfs_qm_dqrele(olddquot2);
702 xfs_qm_dqrele(udqp);
703 xfs_qm_dqrele(gdqp);
704
705 if (error)
706 return XFS_ERROR(error);
707
708 /*
709 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
710 * update. We could avoid this with linked transactions
711 * and passing down the transaction pointer all the way
712 * to attr_set. No previous user of the generic
713 * Posix ACL code seems to care about this issue either.
714 */
715 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
716 error = -xfs_acl_chmod(inode);
717 if (error)
718 return XFS_ERROR(error);
719 }
720
721 return 0;
722
723out_trans_cancel:
724 xfs_trans_cancel(tp, 0);
725 xfs_iunlock(ip, XFS_ILOCK_EXCL);
726out_dqrele:
727 xfs_qm_dqrele(udqp);
728 xfs_qm_dqrele(gdqp);
729 return error;
730}
731
732/*
733 * Truncate file. Must have write permission and not be a directory.
734 */
735int
736xfs_setattr_size(
737 struct xfs_inode *ip,
738 struct iattr *iattr,
739 int flags)
740{
741 struct xfs_mount *mp = ip->i_mount;
742 struct inode *inode = VFS_I(ip);
743 int mask = iattr->ia_valid;
744 struct xfs_trans *tp;
745 int error;
746 uint lock_flags;
747 uint commit_flags = 0;
748
749 trace_xfs_setattr(ip);
750
751 if (mp->m_flags & XFS_MOUNT_RDONLY)
752 return XFS_ERROR(EROFS);
753
754 if (XFS_FORCED_SHUTDOWN(mp))
755 return XFS_ERROR(EIO);
756
757 error = -inode_change_ok(inode, iattr);
758 if (error)
759 return XFS_ERROR(error);
760
761 ASSERT(S_ISREG(ip->i_d.di_mode));
762 ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
763 ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
764 ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
765
766 lock_flags = XFS_ILOCK_EXCL;
767 if (!(flags & XFS_ATTR_NOLOCK))
768 lock_flags |= XFS_IOLOCK_EXCL;
769 xfs_ilock(ip, lock_flags);
770
771 /*
772 * Short circuit the truncate case for zero length files.
773 */
774 if (iattr->ia_size == 0 &&
775 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
776 if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
777 goto out_unlock;
778
779 /*
780 * Use the regular setattr path to update the timestamps.
781 */
782 xfs_iunlock(ip, lock_flags);
783 iattr->ia_valid &= ~ATTR_SIZE;
784 return xfs_setattr_nonsize(ip, iattr, 0);
785 }
786
787 /*
788 * Make sure that the dquots are attached to the inode.
789 */
790 error = xfs_qm_dqattach_locked(ip, 0);
791 if (error)
792 goto out_unlock;
793
794 /*
795 * Now we can make the changes. Before we join the inode to the
796 * transaction, take care of the part of the truncation that must be
797 * done without the inode lock. This needs to be done before joining
798 * the inode to the transaction, because the inode cannot be unlocked
799 * once it is a part of the transaction.
800 */
801 if (iattr->ia_size > ip->i_size) {
802 /*
803 * Do the first part of growing a file: zero any data in the
804 * last block that is beyond the old EOF. We need to do this
805 * before the inode is joined to the transaction to modify
806 * i_size.
807 */
808 error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
809 if (error)
810 goto out_unlock;
811 }
812 xfs_iunlock(ip, XFS_ILOCK_EXCL);
813 lock_flags &= ~XFS_ILOCK_EXCL;
814
815 /*
816 * We are going to log the inode size change in this transaction so
817 * any previous writes that are beyond the on disk EOF and the new
818 * EOF that have not been written out need to be written here. If we
819 * do not write the data out, we expose ourselves to the null files
820 * problem.
821 *
822 * Only flush from the on disk size to the smaller of the in memory
823 * file size or the new size as that's the range we really care about
824 * here and prevents waiting for other data not within the range we
825 * care about here.
826 */
827 if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
828 error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
829 XBF_ASYNC, FI_NONE);
830 if (error)
831 goto out_unlock;
832 }
833
834 /*
835 * Wait for all I/O to complete.
836 */
837 xfs_ioend_wait(ip);
838
839 error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
840 xfs_get_blocks);
841 if (error)
842 goto out_unlock;
843
844 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
845 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
846 XFS_TRANS_PERM_LOG_RES,
847 XFS_ITRUNCATE_LOG_COUNT);
848 if (error)
849 goto out_trans_cancel;
850
851 truncate_setsize(inode, iattr->ia_size);
852
853 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
854 lock_flags |= XFS_ILOCK_EXCL;
855
856 xfs_ilock(ip, XFS_ILOCK_EXCL);
857
858 xfs_trans_ijoin(tp, ip);
859
860 /*
861 * Only change the c/mtime if we are changing the size or we are
862 * explicitly asked to change it. This handles the semantic difference
863 * between truncate() and ftruncate() as implemented in the VFS.
864 *
865 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
866 * special case where we need to update the times despite not having
867 * these flags set. For all other operations the VFS set these flags
868 * explicitly if it wants a timestamp update.
869 */
870 if (iattr->ia_size != ip->i_size &&
871 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
872 iattr->ia_ctime = iattr->ia_mtime =
873 current_fs_time(inode->i_sb);
874 mask |= ATTR_CTIME | ATTR_MTIME;
875 }
876
877 if (iattr->ia_size > ip->i_size) {
878 ip->i_d.di_size = iattr->ia_size;
879 ip->i_size = iattr->ia_size;
880 } else if (iattr->ia_size <= ip->i_size ||
881 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
882 error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
883 if (error)
884 goto out_trans_abort;
885
886 /*
887 * Truncated "down", so we're removing references to old data
888 * here - if we delay flushing for a long time, we expose
889 * ourselves unduly to the notorious NULL files problem. So,
890 * we mark this inode and flush it when the file is closed,
891 * and do not wait the usual (long) time for writeout.
892 */
893 xfs_iflags_set(ip, XFS_ITRUNCATED);
894 }
895
896 if (mask & ATTR_CTIME) {
897 inode->i_ctime = iattr->ia_ctime;
898 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
899 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
900 ip->i_update_core = 1;
901 }
902 if (mask & ATTR_MTIME) {
903 inode->i_mtime = iattr->ia_mtime;
904 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
905 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
906 ip->i_update_core = 1;
907 }
908
909 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
910
911 XFS_STATS_INC(xs_ig_attrchg);
912
913 if (mp->m_flags & XFS_MOUNT_WSYNC)
914 xfs_trans_set_sync(tp);
915
916 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
917out_unlock:
918 if (lock_flags)
919 xfs_iunlock(ip, lock_flags);
920 return error;
921
922out_trans_abort:
923 commit_flags |= XFS_TRANS_ABORT;
924out_trans_cancel:
925 xfs_trans_cancel(tp, commit_flags);
926 goto out_unlock;
927}
928
500STATIC int 929STATIC int
501xfs_vn_setattr( 930xfs_vn_setattr(
502 struct dentry *dentry, 931 struct dentry *dentry,
503 struct iattr *iattr) 932 struct iattr *iattr)
504{ 933{
505 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); 934 if (iattr->ia_valid & ATTR_SIZE)
935 return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
936 return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
506} 937}
507 938
508#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 939#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8633521b3b2e..d42f814e4d35 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -33,7 +33,6 @@
33#endif 33#endif
34 34
35#include <xfs_types.h> 35#include <xfs_types.h>
36#include <xfs_arch.h>
37 36
38#include <kmem.h> 37#include <kmem.h>
39#include <mrlock.h> 38#include <mrlock.h>
@@ -88,6 +87,12 @@
88#include <xfs_buf.h> 87#include <xfs_buf.h>
89#include <xfs_message.h> 88#include <xfs_message.h>
90 89
90#ifdef __BIG_ENDIAN
91#define XFS_NATIVE_HOST 1
92#else
93#undef XFS_NATIVE_HOST
94#endif
95
91/* 96/*
92 * Feature macros (disable/enable) 97 * Feature macros (disable/enable)
93 */ 98 */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a9c6ccff7b48..9a72dda58bd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -33,7 +33,6 @@
33#include "xfs_dinode.h" 33#include "xfs_dinode.h"
34#include "xfs_inode.h" 34#include "xfs_inode.h"
35#include "xfs_btree.h" 35#include "xfs_btree.h"
36#include "xfs_btree_trace.h"
37#include "xfs_ialloc.h" 36#include "xfs_ialloc.h"
38#include "xfs_bmap.h" 37#include "xfs_bmap.h"
39#include "xfs_rtalloc.h" 38#include "xfs_rtalloc.h"
@@ -1407,33 +1406,31 @@ xfs_fs_fill_super(
1407 sb->s_time_gran = 1; 1406 sb->s_time_gran = 1;
1408 set_posix_acl_flag(sb); 1407 set_posix_acl_flag(sb);
1409 1408
1410 error = xfs_syncd_init(mp); 1409 error = xfs_mountfs(mp);
1411 if (error) 1410 if (error)
1412 goto out_filestream_unmount; 1411 goto out_filestream_unmount;
1413 1412
1414 error = xfs_mountfs(mp); 1413 error = xfs_syncd_init(mp);
1415 if (error) 1414 if (error)
1416 goto out_syncd_stop; 1415 goto out_unmount;
1417 1416
1418 root = igrab(VFS_I(mp->m_rootip)); 1417 root = igrab(VFS_I(mp->m_rootip));
1419 if (!root) { 1418 if (!root) {
1420 error = ENOENT; 1419 error = ENOENT;
1421 goto fail_unmount; 1420 goto out_syncd_stop;
1422 } 1421 }
1423 if (is_bad_inode(root)) { 1422 if (is_bad_inode(root)) {
1424 error = EINVAL; 1423 error = EINVAL;
1425 goto fail_vnrele; 1424 goto out_syncd_stop;
1426 } 1425 }
1427 sb->s_root = d_alloc_root(root); 1426 sb->s_root = d_alloc_root(root);
1428 if (!sb->s_root) { 1427 if (!sb->s_root) {
1429 error = ENOMEM; 1428 error = ENOMEM;
1430 goto fail_vnrele; 1429 goto out_iput;
1431 } 1430 }
1432 1431
1433 return 0; 1432 return 0;
1434 1433
1435 out_syncd_stop:
1436 xfs_syncd_stop(mp);
1437 out_filestream_unmount: 1434 out_filestream_unmount:
1438 xfs_filestream_unmount(mp); 1435 xfs_filestream_unmount(mp);
1439 out_free_sb: 1436 out_free_sb:
@@ -1448,17 +1445,11 @@ xfs_fs_fill_super(
1448 out: 1445 out:
1449 return -error; 1446 return -error;
1450 1447
1451 fail_vnrele: 1448 out_iput:
1452 if (sb->s_root) { 1449 iput(root);
1453 dput(sb->s_root); 1450 out_syncd_stop:
1454 sb->s_root = NULL;
1455 } else {
1456 iput(root);
1457 }
1458
1459 fail_unmount:
1460 xfs_syncd_stop(mp); 1451 xfs_syncd_stop(mp);
1461 1452 out_unmount:
1462 /* 1453 /*
1463 * Blow away any referenced inode in the filestreams cache. 1454 * Blow away any referenced inode in the filestreams cache.
1464 * This can and will cause log traffic as inodes go inactive 1455 * This can and will cause log traffic as inodes go inactive
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 9bd7e895a4e2..e4c938afb910 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -361,14 +361,12 @@ xfs_quiesce_data(
361{ 361{
362 int error, error2 = 0; 362 int error, error2 = 0;
363 363
364 /* push non-blocking */
365 xfs_sync_data(mp, 0);
366 xfs_qm_sync(mp, SYNC_TRYLOCK); 364 xfs_qm_sync(mp, SYNC_TRYLOCK);
367
368 /* push and block till complete */
369 xfs_sync_data(mp, SYNC_WAIT);
370 xfs_qm_sync(mp, SYNC_WAIT); 365 xfs_qm_sync(mp, SYNC_WAIT);
371 366
367 /* force out the newly dirtied log buffers */
368 xfs_log_force(mp, XFS_LOG_SYNC);
369
372 /* write superblock and hoover up shutdown errors */ 370 /* write superblock and hoover up shutdown errors */
373 error = xfs_sync_fsdata(mp); 371 error = xfs_sync_fsdata(mp);
374 372
@@ -438,7 +436,7 @@ xfs_quiesce_attr(
438 WARN_ON(atomic_read(&mp->m_active_trans) != 0); 436 WARN_ON(atomic_read(&mp->m_active_trans) != 0);
439 437
440 /* Push the superblock and write an unmount record */ 438 /* Push the superblock and write an unmount record */
441 error = xfs_log_sbcount(mp, 1); 439 error = xfs_log_sbcount(mp);
442 if (error) 440 if (error)
443 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " 441 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
444 "Frozen image may not be consistent."); 442 "Frozen image may not be consistent.");
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 2e1568597764..941202e7ac6e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,14 +21,6 @@
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_perag; 22struct xfs_perag;
23 23
24typedef struct xfs_sync_work {
25 struct list_head w_list;
26 struct xfs_mount *w_mount;
27 void *w_data; /* syncer routine argument */
28 void (*w_syncer)(struct xfs_mount *, void *);
29 struct completion *w_completion;
30} xfs_sync_work_t;
31
32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 24#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ 25#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
34 26
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index d48b7a579ae1..fda0708ef2ea 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -293,7 +293,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
293 __entry->buffer_length = bp->b_buffer_length; 293 __entry->buffer_length = bp->b_buffer_length;
294 __entry->hold = atomic_read(&bp->b_hold); 294 __entry->hold = atomic_read(&bp->b_hold);
295 __entry->pincount = atomic_read(&bp->b_pin_count); 295 __entry->pincount = atomic_read(&bp->b_pin_count);
296 __entry->lockval = xfs_buf_lock_value(bp); 296 __entry->lockval = bp->b_sema.count;
297 __entry->flags = bp->b_flags; 297 __entry->flags = bp->b_flags;
298 __entry->caller_ip = caller_ip; 298 __entry->caller_ip = caller_ip;
299 ), 299 ),
@@ -323,7 +323,7 @@ DEFINE_BUF_EVENT(xfs_buf_bawrite);
323DEFINE_BUF_EVENT(xfs_buf_bdwrite); 323DEFINE_BUF_EVENT(xfs_buf_bdwrite);
324DEFINE_BUF_EVENT(xfs_buf_lock); 324DEFINE_BUF_EVENT(xfs_buf_lock);
325DEFINE_BUF_EVENT(xfs_buf_lock_done); 325DEFINE_BUF_EVENT(xfs_buf_lock_done);
326DEFINE_BUF_EVENT(xfs_buf_cond_lock); 326DEFINE_BUF_EVENT(xfs_buf_trylock);
327DEFINE_BUF_EVENT(xfs_buf_unlock); 327DEFINE_BUF_EVENT(xfs_buf_unlock);
328DEFINE_BUF_EVENT(xfs_buf_iowait); 328DEFINE_BUF_EVENT(xfs_buf_iowait);
329DEFINE_BUF_EVENT(xfs_buf_iowait_done); 329DEFINE_BUF_EVENT(xfs_buf_iowait_done);
@@ -366,7 +366,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
366 __entry->flags = flags; 366 __entry->flags = flags;
367 __entry->hold = atomic_read(&bp->b_hold); 367 __entry->hold = atomic_read(&bp->b_hold);
368 __entry->pincount = atomic_read(&bp->b_pin_count); 368 __entry->pincount = atomic_read(&bp->b_pin_count);
369 __entry->lockval = xfs_buf_lock_value(bp); 369 __entry->lockval = bp->b_sema.count;
370 __entry->caller_ip = caller_ip; 370 __entry->caller_ip = caller_ip;
371 ), 371 ),
372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
@@ -409,7 +409,7 @@ TRACE_EVENT(xfs_buf_ioerror,
409 __entry->buffer_length = bp->b_buffer_length; 409 __entry->buffer_length = bp->b_buffer_length;
410 __entry->hold = atomic_read(&bp->b_hold); 410 __entry->hold = atomic_read(&bp->b_hold);
411 __entry->pincount = atomic_read(&bp->b_pin_count); 411 __entry->pincount = atomic_read(&bp->b_pin_count);
412 __entry->lockval = xfs_buf_lock_value(bp); 412 __entry->lockval = bp->b_sema.count;
413 __entry->error = error; 413 __entry->error = error;
414 __entry->flags = bp->b_flags; 414 __entry->flags = bp->b_flags;
415 __entry->caller_ip = caller_ip; 415 __entry->caller_ip = caller_ip;
@@ -454,7 +454,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
454 __entry->buf_flags = bip->bli_buf->b_flags; 454 __entry->buf_flags = bip->bli_buf->b_flags;
455 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); 455 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
456 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); 456 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
457 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); 457 __entry->buf_lockval = bip->bli_buf->b_sema.count;
458 __entry->li_desc = bip->bli_item.li_desc; 458 __entry->li_desc = bip->bli_item.li_desc;
459 __entry->li_flags = bip->bli_item.li_flags; 459 __entry->li_flags = bip->bli_item.li_flags;
460 ), 460 ),
@@ -998,7 +998,8 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
998 TP_STRUCT__entry( 998 TP_STRUCT__entry(
999 __field(dev_t, dev) 999 __field(dev_t, dev)
1000 __field(xfs_ino_t, ino) 1000 __field(xfs_ino_t, ino)
1001 __field(loff_t, size) 1001 __field(loff_t, isize)
1002 __field(loff_t, disize)
1002 __field(loff_t, new_size) 1003 __field(loff_t, new_size)
1003 __field(loff_t, offset) 1004 __field(loff_t, offset)
1004 __field(size_t, count) 1005 __field(size_t, count)
@@ -1006,16 +1007,18 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
1006 TP_fast_assign( 1007 TP_fast_assign(
1007 __entry->dev = VFS_I(ip)->i_sb->s_dev; 1008 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1008 __entry->ino = ip->i_ino; 1009 __entry->ino = ip->i_ino;
1009 __entry->size = ip->i_d.di_size; 1010 __entry->isize = ip->i_size;
1011 __entry->disize = ip->i_d.di_size;
1010 __entry->new_size = ip->i_new_size; 1012 __entry->new_size = ip->i_new_size;
1011 __entry->offset = offset; 1013 __entry->offset = offset;
1012 __entry->count = count; 1014 __entry->count = count;
1013 ), 1015 ),
1014 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 1016 TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
1015 "offset 0x%llx count %zd", 1017 "offset 0x%llx count %zd",
1016 MAJOR(__entry->dev), MINOR(__entry->dev), 1018 MAJOR(__entry->dev), MINOR(__entry->dev),
1017 __entry->ino, 1019 __entry->ino,
1018 __entry->size, 1020 __entry->isize,
1021 __entry->disize,
1019 __entry->new_size, 1022 __entry->new_size,
1020 __entry->offset, 1023 __entry->offset,
1021 __entry->count) 1024 __entry->count)
@@ -1028,40 +1031,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
1028DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); 1031DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
1029DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 1032DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1030DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); 1033DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1031 1034DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
1032
1033TRACE_EVENT(xfs_itruncate_start,
1034 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
1035 xfs_off_t toss_start, xfs_off_t toss_finish),
1036 TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
1037 TP_STRUCT__entry(
1038 __field(dev_t, dev)
1039 __field(xfs_ino_t, ino)
1040 __field(xfs_fsize_t, size)
1041 __field(xfs_fsize_t, new_size)
1042 __field(xfs_off_t, toss_start)
1043 __field(xfs_off_t, toss_finish)
1044 __field(int, flag)
1045 ),
1046 TP_fast_assign(
1047 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1048 __entry->ino = ip->i_ino;
1049 __entry->size = ip->i_d.di_size;
1050 __entry->new_size = new_size;
1051 __entry->toss_start = toss_start;
1052 __entry->toss_finish = toss_finish;
1053 __entry->flag = flag;
1054 ),
1055 TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
1056 "toss start 0x%llx toss finish 0x%llx",
1057 MAJOR(__entry->dev), MINOR(__entry->dev),
1058 __entry->ino,
1059 __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
1060 __entry->size,
1061 __entry->new_size,
1062 __entry->toss_start,
1063 __entry->toss_finish)
1064);
1065 1035
1066DECLARE_EVENT_CLASS(xfs_itrunc_class, 1036DECLARE_EVENT_CLASS(xfs_itrunc_class,
1067 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), 1037 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
@@ -1089,8 +1059,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
1089DEFINE_EVENT(xfs_itrunc_class, name, \ 1059DEFINE_EVENT(xfs_itrunc_class, name, \
1090 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ 1060 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
1091 TP_ARGS(ip, new_size)) 1061 TP_ARGS(ip, new_size))
1092DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); 1062DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
1093DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); 1063DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
1094 1064
1095TRACE_EVENT(xfs_pagecache_inval, 1065TRACE_EVENT(xfs_pagecache_inval,
1096 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), 1066 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 6fa214603819..837f31158d43 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -220,7 +220,7 @@ xfs_qm_adjust_dqtimers(
220{ 220{
221 ASSERT(d->d_id); 221 ASSERT(d->d_id);
222 222
223#ifdef QUOTADEBUG 223#ifdef DEBUG
224 if (d->d_blk_hardlimit) 224 if (d->d_blk_hardlimit)
225 ASSERT(be64_to_cpu(d->d_blk_softlimit) <= 225 ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
226 be64_to_cpu(d->d_blk_hardlimit)); 226 be64_to_cpu(d->d_blk_hardlimit));
@@ -231,6 +231,7 @@ xfs_qm_adjust_dqtimers(
231 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= 231 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
232 be64_to_cpu(d->d_rtb_hardlimit)); 232 be64_to_cpu(d->d_rtb_hardlimit));
233#endif 233#endif
234
234 if (!d->d_btimer) { 235 if (!d->d_btimer) {
235 if ((d->d_blk_softlimit && 236 if ((d->d_blk_softlimit &&
236 (be64_to_cpu(d->d_bcount) >= 237 (be64_to_cpu(d->d_bcount) >=
@@ -318,7 +319,7 @@ xfs_qm_init_dquot_blk(
318 319
319 ASSERT(tp); 320 ASSERT(tp);
320 ASSERT(XFS_BUF_ISBUSY(bp)); 321 ASSERT(XFS_BUF_ISBUSY(bp));
321 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 322 ASSERT(xfs_buf_islocked(bp));
322 323
323 d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); 324 d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
324 325
@@ -534,7 +535,7 @@ xfs_qm_dqtobp(
534 } 535 }
535 536
536 ASSERT(XFS_BUF_ISBUSY(bp)); 537 ASSERT(XFS_BUF_ISBUSY(bp));
537 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 538 ASSERT(xfs_buf_islocked(bp));
538 539
539 /* 540 /*
540 * calculate the location of the dquot inside the buffer. 541 * calculate the location of the dquot inside the buffer.
@@ -622,7 +623,7 @@ xfs_qm_dqread(
622 * brelse it because we have the changes incore. 623 * brelse it because we have the changes incore.
623 */ 624 */
624 ASSERT(XFS_BUF_ISBUSY(bp)); 625 ASSERT(XFS_BUF_ISBUSY(bp));
625 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 626 ASSERT(xfs_buf_islocked(bp));
626 xfs_trans_brelse(tp, bp); 627 xfs_trans_brelse(tp, bp);
627 628
628 return (error); 629 return (error);
@@ -1423,45 +1424,6 @@ xfs_qm_dqpurge(
1423} 1424}
1424 1425
1425 1426
1426#ifdef QUOTADEBUG
1427void
1428xfs_qm_dqprint(xfs_dquot_t *dqp)
1429{
1430 struct xfs_mount *mp = dqp->q_mount;
1431
1432 xfs_debug(mp, "-----------KERNEL DQUOT----------------");
1433 xfs_debug(mp, "---- dquotID = %d",
1434 (int)be32_to_cpu(dqp->q_core.d_id));
1435 xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
1436 xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount);
1437 xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno);
1438 xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
1439 xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)",
1440 be64_to_cpu(dqp->q_core.d_blk_hardlimit),
1441 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
1442 xfs_debug(mp, "---- blkslimit = %Lu (0x%x)",
1443 be64_to_cpu(dqp->q_core.d_blk_softlimit),
1444 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
1445 xfs_debug(mp, "---- inohlimit = %Lu (0x%x)",
1446 be64_to_cpu(dqp->q_core.d_ino_hardlimit),
1447 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
1448 xfs_debug(mp, "---- inoslimit = %Lu (0x%x)",
1449 be64_to_cpu(dqp->q_core.d_ino_softlimit),
1450 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
1451 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
1452 be64_to_cpu(dqp->q_core.d_bcount),
1453 (int)be64_to_cpu(dqp->q_core.d_bcount));
1454 xfs_debug(mp, "---- icount = %Lu (0x%x)",
1455 be64_to_cpu(dqp->q_core.d_icount),
1456 (int)be64_to_cpu(dqp->q_core.d_icount));
1457 xfs_debug(mp, "---- btimer = %d",
1458 (int)be32_to_cpu(dqp->q_core.d_btimer));
1459 xfs_debug(mp, "---- itimer = %d",
1460 (int)be32_to_cpu(dqp->q_core.d_itimer));
1461 xfs_debug(mp, "---------------------------");
1462}
1463#endif
1464
1465/* 1427/*
1466 * Give the buffer a little push if it is incore and 1428 * Give the buffer a little push if it is incore and
1467 * wait on the flush lock. 1429 * wait on the flush lock.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5da3a23b820d..34b7e945dbfa 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -116,12 +116,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
116 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ 116 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
117 (XFS_IS_OQUOTA_ON((d)->q_mount)))) 117 (XFS_IS_OQUOTA_ON((d)->q_mount))))
118 118
119#ifdef QUOTADEBUG
120extern void xfs_qm_dqprint(xfs_dquot_t *);
121#else
122#define xfs_qm_dqprint(a)
123#endif
124
125extern void xfs_qm_dqdestroy(xfs_dquot_t *); 119extern void xfs_qm_dqdestroy(xfs_dquot_t *);
126extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 120extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
127extern int xfs_qm_dqpurge(xfs_dquot_t *); 121extern int xfs_qm_dqpurge(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index b94dace4e785..46e54ad9a2dc 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -67,32 +67,6 @@ static struct shrinker xfs_qm_shaker = {
67 .seeks = DEFAULT_SEEKS, 67 .seeks = DEFAULT_SEEKS,
68}; 68};
69 69
70#ifdef DEBUG
71extern struct mutex qcheck_lock;
72#endif
73
74#ifdef QUOTADEBUG
75static void
76xfs_qm_dquot_list_print(
77 struct xfs_mount *mp)
78{
79 xfs_dquot_t *dqp;
80 int i = 0;
81
82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
83 xfs_debug(mp, " %d. \"%d (%s)\" "
84 "bcnt = %lld, icnt = %lld, refs = %d",
85 i++, be32_to_cpu(dqp->q_core.d_id),
86 DQFLAGTO_TYPESTR(dqp),
87 (long long)be64_to_cpu(dqp->q_core.d_bcount),
88 (long long)be64_to_cpu(dqp->q_core.d_icount),
89 dqp->q_nrefs);
90 }
91}
92#else
93static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
94#endif
95
96/* 70/*
97 * Initialize the XQM structure. 71 * Initialize the XQM structure.
98 * Note that there is not one quota manager per file system. 72 * Note that there is not one quota manager per file system.
@@ -165,9 +139,6 @@ xfs_Gqm_init(void)
165 atomic_set(&xqm->qm_totaldquots, 0); 139 atomic_set(&xqm->qm_totaldquots, 0);
166 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; 140 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
167 xqm->qm_nrefs = 0; 141 xqm->qm_nrefs = 0;
168#ifdef DEBUG
169 mutex_init(&qcheck_lock);
170#endif
171 return xqm; 142 return xqm;
172 143
173 out_free_udqhash: 144 out_free_udqhash:
@@ -204,9 +175,6 @@ xfs_qm_destroy(
204 mutex_lock(&xqm->qm_dqfrlist_lock); 175 mutex_lock(&xqm->qm_dqfrlist_lock);
205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { 176 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
206 xfs_dqlock(dqp); 177 xfs_dqlock(dqp);
207#ifdef QUOTADEBUG
208 xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
209#endif
210 list_del_init(&dqp->q_freelist); 178 list_del_init(&dqp->q_freelist);
211 xfs_Gqm->qm_dqfrlist_cnt--; 179 xfs_Gqm->qm_dqfrlist_cnt--;
212 xfs_dqunlock(dqp); 180 xfs_dqunlock(dqp);
@@ -214,9 +182,6 @@ xfs_qm_destroy(
214 } 182 }
215 mutex_unlock(&xqm->qm_dqfrlist_lock); 183 mutex_unlock(&xqm->qm_dqfrlist_lock);
216 mutex_destroy(&xqm->qm_dqfrlist_lock); 184 mutex_destroy(&xqm->qm_dqfrlist_lock);
217#ifdef DEBUG
218 mutex_destroy(&qcheck_lock);
219#endif
220 kmem_free(xqm); 185 kmem_free(xqm);
221} 186}
222 187
@@ -409,11 +374,6 @@ xfs_qm_mount_quotas(
409 xfs_warn(mp, "Failed to initialize disk quotas."); 374 xfs_warn(mp, "Failed to initialize disk quotas.");
410 return; 375 return;
411 } 376 }
412
413#ifdef QUOTADEBUG
414 if (XFS_IS_QUOTA_ON(mp))
415 xfs_qm_internalqcheck(mp);
416#endif
417} 377}
418 378
419/* 379/*
@@ -866,8 +826,8 @@ xfs_qm_dqattach_locked(
866 } 826 }
867 827
868 done: 828 done:
869#ifdef QUOTADEBUG 829#ifdef DEBUG
870 if (! error) { 830 if (!error) {
871 if (XFS_IS_UQUOTA_ON(mp)) 831 if (XFS_IS_UQUOTA_ON(mp))
872 ASSERT(ip->i_udquot); 832 ASSERT(ip->i_udquot);
873 if (XFS_IS_OQUOTA_ON(mp)) 833 if (XFS_IS_OQUOTA_ON(mp))
@@ -1733,8 +1693,6 @@ xfs_qm_quotacheck(
1733 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); 1693 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1734 mp->m_qflags |= flags; 1694 mp->m_qflags |= flags;
1735 1695
1736 xfs_qm_dquot_list_print(mp);
1737
1738 error_return: 1696 error_return:
1739 if (error) { 1697 if (error) {
1740 xfs_warn(mp, 1698 xfs_warn(mp,
@@ -2096,9 +2054,6 @@ xfs_qm_write_sb_changes(
2096 xfs_trans_t *tp; 2054 xfs_trans_t *tp;
2097 int error; 2055 int error;
2098 2056
2099#ifdef QUOTADEBUG
2100 xfs_notice(mp, "Writing superblock quota changes");
2101#endif
2102 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 2057 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2103 if ((error = xfs_trans_reserve(tp, 0, 2058 if ((error = xfs_trans_reserve(tp, 0,
2104 mp->m_sb.sb_sectsize + 128, 0, 2059 mp->m_sb.sb_sectsize + 128, 0,
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 567b29b9f1b3..43b9abe1052c 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -163,10 +163,4 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
163extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 163extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
164extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); 164extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
165 165
166#ifdef DEBUG
167extern int xfs_qm_internalqcheck(xfs_mount_t *);
168#else
169#define xfs_qm_internalqcheck(mp) (0)
170#endif
171
172#endif /* __XFS_QM_H__ */ 166#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 2dadb15d5ca9..609246f42e6c 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -263,7 +263,7 @@ xfs_qm_scall_trunc_qfile(
263 xfs_ilock(ip, XFS_ILOCK_EXCL); 263 xfs_ilock(ip, XFS_ILOCK_EXCL);
264 xfs_trans_ijoin(tp, ip); 264 xfs_trans_ijoin(tp, ip);
265 265
266 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); 266 error = xfs_itruncate_data(&tp, ip, 0);
267 if (error) { 267 if (error) {
268 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 268 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
269 XFS_TRANS_ABORT); 269 XFS_TRANS_ABORT);
@@ -622,7 +622,6 @@ xfs_qm_scall_setqlim(
622 xfs_trans_log_dquot(tp, dqp); 622 xfs_trans_log_dquot(tp, dqp);
623 623
624 error = xfs_trans_commit(tp, 0); 624 error = xfs_trans_commit(tp, 0);
625 xfs_qm_dqprint(dqp);
626 xfs_qm_dqrele(dqp); 625 xfs_qm_dqrele(dqp);
627 626
628 out_unlock: 627 out_unlock:
@@ -657,7 +656,6 @@ xfs_qm_scall_getquota(
657 xfs_qm_dqput(dqp); 656 xfs_qm_dqput(dqp);
658 return XFS_ERROR(ENOENT); 657 return XFS_ERROR(ENOENT);
659 } 658 }
660 /* xfs_qm_dqprint(dqp); */
661 /* 659 /*
662 * Convert the disk dquot to the exportable format 660 * Convert the disk dquot to the exportable format
663 */ 661 */
@@ -906,354 +904,3 @@ xfs_qm_dqrele_all_inodes(
906 ASSERT(mp->m_quotainfo); 904 ASSERT(mp->m_quotainfo);
907 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); 905 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
908} 906}
909
910/*------------------------------------------------------------------------*/
911#ifdef DEBUG
912/*
913 * This contains all the test functions for XFS disk quotas.
914 * Currently it does a quota accounting check. ie. it walks through
915 * all inodes in the file system, calculating the dquot accounting fields,
916 * and prints out any inconsistencies.
917 */
918xfs_dqhash_t *qmtest_udqtab;
919xfs_dqhash_t *qmtest_gdqtab;
920int qmtest_hashmask;
921int qmtest_nfails;
922struct mutex qcheck_lock;
923
924#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
925 (__psunsigned_t)(id)) & \
926 (qmtest_hashmask - 1))
927
928#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \
929 (qmtest_udqtab + \
930 DQTEST_HASHVAL(mp, id)) : \
931 (qmtest_gdqtab + \
932 DQTEST_HASHVAL(mp, id)))
933
934#define DQTEST_LIST_PRINT(l, NXT, title) \
935{ \
936 xfs_dqtest_t *dqp; int i = 0;\
937 xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
938 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
939 dqp = (xfs_dqtest_t *)dqp->NXT) { \
940 xfs_debug(dqp->q_mount, \
941 " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
942 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
943 dqp->d_bcount, dqp->d_icount); } \
944}
945
946typedef struct dqtest {
947 uint dq_flags; /* various flags (XFS_DQ_*) */
948 struct list_head q_hashlist;
949 xfs_dqhash_t *q_hash; /* the hashchain header */
950 xfs_mount_t *q_mount; /* filesystem this relates to */
951 xfs_dqid_t d_id; /* user id or group id */
952 xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */
953 xfs_qcnt_t d_icount; /* # inodes owned by the user */
954} xfs_dqtest_t;
955
956STATIC void
957xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
958{
959 list_add(&dqp->q_hashlist, &h->qh_list);
960 h->qh_version++;
961 h->qh_nelems++;
962}
963STATIC void
964xfs_qm_dqtest_print(
965 struct xfs_mount *mp,
966 struct dqtest *d)
967{
968 xfs_debug(mp, "-----------DQTEST DQUOT----------------");
969 xfs_debug(mp, "---- dquot ID = %d", d->d_id);
970 xfs_debug(mp, "---- fs = 0x%p", d->q_mount);
971 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
972 d->d_bcount, (int)d->d_bcount);
973 xfs_debug(mp, "---- icount = %Lu (0x%x)",
974 d->d_icount, (int)d->d_icount);
975 xfs_debug(mp, "---------------------------");
976}
977
978STATIC void
979xfs_qm_dqtest_failed(
980 xfs_dqtest_t *d,
981 xfs_dquot_t *dqp,
982 char *reason,
983 xfs_qcnt_t a,
984 xfs_qcnt_t b,
985 int error)
986{
987 qmtest_nfails++;
988 if (error)
989 xfs_debug(dqp->q_mount,
990 "quotacheck failed id=%d, err=%d\nreason: %s",
991 d->d_id, error, reason);
992 else
993 xfs_debug(dqp->q_mount,
994 "quotacheck failed id=%d (%s) [%d != %d]",
995 d->d_id, reason, (int)a, (int)b);
996 xfs_qm_dqtest_print(dqp->q_mount, d);
997 if (dqp)
998 xfs_qm_dqprint(dqp);
999}
1000
1001STATIC int
1002xfs_dqtest_cmp2(
1003 xfs_dqtest_t *d,
1004 xfs_dquot_t *dqp)
1005{
1006 int err = 0;
1007 if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) {
1008 xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
1009 be64_to_cpu(dqp->q_core.d_icount),
1010 d->d_icount, 0);
1011 err++;
1012 }
1013 if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) {
1014 xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
1015 be64_to_cpu(dqp->q_core.d_bcount),
1016 d->d_bcount, 0);
1017 err++;
1018 }
1019 if (dqp->q_core.d_blk_softlimit &&
1020 be64_to_cpu(dqp->q_core.d_bcount) >=
1021 be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
1022 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
1023 xfs_debug(dqp->q_mount,
1024 "%d [%s] BLK TIMER NOT STARTED",
1025 d->d_id, DQFLAGTO_TYPESTR(d));
1026 err++;
1027 }
1028 }
1029 if (dqp->q_core.d_ino_softlimit &&
1030 be64_to_cpu(dqp->q_core.d_icount) >=
1031 be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
1032 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
1033 xfs_debug(dqp->q_mount,
1034 "%d [%s] INO TIMER NOT STARTED",
1035 d->d_id, DQFLAGTO_TYPESTR(d));
1036 err++;
1037 }
1038 }
1039#ifdef QUOTADEBUG
1040 if (!err) {
1041 xfs_debug(dqp->q_mount, "%d [%s] qchecked",
1042 d->d_id, DQFLAGTO_TYPESTR(d));
1043 }
1044#endif
1045 return (err);
1046}
1047
1048STATIC void
1049xfs_dqtest_cmp(
1050 xfs_dqtest_t *d)
1051{
1052 xfs_dquot_t *dqp;
1053 int error;
1054
1055 /* xfs_qm_dqtest_print(d); */
1056 if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0,
1057 &dqp))) {
1058 xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
1059 return;
1060 }
1061 xfs_dqtest_cmp2(d, dqp);
1062 xfs_qm_dqput(dqp);
1063}
1064
1065STATIC int
1066xfs_qm_internalqcheck_dqget(
1067 xfs_mount_t *mp,
1068 xfs_dqid_t id,
1069 uint type,
1070 xfs_dqtest_t **O_dq)
1071{
1072 xfs_dqtest_t *d;
1073 xfs_dqhash_t *h;
1074
1075 h = DQTEST_HASH(mp, id, type);
1076 list_for_each_entry(d, &h->qh_list, q_hashlist) {
1077 if (d->d_id == id && mp == d->q_mount) {
1078 *O_dq = d;
1079 return (0);
1080 }
1081 }
1082 d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
1083 d->dq_flags = type;
1084 d->d_id = id;
1085 d->q_mount = mp;
1086 d->q_hash = h;
1087 INIT_LIST_HEAD(&d->q_hashlist);
1088 xfs_qm_hashinsert(h, d);
1089 *O_dq = d;
1090 return (0);
1091}
1092
1093STATIC void
1094xfs_qm_internalqcheck_get_dquots(
1095 xfs_mount_t *mp,
1096 xfs_dqid_t uid,
1097 xfs_dqid_t projid,
1098 xfs_dqid_t gid,
1099 xfs_dqtest_t **ud,
1100 xfs_dqtest_t **gd)
1101{
1102 if (XFS_IS_UQUOTA_ON(mp))
1103 xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
1104 if (XFS_IS_GQUOTA_ON(mp))
1105 xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
1106 else if (XFS_IS_PQUOTA_ON(mp))
1107 xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
1108}
1109
1110
1111STATIC void
1112xfs_qm_internalqcheck_dqadjust(
1113 xfs_inode_t *ip,
1114 xfs_dqtest_t *d)
1115{
1116 d->d_icount++;
1117 d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
1118}
1119
1120STATIC int
1121xfs_qm_internalqcheck_adjust(
1122 xfs_mount_t *mp, /* mount point for filesystem */
1123 xfs_ino_t ino, /* inode number to get data for */
1124 void __user *buffer, /* not used */
1125 int ubsize, /* not used */
1126 int *ubused, /* not used */
1127 int *res) /* bulkstat result code */
1128{
1129 xfs_inode_t *ip;
1130 xfs_dqtest_t *ud, *gd;
1131 uint lock_flags;
1132 boolean_t ipreleased;
1133 int error;
1134
1135 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1136
1137 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1138 *res = BULKSTAT_RV_NOTHING;
1139 xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
1140 __func__, (unsigned long long) ino,
1141 (unsigned long long) mp->m_sb.sb_uquotino,
1142 (unsigned long long) mp->m_sb.sb_gquotino);
1143 return XFS_ERROR(EINVAL);
1144 }
1145 ipreleased = B_FALSE;
1146 again:
1147 lock_flags = XFS_ILOCK_SHARED;
1148 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
1149 *res = BULKSTAT_RV_NOTHING;
1150 return (error);
1151 }
1152
1153 /*
1154 * This inode can have blocks after eof which can get released
1155 * when we send it to inactive. Since we don't check the dquot
1156 * until the after all our calculations are done, we must get rid
1157 * of those now.
1158 */
1159 if (! ipreleased) {
1160 xfs_iunlock(ip, lock_flags);
1161 IRELE(ip);
1162 ipreleased = B_TRUE;
1163 goto again;
1164 }
1165 xfs_qm_internalqcheck_get_dquots(mp,
1166 (xfs_dqid_t) ip->i_d.di_uid,
1167 (xfs_dqid_t) xfs_get_projid(ip),
1168 (xfs_dqid_t) ip->i_d.di_gid,
1169 &ud, &gd);
1170 if (XFS_IS_UQUOTA_ON(mp)) {
1171 ASSERT(ud);
1172 xfs_qm_internalqcheck_dqadjust(ip, ud);
1173 }
1174 if (XFS_IS_OQUOTA_ON(mp)) {
1175 ASSERT(gd);
1176 xfs_qm_internalqcheck_dqadjust(ip, gd);
1177 }
1178 xfs_iunlock(ip, lock_flags);
1179 IRELE(ip);
1180 *res = BULKSTAT_RV_DIDONE;
1181 return (0);
1182}
1183
1184
1185/* PRIVATE, debugging */
1186int
1187xfs_qm_internalqcheck(
1188 xfs_mount_t *mp)
1189{
1190 xfs_ino_t lastino;
1191 int done, count;
1192 int i;
1193 int error;
1194
1195 lastino = 0;
1196 qmtest_hashmask = 32;
1197 count = 5;
1198 done = 0;
1199 qmtest_nfails = 0;
1200
1201 if (! XFS_IS_QUOTA_ON(mp))
1202 return XFS_ERROR(ESRCH);
1203
1204 xfs_log_force(mp, XFS_LOG_SYNC);
1205 XFS_bflush(mp->m_ddev_targp);
1206 xfs_log_force(mp, XFS_LOG_SYNC);
1207 XFS_bflush(mp->m_ddev_targp);
1208
1209 mutex_lock(&qcheck_lock);
1210 /* There should be absolutely no quota activity while this
1211 is going on. */
1212 qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
1213 sizeof(xfs_dqhash_t), KM_SLEEP);
1214 qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
1215 sizeof(xfs_dqhash_t), KM_SLEEP);
1216 do {
1217 /*
1218 * Iterate thru all the inodes in the file system,
1219 * adjusting the corresponding dquot counters
1220 */
1221 error = xfs_bulkstat(mp, &lastino, &count,
1222 xfs_qm_internalqcheck_adjust,
1223 0, NULL, &done);
1224 if (error) {
1225 xfs_debug(mp, "Bulkstat returned error 0x%x", error);
1226 break;
1227 }
1228 } while (!done);
1229
1230 xfs_debug(mp, "Checking results against system dquots");
1231 for (i = 0; i < qmtest_hashmask; i++) {
1232 xfs_dqtest_t *d, *n;
1233 xfs_dqhash_t *h;
1234
1235 h = &qmtest_udqtab[i];
1236 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1237 xfs_dqtest_cmp(d);
1238 kmem_free(d);
1239 }
1240 h = &qmtest_gdqtab[i];
1241 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1242 xfs_dqtest_cmp(d);
1243 kmem_free(d);
1244 }
1245 }
1246
1247 if (qmtest_nfails) {
1248 xfs_debug(mp, "******** quotacheck failed ********");
1249 xfs_debug(mp, "failures = %d", qmtest_nfails);
1250 } else {
1251 xfs_debug(mp, "******** quotacheck successful! ********");
1252 }
1253 kmem_free(qmtest_udqtab);
1254 kmem_free(qmtest_gdqtab);
1255 mutex_unlock(&qcheck_lock);
1256 return (qmtest_nfails);
1257}
1258
1259#endif /* DEBUG */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 2a3648731331..4d00ee67792d 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -59,7 +59,7 @@ xfs_trans_dqjoin(
59 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); 59 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
60 60
61 /* 61 /*
62 * Initialize i_transp so we can later determine if this dquot is 62 * Initialize d_transp so we can later determine if this dquot is
63 * associated with this transaction. 63 * associated with this transaction.
64 */ 64 */
65 dqp->q_transp = tp; 65 dqp->q_transp = tp;
@@ -387,18 +387,18 @@ xfs_trans_apply_dquot_deltas(
387 qtrx->qt_delbcnt_delta; 387 qtrx->qt_delbcnt_delta;
388 totalrtbdelta = qtrx->qt_rtbcount_delta + 388 totalrtbdelta = qtrx->qt_rtbcount_delta +
389 qtrx->qt_delrtb_delta; 389 qtrx->qt_delrtb_delta;
390#ifdef QUOTADEBUG 390#ifdef DEBUG
391 if (totalbdelta < 0) 391 if (totalbdelta < 0)
392 ASSERT(be64_to_cpu(d->d_bcount) >= 392 ASSERT(be64_to_cpu(d->d_bcount) >=
393 (xfs_qcnt_t) -totalbdelta); 393 -totalbdelta);
394 394
395 if (totalrtbdelta < 0) 395 if (totalrtbdelta < 0)
396 ASSERT(be64_to_cpu(d->d_rtbcount) >= 396 ASSERT(be64_to_cpu(d->d_rtbcount) >=
397 (xfs_qcnt_t) -totalrtbdelta); 397 -totalrtbdelta);
398 398
399 if (qtrx->qt_icount_delta < 0) 399 if (qtrx->qt_icount_delta < 0)
400 ASSERT(be64_to_cpu(d->d_icount) >= 400 ASSERT(be64_to_cpu(d->d_icount) >=
401 (xfs_qcnt_t) -qtrx->qt_icount_delta); 401 -qtrx->qt_icount_delta);
402#endif 402#endif
403 if (totalbdelta) 403 if (totalbdelta)
404 be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); 404 be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
@@ -642,11 +642,6 @@ xfs_trans_dqresv(
642 ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || 642 ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && 643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { 644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
645#ifdef QUOTADEBUG
646 xfs_debug(mp,
647 "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
648 nblks, *resbcountp, hardlimit);
649#endif
650 if (nblks > 0) { 645 if (nblks > 0) {
651 /* 646 /*
652 * dquot is locked already. See if we'd go over the 647 * dquot is locked already. See if we'd go over the
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 5ad8ad3a1dcd..53ec3ea9a625 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,6 @@
22#define STATIC 22#define STATIC
23#define DEBUG 1 23#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 24#define XFS_BUF_LOCK_TRACKING 1
25/* #define QUOTADEBUG 1 */
26#endif 25#endif
27 26
28#include <linux-2.6/xfs_linux.h> 27#include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 95862bbff56b..1e00b3ef6274 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -570,9 +570,7 @@ xfs_alloc_ag_vextent_exact(
570 xfs_agblock_t tbno; /* start block of trimmed extent */ 570 xfs_agblock_t tbno; /* start block of trimmed extent */
571 xfs_extlen_t tlen; /* length of trimmed extent */ 571 xfs_extlen_t tlen; /* length of trimmed extent */
572 xfs_agblock_t tend; /* end block of trimmed extent */ 572 xfs_agblock_t tend; /* end block of trimmed extent */
573 xfs_agblock_t end; /* end of allocated extent */
574 int i; /* success/failure of operation */ 573 int i; /* success/failure of operation */
575 xfs_extlen_t rlen; /* length of returned extent */
576 574
577 ASSERT(args->alignment == 1); 575 ASSERT(args->alignment == 1);
578 576
@@ -625,18 +623,16 @@ xfs_alloc_ag_vextent_exact(
625 * 623 *
626 * Fix the length according to mod and prod if given. 624 * Fix the length according to mod and prod if given.
627 */ 625 */
628 end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); 626 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
629 args->len = end - args->agbno; 627 - args->agbno;
630 xfs_alloc_fix_len(args); 628 xfs_alloc_fix_len(args);
631 if (!xfs_alloc_fix_minleft(args)) 629 if (!xfs_alloc_fix_minleft(args))
632 goto not_found; 630 goto not_found;
633 631
634 rlen = args->len; 632 ASSERT(args->agbno + args->len <= tend);
635 ASSERT(args->agbno + rlen <= tend);
636 end = args->agbno + rlen;
637 633
638 /* 634 /*
639 * We are allocating agbno for rlen [agbno .. end] 635 * We are allocating agbno for args->len
640 * Allocate/initialize a cursor for the by-size btree. 636 * Allocate/initialize a cursor for the by-size btree.
641 */ 637 */
642 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 638 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
@@ -2127,7 +2123,7 @@ xfs_read_agf(
2127 * Validate the magic number of the agf block. 2123 * Validate the magic number of the agf block.
2128 */ 2124 */
2129 agf_ok = 2125 agf_ok =
2130 be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && 2126 agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2131 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2127 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2132 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2128 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2133 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2129 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 2b3518826a69..ffb3386e45c1 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -31,7 +31,6 @@
31#include "xfs_dinode.h" 31#include "xfs_dinode.h"
32#include "xfs_inode.h" 32#include "xfs_inode.h"
33#include "xfs_btree.h" 33#include "xfs_btree.h"
34#include "xfs_btree_trace.h"
35#include "xfs_alloc.h" 34#include "xfs_alloc.h"
36#include "xfs_error.h" 35#include "xfs_error.h"
37#include "xfs_trace.h" 36#include "xfs_trace.h"
@@ -311,72 +310,6 @@ xfs_allocbt_recs_inorder(
311} 310}
312#endif /* DEBUG */ 311#endif /* DEBUG */
313 312
314#ifdef XFS_BTREE_TRACE
315ktrace_t *xfs_allocbt_trace_buf;
316
317STATIC void
318xfs_allocbt_trace_enter(
319 struct xfs_btree_cur *cur,
320 const char *func,
321 char *s,
322 int type,
323 int line,
324 __psunsigned_t a0,
325 __psunsigned_t a1,
326 __psunsigned_t a2,
327 __psunsigned_t a3,
328 __psunsigned_t a4,
329 __psunsigned_t a5,
330 __psunsigned_t a6,
331 __psunsigned_t a7,
332 __psunsigned_t a8,
333 __psunsigned_t a9,
334 __psunsigned_t a10)
335{
336 ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
337 (void *)func, (void *)s, NULL, (void *)cur,
338 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
339 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
340 (void *)a8, (void *)a9, (void *)a10);
341}
342
343STATIC void
344xfs_allocbt_trace_cursor(
345 struct xfs_btree_cur *cur,
346 __uint32_t *s0,
347 __uint64_t *l0,
348 __uint64_t *l1)
349{
350 *s0 = cur->bc_private.a.agno;
351 *l0 = cur->bc_rec.a.ar_startblock;
352 *l1 = cur->bc_rec.a.ar_blockcount;
353}
354
355STATIC void
356xfs_allocbt_trace_key(
357 struct xfs_btree_cur *cur,
358 union xfs_btree_key *key,
359 __uint64_t *l0,
360 __uint64_t *l1)
361{
362 *l0 = be32_to_cpu(key->alloc.ar_startblock);
363 *l1 = be32_to_cpu(key->alloc.ar_blockcount);
364}
365
366STATIC void
367xfs_allocbt_trace_record(
368 struct xfs_btree_cur *cur,
369 union xfs_btree_rec *rec,
370 __uint64_t *l0,
371 __uint64_t *l1,
372 __uint64_t *l2)
373{
374 *l0 = be32_to_cpu(rec->alloc.ar_startblock);
375 *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
376 *l2 = 0;
377}
378#endif /* XFS_BTREE_TRACE */
379
380static const struct xfs_btree_ops xfs_allocbt_ops = { 313static const struct xfs_btree_ops xfs_allocbt_ops = {
381 .rec_len = sizeof(xfs_alloc_rec_t), 314 .rec_len = sizeof(xfs_alloc_rec_t),
382 .key_len = sizeof(xfs_alloc_key_t), 315 .key_len = sizeof(xfs_alloc_key_t),
@@ -393,18 +326,10 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
393 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, 326 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
394 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, 327 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
395 .key_diff = xfs_allocbt_key_diff, 328 .key_diff = xfs_allocbt_key_diff,
396
397#ifdef DEBUG 329#ifdef DEBUG
398 .keys_inorder = xfs_allocbt_keys_inorder, 330 .keys_inorder = xfs_allocbt_keys_inorder,
399 .recs_inorder = xfs_allocbt_recs_inorder, 331 .recs_inorder = xfs_allocbt_recs_inorder,
400#endif 332#endif
401
402#ifdef XFS_BTREE_TRACE
403 .trace_enter = xfs_allocbt_trace_enter,
404 .trace_cursor = xfs_allocbt_trace_cursor,
405 .trace_key = xfs_allocbt_trace_key,
406 .trace_record = xfs_allocbt_trace_record,
407#endif
408}; 333};
409 334
410/* 335/*
@@ -427,13 +352,16 @@ xfs_allocbt_init_cursor(
427 352
428 cur->bc_tp = tp; 353 cur->bc_tp = tp;
429 cur->bc_mp = mp; 354 cur->bc_mp = mp;
430 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
431 cur->bc_btnum = btnum; 355 cur->bc_btnum = btnum;
432 cur->bc_blocklog = mp->m_sb.sb_blocklog; 356 cur->bc_blocklog = mp->m_sb.sb_blocklog;
433
434 cur->bc_ops = &xfs_allocbt_ops; 357 cur->bc_ops = &xfs_allocbt_ops;
435 if (btnum == XFS_BTNUM_CNT) 358
359 if (btnum == XFS_BTNUM_CNT) {
360 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
436 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; 361 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
362 } else {
363 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
364 }
437 365
438 cur->bc_private.a.agbp = agbp; 366 cur->bc_private.a.agbp = agbp;
439 cur->bc_private.a.agno = agno; 367 cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
deleted file mode 100644
index 0902249354a0..000000000000
--- a/fs/xfs/xfs_arch.h
+++ /dev/null
@@ -1,136 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_ARCH_H__
19#define __XFS_ARCH_H__
20
21#ifndef XFS_BIG_INUMS
22# error XFS_BIG_INUMS must be defined true or false
23#endif
24
25#ifdef __KERNEL__
26
27#include <asm/byteorder.h>
28
29#ifdef __BIG_ENDIAN
30#define XFS_NATIVE_HOST 1
31#else
32#undef XFS_NATIVE_HOST
33#endif
34
35#else /* __KERNEL__ */
36
37#if __BYTE_ORDER == __BIG_ENDIAN
38#define XFS_NATIVE_HOST 1
39#else
40#undef XFS_NATIVE_HOST
41#endif
42
43#ifdef XFS_NATIVE_HOST
44#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
45#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
46#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
47#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
48#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
49#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
50#else
51#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
52#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
53#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
54#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
55#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
56#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
57#endif
58
59static inline void be16_add_cpu(__be16 *a, __s16 b)
60{
61 *a = cpu_to_be16(be16_to_cpu(*a) + b);
62}
63
64static inline void be32_add_cpu(__be32 *a, __s32 b)
65{
66 *a = cpu_to_be32(be32_to_cpu(*a) + b);
67}
68
69static inline void be64_add_cpu(__be64 *a, __s64 b)
70{
71 *a = cpu_to_be64(be64_to_cpu(*a) + b);
72}
73
74#endif /* __KERNEL__ */
75
76/*
77 * get and set integers from potentially unaligned locations
78 */
79
80#define INT_GET_UNALIGNED_16_BE(pointer) \
81 ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
82#define INT_SET_UNALIGNED_16_BE(pointer,value) \
83 { \
84 ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
85 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \
86 }
87
88/*
89 * In directories inode numbers are stored as unaligned arrays of unsigned
90 * 8bit integers on disk.
91 *
92 * For v1 directories or v2 directories that contain inode numbers that
93 * do not fit into 32bit the array has eight members, but the first member
94 * is always zero:
95 *
96 * |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7|
97 *
98 * For v2 directories that only contain entries with inode numbers that fit
99 * into 32bits a four-member array is used:
100 *
101 * |24-31|16-23| 8-15| 0- 7|
102 */
103
104#define XFS_GET_DIR_INO4(di) \
105 (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
106
107#define XFS_PUT_DIR_INO4(from, di) \
108do { \
109 (di).i[0] = (((from) & 0xff000000ULL) >> 24); \
110 (di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \
111 (di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \
112 (di).i[3] = ((from) & 0x000000ffULL); \
113} while (0)
114
115#define XFS_DI_HI(di) \
116 (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
117#define XFS_DI_LO(di) \
118 (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
119
120#define XFS_GET_DIR_INO8(di) \
121 (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
122 ((xfs_ino_t)XFS_DI_HI(di) << 32))
123
124#define XFS_PUT_DIR_INO8(from, di) \
125do { \
126 (di).i[0] = 0; \
127 (di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \
128 (di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \
129 (di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \
130 (di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \
131 (di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \
132 (di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \
133 (di).i[7] = ((from) & 0x00000000000000ffULL); \
134} while (0)
135
136#endif /* __XFS_ARCH_H__ */
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01d2072fb6d4..cbae424fe1ba 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -822,17 +822,21 @@ xfs_attr_inactive(xfs_inode_t *dp)
822 error = xfs_attr_root_inactive(&trans, dp); 822 error = xfs_attr_root_inactive(&trans, dp);
823 if (error) 823 if (error)
824 goto out; 824 goto out;
825
825 /* 826 /*
826 * signal synchronous inactive transactions unless this 827 * Signal synchronous inactive transactions unless this is a
827 * is a synchronous mount filesystem in which case we 828 * synchronous mount filesystem in which case we know that we're here
828 * know that we're here because we've been called out of 829 * because we've been called out of xfs_inactive which means that the
829 * xfs_inactive which means that the last reference is gone 830 * last reference is gone and the unlink transaction has already hit
830 * and the unlink transaction has already hit the disk so 831 * the disk so async inactive transactions are safe.
831 * async inactive transactions are safe.
832 */ 832 */
833 if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, 833 if (!(mp->m_flags & XFS_MOUNT_WSYNC)) {
834 (!(mp->m_flags & XFS_MOUNT_WSYNC) 834 if (dp->i_d.di_anextents > 0)
835 ? 1 : 0)))) 835 xfs_trans_set_sync(trans);
836 }
837
838 error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
839 if (error)
836 goto out; 840 goto out;
837 841
838 /* 842 /*
@@ -1199,7 +1203,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1199 return XFS_ERROR(error); 1203 return XFS_ERROR(error);
1200 ASSERT(bp != NULL); 1204 ASSERT(bp != NULL);
1201 leaf = bp->data; 1205 leaf = bp->data;
1202 if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { 1206 if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
1203 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, 1207 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1204 context->dp->i_mount, leaf); 1208 context->dp->i_mount, leaf);
1205 xfs_da_brelse(NULL, bp); 1209 xfs_da_brelse(NULL, bp);
@@ -1606,9 +1610,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1606 XFS_ATTR_FORK); 1610 XFS_ATTR_FORK);
1607 if (error) 1611 if (error)
1608 goto out; 1612 goto out;
1609 ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *) 1613 ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) ==
1610 bp->data)->hdr.info.magic) 1614 cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1611 == XFS_ATTR_LEAF_MAGIC);
1612 1615
1613 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1616 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1614 xfs_bmap_init(args->flist, args->firstblock); 1617 xfs_bmap_init(args->flist, args->firstblock);
@@ -1873,11 +1876,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1873 return(XFS_ERROR(EFSCORRUPTED)); 1876 return(XFS_ERROR(EFSCORRUPTED));
1874 } 1877 }
1875 node = bp->data; 1878 node = bp->data;
1876 if (be16_to_cpu(node->hdr.info.magic) 1879 if (node->hdr.info.magic ==
1877 == XFS_ATTR_LEAF_MAGIC) 1880 cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
1878 break; 1881 break;
1879 if (unlikely(be16_to_cpu(node->hdr.info.magic) 1882 if (unlikely(node->hdr.info.magic !=
1880 != XFS_DA_NODE_MAGIC)) { 1883 cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1881 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", 1884 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1882 XFS_ERRLEVEL_LOW, 1885 XFS_ERRLEVEL_LOW,
1883 context->dp->i_mount, 1886 context->dp->i_mount,
@@ -1912,8 +1915,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1912 */ 1915 */
1913 for (;;) { 1916 for (;;) {
1914 leaf = bp->data; 1917 leaf = bp->data;
1915 if (unlikely(be16_to_cpu(leaf->hdr.info.magic) 1918 if (unlikely(leaf->hdr.info.magic !=
1916 != XFS_ATTR_LEAF_MAGIC)) { 1919 cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
1917 XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", 1920 XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
1918 XFS_ERRLEVEL_LOW, 1921 XFS_ERRLEVEL_LOW,
1919 context->dp->i_mount, leaf); 1922 context->dp->i_mount, leaf);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 71e90dc2aeb1..8fad9602542b 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -731,7 +731,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
731 int bytes, i; 731 int bytes, i;
732 732
733 leaf = bp->data; 733 leaf = bp->data;
734 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 734 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
735 735
736 entry = &leaf->entries[0]; 736 entry = &leaf->entries[0];
737 bytes = sizeof(struct xfs_attr_sf_hdr); 737 bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -777,7 +777,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
777 ASSERT(bp != NULL); 777 ASSERT(bp != NULL);
778 memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); 778 memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
779 leaf = (xfs_attr_leafblock_t *)tmpbuffer; 779 leaf = (xfs_attr_leafblock_t *)tmpbuffer;
780 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 780 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
781 memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); 781 memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
782 782
783 /* 783 /*
@@ -872,7 +872,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
872 goto out; 872 goto out;
873 node = bp1->data; 873 node = bp1->data;
874 leaf = bp2->data; 874 leaf = bp2->data;
875 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 875 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
876 /* both on-disk, don't endian-flip twice */ 876 /* both on-disk, don't endian-flip twice */
877 node->btree[0].hashval = 877 node->btree[0].hashval =
878 leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; 878 leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
@@ -997,7 +997,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
997 int tablesize, entsize, sum, tmp, i; 997 int tablesize, entsize, sum, tmp, i;
998 998
999 leaf = bp->data; 999 leaf = bp->data;
1000 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1000 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1001 ASSERT((args->index >= 0) 1001 ASSERT((args->index >= 0)
1002 && (args->index <= be16_to_cpu(leaf->hdr.count))); 1002 && (args->index <= be16_to_cpu(leaf->hdr.count)));
1003 hdr = &leaf->hdr; 1003 hdr = &leaf->hdr;
@@ -1070,7 +1070,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
1070 int tmp, i; 1070 int tmp, i;
1071 1071
1072 leaf = bp->data; 1072 leaf = bp->data;
1073 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1073 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1074 hdr = &leaf->hdr; 1074 hdr = &leaf->hdr;
1075 ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); 1075 ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
1076 ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count))); 1076 ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
@@ -1256,8 +1256,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1256 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); 1256 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
1257 leaf1 = blk1->bp->data; 1257 leaf1 = blk1->bp->data;
1258 leaf2 = blk2->bp->data; 1258 leaf2 = blk2->bp->data;
1259 ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1259 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1260 ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1260 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1261 args = state->args; 1261 args = state->args;
1262 1262
1263 /* 1263 /*
@@ -1533,7 +1533,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1533 */ 1533 */
1534 blk = &state->path.blk[ state->path.active-1 ]; 1534 blk = &state->path.blk[ state->path.active-1 ];
1535 info = blk->bp->data; 1535 info = blk->bp->data;
1536 ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); 1536 ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1537 leaf = (xfs_attr_leafblock_t *)info; 1537 leaf = (xfs_attr_leafblock_t *)info;
1538 count = be16_to_cpu(leaf->hdr.count); 1538 count = be16_to_cpu(leaf->hdr.count);
1539 bytes = sizeof(xfs_attr_leaf_hdr_t) + 1539 bytes = sizeof(xfs_attr_leaf_hdr_t) +
@@ -1596,7 +1596,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1596 bytes = state->blocksize - (state->blocksize>>2); 1596 bytes = state->blocksize - (state->blocksize>>2);
1597 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1597 bytes -= be16_to_cpu(leaf->hdr.usedbytes);
1598 leaf = bp->data; 1598 leaf = bp->data;
1599 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1599 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1600 count += be16_to_cpu(leaf->hdr.count); 1600 count += be16_to_cpu(leaf->hdr.count);
1601 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1601 bytes -= be16_to_cpu(leaf->hdr.usedbytes);
1602 bytes -= count * sizeof(xfs_attr_leaf_entry_t); 1602 bytes -= count * sizeof(xfs_attr_leaf_entry_t);
@@ -1650,7 +1650,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
1650 xfs_mount_t *mp; 1650 xfs_mount_t *mp;
1651 1651
1652 leaf = bp->data; 1652 leaf = bp->data;
1653 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1653 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1654 hdr = &leaf->hdr; 1654 hdr = &leaf->hdr;
1655 mp = args->trans->t_mountp; 1655 mp = args->trans->t_mountp;
1656 ASSERT((be16_to_cpu(hdr->count) > 0) 1656 ASSERT((be16_to_cpu(hdr->count) > 0)
@@ -1813,8 +1813,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1813 ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1813 ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1814 drop_leaf = drop_blk->bp->data; 1814 drop_leaf = drop_blk->bp->data;
1815 save_leaf = save_blk->bp->data; 1815 save_leaf = save_blk->bp->data;
1816 ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1816 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1817 ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1817 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1818 drop_hdr = &drop_leaf->hdr; 1818 drop_hdr = &drop_leaf->hdr;
1819 save_hdr = &save_leaf->hdr; 1819 save_hdr = &save_leaf->hdr;
1820 1820
@@ -1915,7 +1915,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
1915 xfs_dahash_t hashval; 1915 xfs_dahash_t hashval;
1916 1916
1917 leaf = bp->data; 1917 leaf = bp->data;
1918 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1918 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1919 ASSERT(be16_to_cpu(leaf->hdr.count) 1919 ASSERT(be16_to_cpu(leaf->hdr.count)
1920 < (XFS_LBSIZE(args->dp->i_mount)/8)); 1920 < (XFS_LBSIZE(args->dp->i_mount)/8));
1921 1921
@@ -2019,7 +2019,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
2019 xfs_attr_leaf_name_remote_t *name_rmt; 2019 xfs_attr_leaf_name_remote_t *name_rmt;
2020 2020
2021 leaf = bp->data; 2021 leaf = bp->data;
2022 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2022 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2023 ASSERT(be16_to_cpu(leaf->hdr.count) 2023 ASSERT(be16_to_cpu(leaf->hdr.count)
2024 < (XFS_LBSIZE(args->dp->i_mount)/8)); 2024 < (XFS_LBSIZE(args->dp->i_mount)/8));
2025 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2025 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
@@ -2087,8 +2087,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2087 /* 2087 /*
2088 * Set up environment. 2088 * Set up environment.
2089 */ 2089 */
2090 ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2090 ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2091 ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2091 ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2092 hdr_s = &leaf_s->hdr; 2092 hdr_s = &leaf_s->hdr;
2093 hdr_d = &leaf_d->hdr; 2093 hdr_d = &leaf_d->hdr;
2094 ASSERT((be16_to_cpu(hdr_s->count) > 0) && 2094 ASSERT((be16_to_cpu(hdr_s->count) > 0) &&
@@ -2222,8 +2222,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
2222 2222
2223 leaf1 = leaf1_bp->data; 2223 leaf1 = leaf1_bp->data;
2224 leaf2 = leaf2_bp->data; 2224 leaf2 = leaf2_bp->data;
2225 ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && 2225 ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
2226 (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); 2226 (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
2227 if ((be16_to_cpu(leaf1->hdr.count) > 0) && 2227 if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
2228 (be16_to_cpu(leaf2->hdr.count) > 0) && 2228 (be16_to_cpu(leaf2->hdr.count) > 0) &&
2229 ((be32_to_cpu(leaf2->entries[0].hashval) < 2229 ((be32_to_cpu(leaf2->entries[0].hashval) <
@@ -2246,7 +2246,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
2246 xfs_attr_leafblock_t *leaf; 2246 xfs_attr_leafblock_t *leaf;
2247 2247
2248 leaf = bp->data; 2248 leaf = bp->data;
2249 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2249 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2250 if (count) 2250 if (count)
2251 *count = be16_to_cpu(leaf->hdr.count); 2251 *count = be16_to_cpu(leaf->hdr.count);
2252 if (!leaf->hdr.count) 2252 if (!leaf->hdr.count)
@@ -2265,7 +2265,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
2265 xfs_attr_leaf_name_remote_t *name_rmt; 2265 xfs_attr_leaf_name_remote_t *name_rmt;
2266 int size; 2266 int size;
2267 2267
2268 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2268 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2269 if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { 2269 if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
2270 name_loc = xfs_attr_leaf_name_local(leaf, index); 2270 name_loc = xfs_attr_leaf_name_local(leaf, index);
2271 size = xfs_attr_leaf_entsize_local(name_loc->namelen, 2271 size = xfs_attr_leaf_entsize_local(name_loc->namelen,
@@ -2451,7 +2451,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2451 ASSERT(bp != NULL); 2451 ASSERT(bp != NULL);
2452 2452
2453 leaf = bp->data; 2453 leaf = bp->data;
2454 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2454 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2455 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2455 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
2456 ASSERT(args->index >= 0); 2456 ASSERT(args->index >= 0);
2457 entry = &leaf->entries[ args->index ]; 2457 entry = &leaf->entries[ args->index ];
@@ -2515,7 +2515,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
2515 ASSERT(bp != NULL); 2515 ASSERT(bp != NULL);
2516 2516
2517 leaf = bp->data; 2517 leaf = bp->data;
2518 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2518 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2519 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2519 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
2520 ASSERT(args->index >= 0); 2520 ASSERT(args->index >= 0);
2521 entry = &leaf->entries[ args->index ]; 2521 entry = &leaf->entries[ args->index ];
@@ -2585,13 +2585,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2585 } 2585 }
2586 2586
2587 leaf1 = bp1->data; 2587 leaf1 = bp1->data;
2588 ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2588 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2589 ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); 2589 ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
2590 ASSERT(args->index >= 0); 2590 ASSERT(args->index >= 0);
2591 entry1 = &leaf1->entries[ args->index ]; 2591 entry1 = &leaf1->entries[ args->index ];
2592 2592
2593 leaf2 = bp2->data; 2593 leaf2 = bp2->data;
2594 ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2594 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2595 ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); 2595 ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
2596 ASSERT(args->index2 >= 0); 2596 ASSERT(args->index2 >= 0);
2597 entry2 = &leaf2->entries[ args->index2 ]; 2597 entry2 = &leaf2->entries[ args->index2 ];
@@ -2689,9 +2689,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2689 * This is a depth-first traversal! 2689 * This is a depth-first traversal!
2690 */ 2690 */
2691 info = bp->data; 2691 info = bp->data;
2692 if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { 2692 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
2693 error = xfs_attr_node_inactive(trans, dp, bp, 1); 2693 error = xfs_attr_node_inactive(trans, dp, bp, 1);
2694 } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { 2694 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
2695 error = xfs_attr_leaf_inactive(trans, dp, bp); 2695 error = xfs_attr_leaf_inactive(trans, dp, bp);
2696 } else { 2696 } else {
2697 error = XFS_ERROR(EIO); 2697 error = XFS_ERROR(EIO);
@@ -2739,7 +2739,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
2739 } 2739 }
2740 2740
2741 node = bp->data; 2741 node = bp->data;
2742 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 2742 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
2743 parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ 2743 parent_blkno = xfs_da_blkno(bp); /* save for re-read later */
2744 count = be16_to_cpu(node->hdr.count); 2744 count = be16_to_cpu(node->hdr.count);
2745 if (!count) { 2745 if (!count) {
@@ -2773,10 +2773,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
2773 * Invalidate the subtree, however we have to. 2773 * Invalidate the subtree, however we have to.
2774 */ 2774 */
2775 info = child_bp->data; 2775 info = child_bp->data;
2776 if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { 2776 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
2777 error = xfs_attr_node_inactive(trans, dp, 2777 error = xfs_attr_node_inactive(trans, dp,
2778 child_bp, level+1); 2778 child_bp, level+1);
2779 } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { 2779 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
2780 error = xfs_attr_leaf_inactive(trans, dp, 2780 error = xfs_attr_leaf_inactive(trans, dp,
2781 child_bp); 2781 child_bp);
2782 } else { 2782 } else {
@@ -2836,7 +2836,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
2836 int error, count, size, tmp, i; 2836 int error, count, size, tmp, i;
2837 2837
2838 leaf = bp->data; 2838 leaf = bp->data;
2839 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2839 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2840 2840
2841 /* 2841 /*
2842 * Count the number of "remote" value extents. 2842 * Count the number of "remote" value extents.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e546a33214c9..c51a3f903633 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -29,15 +29,11 @@
29#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_dinode.h" 32#include "xfs_dinode.h"
34#include "xfs_inode.h" 33#include "xfs_inode.h"
35#include "xfs_btree.h" 34#include "xfs_btree.h"
36#include "xfs_mount.h" 35#include "xfs_mount.h"
37#include "xfs_itable.h" 36#include "xfs_itable.h"
38#include "xfs_dir2_data.h"
39#include "xfs_dir2_leaf.h"
40#include "xfs_dir2_block.h"
41#include "xfs_inode_item.h" 37#include "xfs_inode_item.h"
42#include "xfs_extfree_item.h" 38#include "xfs_extfree_item.h"
43#include "xfs_alloc.h" 39#include "xfs_alloc.h"
@@ -94,6 +90,7 @@ xfs_bmap_add_attrfork_local(
94 */ 90 */
95STATIC int /* error */ 91STATIC int /* error */
96xfs_bmap_add_extent_delay_real( 92xfs_bmap_add_extent_delay_real(
93 struct xfs_trans *tp, /* transaction pointer */
97 xfs_inode_t *ip, /* incore inode pointer */ 94 xfs_inode_t *ip, /* incore inode pointer */
98 xfs_extnum_t *idx, /* extent number to update/insert */ 95 xfs_extnum_t *idx, /* extent number to update/insert */
99 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 96 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -439,6 +436,7 @@ xfs_bmap_add_attrfork_local(
439 */ 436 */
440STATIC int /* error */ 437STATIC int /* error */
441xfs_bmap_add_extent( 438xfs_bmap_add_extent(
439 struct xfs_trans *tp, /* transaction pointer */
442 xfs_inode_t *ip, /* incore inode pointer */ 440 xfs_inode_t *ip, /* incore inode pointer */
443 xfs_extnum_t *idx, /* extent number to update/insert */ 441 xfs_extnum_t *idx, /* extent number to update/insert */
444 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 442 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -524,7 +522,7 @@ xfs_bmap_add_extent(
524 if (cur) 522 if (cur)
525 ASSERT(cur->bc_private.b.flags & 523 ASSERT(cur->bc_private.b.flags &
526 XFS_BTCUR_BPRV_WASDEL); 524 XFS_BTCUR_BPRV_WASDEL);
527 error = xfs_bmap_add_extent_delay_real(ip, 525 error = xfs_bmap_add_extent_delay_real(tp, ip,
528 idx, &cur, new, &da_new, 526 idx, &cur, new, &da_new,
529 first, flist, &logflags); 527 first, flist, &logflags);
530 } else { 528 } else {
@@ -561,7 +559,7 @@ xfs_bmap_add_extent(
561 int tmp_logflags; /* partial log flag return val */ 559 int tmp_logflags; /* partial log flag return val */
562 560
563 ASSERT(cur == NULL); 561 ASSERT(cur == NULL);
564 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, 562 error = xfs_bmap_extents_to_btree(tp, ip, first,
565 flist, &cur, da_old > 0, &tmp_logflags, whichfork); 563 flist, &cur, da_old > 0, &tmp_logflags, whichfork);
566 logflags |= tmp_logflags; 564 logflags |= tmp_logflags;
567 if (error) 565 if (error)
@@ -604,6 +602,7 @@ done:
604 */ 602 */
605STATIC int /* error */ 603STATIC int /* error */
606xfs_bmap_add_extent_delay_real( 604xfs_bmap_add_extent_delay_real(
605 struct xfs_trans *tp, /* transaction pointer */
607 xfs_inode_t *ip, /* incore inode pointer */ 606 xfs_inode_t *ip, /* incore inode pointer */
608 xfs_extnum_t *idx, /* extent number to update/insert */ 607 xfs_extnum_t *idx, /* extent number to update/insert */
609 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 608 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -901,7 +900,7 @@ xfs_bmap_add_extent_delay_real(
901 } 900 }
902 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 901 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
903 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 902 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
904 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, 903 error = xfs_bmap_extents_to_btree(tp, ip,
905 first, flist, &cur, 1, &tmp_rval, 904 first, flist, &cur, 1, &tmp_rval,
906 XFS_DATA_FORK); 905 XFS_DATA_FORK);
907 rval |= tmp_rval; 906 rval |= tmp_rval;
@@ -984,7 +983,7 @@ xfs_bmap_add_extent_delay_real(
984 } 983 }
985 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 984 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
986 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 985 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
987 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, 986 error = xfs_bmap_extents_to_btree(tp, ip,
988 first, flist, &cur, 1, &tmp_rval, 987 first, flist, &cur, 1, &tmp_rval,
989 XFS_DATA_FORK); 988 XFS_DATA_FORK);
990 rval |= tmp_rval; 989 rval |= tmp_rval;
@@ -1052,7 +1051,7 @@ xfs_bmap_add_extent_delay_real(
1052 } 1051 }
1053 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1052 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1054 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1053 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
1055 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, 1054 error = xfs_bmap_extents_to_btree(tp, ip,
1056 first, flist, &cur, 1, &tmp_rval, 1055 first, flist, &cur, 1, &tmp_rval,
1057 XFS_DATA_FORK); 1056 XFS_DATA_FORK);
1058 rval |= tmp_rval; 1057 rval |= tmp_rval;
@@ -2871,8 +2870,8 @@ xfs_bmap_del_extent(
2871 len = del->br_blockcount; 2870 len = del->br_blockcount;
2872 do_div(bno, mp->m_sb.sb_rextsize); 2871 do_div(bno, mp->m_sb.sb_rextsize);
2873 do_div(len, mp->m_sb.sb_rextsize); 2872 do_div(len, mp->m_sb.sb_rextsize);
2874 if ((error = xfs_rtfree_extent(ip->i_transp, bno, 2873 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
2875 (xfs_extlen_t)len))) 2874 if (error)
2876 goto done; 2875 goto done;
2877 do_fx = 0; 2876 do_fx = 0;
2878 nblks = len * mp->m_sb.sb_rextsize; 2877 nblks = len * mp->m_sb.sb_rextsize;
@@ -4080,7 +4079,7 @@ xfs_bmap_sanity_check(
4080{ 4079{
4081 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4080 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4082 4081
4083 if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || 4082 if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
4084 be16_to_cpu(block->bb_level) != level || 4083 be16_to_cpu(block->bb_level) != level ||
4085 be16_to_cpu(block->bb_numrecs) == 0 || 4084 be16_to_cpu(block->bb_numrecs) == 0 ||
4086 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) 4085 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
@@ -4662,7 +4661,7 @@ xfs_bmapi(
4662 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) 4661 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
4663 got.br_state = XFS_EXT_UNWRITTEN; 4662 got.br_state = XFS_EXT_UNWRITTEN;
4664 } 4663 }
4665 error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, 4664 error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got,
4666 firstblock, flist, &tmp_logflags, 4665 firstblock, flist, &tmp_logflags,
4667 whichfork); 4666 whichfork);
4668 logflags |= tmp_logflags; 4667 logflags |= tmp_logflags;
@@ -4763,7 +4762,7 @@ xfs_bmapi(
4763 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4762 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4764 ? XFS_EXT_NORM 4763 ? XFS_EXT_NORM
4765 : XFS_EXT_UNWRITTEN; 4764 : XFS_EXT_UNWRITTEN;
4766 error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, 4765 error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval,
4767 firstblock, flist, &tmp_logflags, 4766 firstblock, flist, &tmp_logflags,
4768 whichfork); 4767 whichfork);
4769 logflags |= tmp_logflags; 4768 logflags |= tmp_logflags;
@@ -5117,7 +5116,7 @@ xfs_bunmapi(
5117 del.br_blockcount = mod; 5116 del.br_blockcount = mod;
5118 } 5117 }
5119 del.br_state = XFS_EXT_UNWRITTEN; 5118 del.br_state = XFS_EXT_UNWRITTEN;
5120 error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, 5119 error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del,
5121 firstblock, flist, &logflags, 5120 firstblock, flist, &logflags,
5122 XFS_DATA_FORK); 5121 XFS_DATA_FORK);
5123 if (error) 5122 if (error)
@@ -5175,18 +5174,18 @@ xfs_bunmapi(
5175 } 5174 }
5176 prev.br_state = XFS_EXT_UNWRITTEN; 5175 prev.br_state = XFS_EXT_UNWRITTEN;
5177 lastx--; 5176 lastx--;
5178 error = xfs_bmap_add_extent(ip, &lastx, &cur, 5177 error = xfs_bmap_add_extent(tp, ip, &lastx,
5179 &prev, firstblock, flist, &logflags, 5178 &cur, &prev, firstblock, flist,
5180 XFS_DATA_FORK); 5179 &logflags, XFS_DATA_FORK);
5181 if (error) 5180 if (error)
5182 goto error0; 5181 goto error0;
5183 goto nodelete; 5182 goto nodelete;
5184 } else { 5183 } else {
5185 ASSERT(del.br_state == XFS_EXT_NORM); 5184 ASSERT(del.br_state == XFS_EXT_NORM);
5186 del.br_state = XFS_EXT_UNWRITTEN; 5185 del.br_state = XFS_EXT_UNWRITTEN;
5187 error = xfs_bmap_add_extent(ip, &lastx, &cur, 5186 error = xfs_bmap_add_extent(tp, ip, &lastx,
5188 &del, firstblock, flist, &logflags, 5187 &cur, &del, firstblock, flist,
5189 XFS_DATA_FORK); 5188 &logflags, XFS_DATA_FORK);
5190 if (error) 5189 if (error)
5191 goto error0; 5190 goto error0;
5192 goto nodelete; 5191 goto nodelete;
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 87d3c10b6954..e2f5d59cbeaf 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -33,7 +33,6 @@
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_alloc.h" 34#include "xfs_alloc.h"
35#include "xfs_btree.h" 35#include "xfs_btree.h"
36#include "xfs_btree_trace.h"
37#include "xfs_itable.h" 36#include "xfs_itable.h"
38#include "xfs_bmap.h" 37#include "xfs_bmap.h"
39#include "xfs_error.h" 38#include "xfs_error.h"
@@ -425,10 +424,10 @@ xfs_bmbt_to_bmdr(
425 xfs_bmbt_key_t *tkp; 424 xfs_bmbt_key_t *tkp;
426 __be64 *tpp; 425 __be64 *tpp;
427 426
428 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); 427 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
429 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); 428 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
430 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); 429 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
431 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 430 ASSERT(rblock->bb_level != 0);
432 dblock->bb_level = rblock->bb_level; 431 dblock->bb_level = rblock->bb_level;
433 dblock->bb_numrecs = rblock->bb_numrecs; 432 dblock->bb_numrecs = rblock->bb_numrecs;
434 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); 433 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
@@ -732,95 +731,6 @@ xfs_bmbt_recs_inorder(
732} 731}
733#endif /* DEBUG */ 732#endif /* DEBUG */
734 733
735#ifdef XFS_BTREE_TRACE
736ktrace_t *xfs_bmbt_trace_buf;
737
738STATIC void
739xfs_bmbt_trace_enter(
740 struct xfs_btree_cur *cur,
741 const char *func,
742 char *s,
743 int type,
744 int line,
745 __psunsigned_t a0,
746 __psunsigned_t a1,
747 __psunsigned_t a2,
748 __psunsigned_t a3,
749 __psunsigned_t a4,
750 __psunsigned_t a5,
751 __psunsigned_t a6,
752 __psunsigned_t a7,
753 __psunsigned_t a8,
754 __psunsigned_t a9,
755 __psunsigned_t a10)
756{
757 struct xfs_inode *ip = cur->bc_private.b.ip;
758 int whichfork = cur->bc_private.b.whichfork;
759
760 ktrace_enter(xfs_bmbt_trace_buf,
761 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
762 (void *)func, (void *)s, (void *)ip, (void *)cur,
763 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
764 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
765 (void *)a8, (void *)a9, (void *)a10);
766}
767
768STATIC void
769xfs_bmbt_trace_cursor(
770 struct xfs_btree_cur *cur,
771 __uint32_t *s0,
772 __uint64_t *l0,
773 __uint64_t *l1)
774{
775 struct xfs_bmbt_rec_host r;
776
777 xfs_bmbt_set_all(&r, &cur->bc_rec.b);
778
779 *s0 = (cur->bc_nlevels << 24) |
780 (cur->bc_private.b.flags << 16) |
781 cur->bc_private.b.allocated;
782 *l0 = r.l0;
783 *l1 = r.l1;
784}
785
786STATIC void
787xfs_bmbt_trace_key(
788 struct xfs_btree_cur *cur,
789 union xfs_btree_key *key,
790 __uint64_t *l0,
791 __uint64_t *l1)
792{
793 *l0 = be64_to_cpu(key->bmbt.br_startoff);
794 *l1 = 0;
795}
796
797/* Endian flipping versions of the bmbt extraction functions */
798STATIC void
799xfs_bmbt_disk_get_all(
800 xfs_bmbt_rec_t *r,
801 xfs_bmbt_irec_t *s)
802{
803 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
804 get_unaligned_be64(&r->l1), s);
805}
806
807STATIC void
808xfs_bmbt_trace_record(
809 struct xfs_btree_cur *cur,
810 union xfs_btree_rec *rec,
811 __uint64_t *l0,
812 __uint64_t *l1,
813 __uint64_t *l2)
814{
815 struct xfs_bmbt_irec irec;
816
817 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
818 *l0 = irec.br_startoff;
819 *l1 = irec.br_startblock;
820 *l2 = irec.br_blockcount;
821}
822#endif /* XFS_BTREE_TRACE */
823
824static const struct xfs_btree_ops xfs_bmbt_ops = { 734static const struct xfs_btree_ops xfs_bmbt_ops = {
825 .rec_len = sizeof(xfs_bmbt_rec_t), 735 .rec_len = sizeof(xfs_bmbt_rec_t),
826 .key_len = sizeof(xfs_bmbt_key_t), 736 .key_len = sizeof(xfs_bmbt_key_t),
@@ -837,18 +747,10 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
837 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, 747 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
838 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, 748 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
839 .key_diff = xfs_bmbt_key_diff, 749 .key_diff = xfs_bmbt_key_diff,
840
841#ifdef DEBUG 750#ifdef DEBUG
842 .keys_inorder = xfs_bmbt_keys_inorder, 751 .keys_inorder = xfs_bmbt_keys_inorder,
843 .recs_inorder = xfs_bmbt_recs_inorder, 752 .recs_inorder = xfs_bmbt_recs_inorder,
844#endif 753#endif
845
846#ifdef XFS_BTREE_TRACE
847 .trace_enter = xfs_bmbt_trace_enter,
848 .trace_cursor = xfs_bmbt_trace_cursor,
849 .trace_key = xfs_bmbt_trace_key,
850 .trace_record = xfs_bmbt_trace_record,
851#endif
852}; 754};
853 755
854/* 756/*
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 2f9e97c128a0..cabf4b5604aa 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -32,7 +32,6 @@
32#include "xfs_inode.h" 32#include "xfs_inode.h"
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_btree.h" 34#include "xfs_btree.h"
35#include "xfs_btree_trace.h"
36#include "xfs_error.h" 35#include "xfs_error.h"
37#include "xfs_trace.h" 36#include "xfs_trace.h"
38 37
@@ -66,11 +65,11 @@ xfs_btree_check_lblock(
66 be16_to_cpu(block->bb_numrecs) <= 65 be16_to_cpu(block->bb_numrecs) <=
67 cur->bc_ops->get_maxrecs(cur, level) && 66 cur->bc_ops->get_maxrecs(cur, level) &&
68 block->bb_u.l.bb_leftsib && 67 block->bb_u.l.bb_leftsib &&
69 (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || 68 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
70 XFS_FSB_SANITY_CHECK(mp, 69 XFS_FSB_SANITY_CHECK(mp,
71 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 70 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
72 block->bb_u.l.bb_rightsib && 71 block->bb_u.l.bb_rightsib &&
73 (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || 72 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
74 XFS_FSB_SANITY_CHECK(mp, 73 XFS_FSB_SANITY_CHECK(mp,
75 be64_to_cpu(block->bb_u.l.bb_rightsib))); 74 be64_to_cpu(block->bb_u.l.bb_rightsib)));
76 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, 75 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
@@ -105,10 +104,10 @@ xfs_btree_check_sblock(
105 be16_to_cpu(block->bb_level) == level && 104 be16_to_cpu(block->bb_level) == level &&
106 be16_to_cpu(block->bb_numrecs) <= 105 be16_to_cpu(block->bb_numrecs) <=
107 cur->bc_ops->get_maxrecs(cur, level) && 106 cur->bc_ops->get_maxrecs(cur, level) &&
108 (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || 107 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
109 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && 108 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
110 block->bb_u.s.bb_leftsib && 109 block->bb_u.s.bb_leftsib &&
111 (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || 110 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
112 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && 111 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
113 block->bb_u.s.bb_rightsib; 112 block->bb_u.s.bb_rightsib;
114 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, 113 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
@@ -511,9 +510,9 @@ xfs_btree_islastblock(
511 block = xfs_btree_get_block(cur, level, &bp); 510 block = xfs_btree_get_block(cur, level, &bp);
512 xfs_btree_check_block(cur, block, level, bp); 511 xfs_btree_check_block(cur, block, level, bp);
513 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 512 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
514 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; 513 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
515 else 514 else
516 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; 515 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
517} 516}
518 517
519/* 518/*
@@ -777,14 +776,14 @@ xfs_btree_setbuf(
777 776
778 b = XFS_BUF_TO_BLOCK(bp); 777 b = XFS_BUF_TO_BLOCK(bp);
779 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 778 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
780 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) 779 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
781 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 780 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
782 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) 781 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
783 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 782 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
784 } else { 783 } else {
785 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK) 784 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
786 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 785 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
787 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK) 786 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
788 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 787 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
789 } 788 }
790} 789}
@@ -795,9 +794,9 @@ xfs_btree_ptr_is_null(
795 union xfs_btree_ptr *ptr) 794 union xfs_btree_ptr *ptr)
796{ 795{
797 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 796 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
798 return be64_to_cpu(ptr->l) == NULLDFSBNO; 797 return ptr->l == cpu_to_be64(NULLDFSBNO);
799 else 798 else
800 return be32_to_cpu(ptr->s) == NULLAGBLOCK; 799 return ptr->s == cpu_to_be32(NULLAGBLOCK);
801} 800}
802 801
803STATIC void 802STATIC void
@@ -923,12 +922,12 @@ xfs_btree_ptr_to_daddr(
923 union xfs_btree_ptr *ptr) 922 union xfs_btree_ptr *ptr)
924{ 923{
925 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 924 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
926 ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO); 925 ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
927 926
928 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); 927 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
929 } else { 928 } else {
930 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); 929 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
931 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); 930 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
932 931
933 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, 932 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
934 be32_to_cpu(ptr->s)); 933 be32_to_cpu(ptr->s));
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 82fafc66bd1f..8d05a6a46ce3 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -199,25 +199,6 @@ struct xfs_btree_ops {
199 union xfs_btree_rec *r1, 199 union xfs_btree_rec *r1,
200 union xfs_btree_rec *r2); 200 union xfs_btree_rec *r2);
201#endif 201#endif
202
203 /* btree tracing */
204#ifdef XFS_BTREE_TRACE
205 void (*trace_enter)(struct xfs_btree_cur *, const char *,
206 char *, int, int, __psunsigned_t,
207 __psunsigned_t, __psunsigned_t,
208 __psunsigned_t, __psunsigned_t,
209 __psunsigned_t, __psunsigned_t,
210 __psunsigned_t, __psunsigned_t,
211 __psunsigned_t, __psunsigned_t);
212 void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
213 __uint64_t *, __uint64_t *);
214 void (*trace_key)(struct xfs_btree_cur *,
215 union xfs_btree_key *, __uint64_t *,
216 __uint64_t *);
217 void (*trace_record)(struct xfs_btree_cur *,
218 union xfs_btree_rec *, __uint64_t *,
219 __uint64_t *, __uint64_t *);
220#endif
221}; 202};
222 203
223/* 204/*
@@ -452,4 +433,23 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
452 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ 433 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
453 XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) 434 XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
454 435
436/*
437 * Trace hooks. Currently not implemented as they need to be ported
438 * over to the generic tracing functionality, which is some effort.
439 *
440 * i,j = integer (32 bit)
441 * b = btree block buffer (xfs_buf_t)
442 * p = btree ptr
443 * r = btree record
444 * k = btree key
445 */
446#define XFS_BTREE_TRACE_ARGBI(c, b, i)
447#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
448#define XFS_BTREE_TRACE_ARGI(c, i)
449#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
450#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
451#define XFS_BTREE_TRACE_ARGIK(c, i, k)
452#define XFS_BTREE_TRACE_ARGR(c, r)
453#define XFS_BTREE_TRACE_CURSOR(c, t)
454
455#endif /* __XFS_BTREE_H__ */ 455#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
deleted file mode 100644
index 44ff942a0fda..000000000000
--- a/fs/xfs/xfs_btree_trace.c
+++ /dev/null
@@ -1,249 +0,0 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_types.h"
20#include "xfs_inum.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_alloc_btree.h"
23#include "xfs_ialloc_btree.h"
24#include "xfs_inode.h"
25#include "xfs_btree.h"
26#include "xfs_btree_trace.h"
27
28STATIC void
29xfs_btree_trace_ptr(
30 struct xfs_btree_cur *cur,
31 union xfs_btree_ptr ptr,
32 __psunsigned_t *high,
33 __psunsigned_t *low)
34{
35 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
36 __u64 val = be64_to_cpu(ptr.l);
37 *high = val >> 32;
38 *low = (int)val;
39 } else {
40 *high = 0;
41 *low = be32_to_cpu(ptr.s);
42 }
43}
44
45/*
46 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
47 */
48void
49xfs_btree_trace_argbi(
50 const char *func,
51 struct xfs_btree_cur *cur,
52 struct xfs_buf *b,
53 int i,
54 int line)
55{
56 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
57 line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
58 0, 0, 0, 0);
59}
60
61/*
62 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
63 */
64void
65xfs_btree_trace_argbii(
66 const char *func,
67 struct xfs_btree_cur *cur,
68 struct xfs_buf *b,
69 int i0,
70 int i1,
71 int line)
72{
73 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
74 line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
75 0, 0, 0, 0);
76}
77
78/*
79 * Add a trace buffer entry for arguments, for 3 block-length args
80 * and an integer arg.
81 */
82void
83xfs_btree_trace_argfffi(
84 const char *func,
85 struct xfs_btree_cur *cur,
86 xfs_dfiloff_t o,
87 xfs_dfsbno_t b,
88 xfs_dfilblks_t i,
89 int j,
90 int line)
91{
92 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
93 line,
94 o >> 32, (int)o,
95 b >> 32, (int)b,
96 i >> 32, (int)i,
97 (int)j, 0, 0, 0, 0);
98}
99
100/*
101 * Add a trace buffer entry for arguments, for one integer arg.
102 */
103void
104xfs_btree_trace_argi(
105 const char *func,
106 struct xfs_btree_cur *cur,
107 int i,
108 int line)
109{
110 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
111 line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
112}
113
114/*
115 * Add a trace buffer entry for arguments, for int, fsblock, key.
116 */
117void
118xfs_btree_trace_argipk(
119 const char *func,
120 struct xfs_btree_cur *cur,
121 int i,
122 union xfs_btree_ptr ptr,
123 union xfs_btree_key *key,
124 int line)
125{
126 __psunsigned_t high, low;
127 __uint64_t l0, l1;
128
129 xfs_btree_trace_ptr(cur, ptr, &high, &low);
130 cur->bc_ops->trace_key(cur, key, &l0, &l1);
131 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
132 line, i, high, low,
133 l0 >> 32, (int)l0,
134 l1 >> 32, (int)l1,
135 0, 0, 0, 0);
136}
137
138/*
139 * Add a trace buffer entry for arguments, for int, fsblock, rec.
140 */
141void
142xfs_btree_trace_argipr(
143 const char *func,
144 struct xfs_btree_cur *cur,
145 int i,
146 union xfs_btree_ptr ptr,
147 union xfs_btree_rec *rec,
148 int line)
149{
150 __psunsigned_t high, low;
151 __uint64_t l0, l1, l2;
152
153 xfs_btree_trace_ptr(cur, ptr, &high, &low);
154 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
155 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
156 line, i,
157 high, low,
158 l0 >> 32, (int)l0,
159 l1 >> 32, (int)l1,
160 l2 >> 32, (int)l2,
161 0, 0);
162}
163
164/*
165 * Add a trace buffer entry for arguments, for int, key.
166 */
167void
168xfs_btree_trace_argik(
169 const char *func,
170 struct xfs_btree_cur *cur,
171 int i,
172 union xfs_btree_key *key,
173 int line)
174{
175 __uint64_t l0, l1;
176
177 cur->bc_ops->trace_key(cur, key, &l0, &l1);
178 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
179 line, i,
180 l0 >> 32, (int)l0,
181 l1 >> 32, (int)l1,
182 0, 0, 0, 0, 0, 0);
183}
184
185/*
186 * Add a trace buffer entry for arguments, for record.
187 */
188void
189xfs_btree_trace_argr(
190 const char *func,
191 struct xfs_btree_cur *cur,
192 union xfs_btree_rec *rec,
193 int line)
194{
195 __uint64_t l0, l1, l2;
196
197 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
198 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
199 line,
200 l0 >> 32, (int)l0,
201 l1 >> 32, (int)l1,
202 l2 >> 32, (int)l2,
203 0, 0, 0, 0, 0);
204}
205
206/*
207 * Add a trace buffer entry for the cursor/operation.
208 */
209void
210xfs_btree_trace_cursor(
211 const char *func,
212 struct xfs_btree_cur *cur,
213 int type,
214 int line)
215{
216 __uint32_t s0;
217 __uint64_t l0, l1;
218 char *s;
219
220 switch (type) {
221 case XBT_ARGS:
222 s = "args";
223 break;
224 case XBT_ENTRY:
225 s = "entry";
226 break;
227 case XBT_ERROR:
228 s = "error";
229 break;
230 case XBT_EXIT:
231 s = "exit";
232 break;
233 default:
234 s = "unknown";
235 break;
236 }
237
238 cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
239 cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
240 s0,
241 l0 >> 32, (int)l0,
242 l1 >> 32, (int)l1,
243 (__psunsigned_t)cur->bc_bufs[0],
244 (__psunsigned_t)cur->bc_bufs[1],
245 (__psunsigned_t)cur->bc_bufs[2],
246 (__psunsigned_t)cur->bc_bufs[3],
247 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
248 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
249}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
deleted file mode 100644
index 2d8a309873ea..000000000000
--- a/fs/xfs/xfs_btree_trace.h
+++ /dev/null
@@ -1,99 +0,0 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_BTREE_TRACE_H__
19#define __XFS_BTREE_TRACE_H__
20
21struct xfs_btree_cur;
22struct xfs_buf;
23
24
25/*
26 * Trace hooks.
27 * i,j = integer (32 bit)
28 * b = btree block buffer (xfs_buf_t)
29 * p = btree ptr
30 * r = btree record
31 * k = btree key
32 */
33
34#ifdef XFS_BTREE_TRACE
35
36/*
37 * Trace buffer entry types.
38 */
39#define XFS_BTREE_KTRACE_ARGBI 1
40#define XFS_BTREE_KTRACE_ARGBII 2
41#define XFS_BTREE_KTRACE_ARGFFFI 3
42#define XFS_BTREE_KTRACE_ARGI 4
43#define XFS_BTREE_KTRACE_ARGIPK 5
44#define XFS_BTREE_KTRACE_ARGIPR 6
45#define XFS_BTREE_KTRACE_ARGIK 7
46#define XFS_BTREE_KTRACE_ARGR 8
47#define XFS_BTREE_KTRACE_CUR 9
48
49/*
50 * Sub-types for cursor traces.
51 */
52#define XBT_ARGS 0
53#define XBT_ENTRY 1
54#define XBT_ERROR 2
55#define XBT_EXIT 3
56
57void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
58 struct xfs_buf *, int, int);
59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
60 struct xfs_buf *, int, int, int);
61void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
62void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
63 union xfs_btree_ptr, union xfs_btree_key *, int);
64void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
65 union xfs_btree_ptr, union xfs_btree_rec *, int);
66void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
67 union xfs_btree_key *, int);
68void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
69 union xfs_btree_rec *, int);
70void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
71
72#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
73 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
74#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
75 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
76#define XFS_BTREE_TRACE_ARGI(c, i) \
77 xfs_btree_trace_argi(__func__, c, i, __LINE__)
78#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
79 xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
80#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
81 xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
82#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
83 xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
84#define XFS_BTREE_TRACE_ARGR(c, r) \
85 xfs_btree_trace_argr(__func__, c, r, __LINE__)
86#define XFS_BTREE_TRACE_CURSOR(c, t) \
87 xfs_btree_trace_cursor(__func__, c, t, __LINE__)
88#else
89#define XFS_BTREE_TRACE_ARGBI(c, b, i)
90#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
91#define XFS_BTREE_TRACE_ARGI(c, i)
92#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
93#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
94#define XFS_BTREE_TRACE_ARGIK(c, i, k)
95#define XFS_BTREE_TRACE_ARGR(c, r)
96#define XFS_BTREE_TRACE_CURSOR(c, t)
97#endif /* XFS_BTREE_TRACE */
98
99#endif /* __XFS_BTREE_TRACE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7b7e005e3dcc..88492916c3dc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -90,13 +90,11 @@ xfs_buf_item_flush_log_debug(
90 uint first, 90 uint first,
91 uint last) 91 uint last)
92{ 92{
93 xfs_buf_log_item_t *bip; 93 xfs_buf_log_item_t *bip = bp->b_fspriv;
94 uint nbytes; 94 uint nbytes;
95 95
96 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 96 if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
97 if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) {
98 return; 97 return;
99 }
100 98
101 ASSERT(bip->bli_logged != NULL); 99 ASSERT(bip->bli_logged != NULL);
102 nbytes = last - first + 1; 100 nbytes = last - first + 1;
@@ -408,7 +406,7 @@ xfs_buf_item_unpin(
408 int stale = bip->bli_flags & XFS_BLI_STALE; 406 int stale = bip->bli_flags & XFS_BLI_STALE;
409 int freed; 407 int freed;
410 408
411 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 409 ASSERT(bp->b_fspriv == bip);
412 ASSERT(atomic_read(&bip->bli_refcount) > 0); 410 ASSERT(atomic_read(&bip->bli_refcount) > 0);
413 411
414 trace_xfs_buf_item_unpin(bip); 412 trace_xfs_buf_item_unpin(bip);
@@ -420,7 +418,7 @@ xfs_buf_item_unpin(
420 418
421 if (freed && stale) { 419 if (freed && stale) {
422 ASSERT(bip->bli_flags & XFS_BLI_STALE); 420 ASSERT(bip->bli_flags & XFS_BLI_STALE);
423 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 421 ASSERT(xfs_buf_islocked(bp));
424 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 422 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
425 ASSERT(XFS_BUF_ISSTALE(bp)); 423 ASSERT(XFS_BUF_ISSTALE(bp));
426 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 424 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
@@ -443,7 +441,7 @@ xfs_buf_item_unpin(
443 * Since the transaction no longer refers to the buffer, 441 * Since the transaction no longer refers to the buffer,
444 * the buffer should no longer refer to the transaction. 442 * the buffer should no longer refer to the transaction.
445 */ 443 */
446 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 444 bp->b_transp = NULL;
447 } 445 }
448 446
449 /* 447 /*
@@ -454,13 +452,13 @@ xfs_buf_item_unpin(
454 */ 452 */
455 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 453 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
456 xfs_buf_do_callbacks(bp); 454 xfs_buf_do_callbacks(bp);
457 XFS_BUF_SET_FSPRIVATE(bp, NULL); 455 bp->b_fspriv = NULL;
458 XFS_BUF_CLR_IODONE_FUNC(bp); 456 bp->b_iodone = NULL;
459 } else { 457 } else {
460 spin_lock(&ailp->xa_lock); 458 spin_lock(&ailp->xa_lock);
461 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 459 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
462 xfs_buf_item_relse(bp); 460 xfs_buf_item_relse(bp);
463 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); 461 ASSERT(bp->b_fspriv == NULL);
464 } 462 }
465 xfs_buf_relse(bp); 463 xfs_buf_relse(bp);
466 } 464 }
@@ -483,7 +481,7 @@ xfs_buf_item_trylock(
483 481
484 if (XFS_BUF_ISPINNED(bp)) 482 if (XFS_BUF_ISPINNED(bp))
485 return XFS_ITEM_PINNED; 483 return XFS_ITEM_PINNED;
486 if (!XFS_BUF_CPSEMA(bp)) 484 if (!xfs_buf_trylock(bp))
487 return XFS_ITEM_LOCKED; 485 return XFS_ITEM_LOCKED;
488 486
489 /* take a reference to the buffer. */ 487 /* take a reference to the buffer. */
@@ -525,7 +523,7 @@ xfs_buf_item_unlock(
525 uint hold; 523 uint hold;
526 524
527 /* Clear the buffer's association with this transaction. */ 525 /* Clear the buffer's association with this transaction. */
528 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 526 bp->b_transp = NULL;
529 527
530 /* 528 /*
531 * If this is a transaction abort, don't return early. Instead, allow 529 * If this is a transaction abort, don't return early. Instead, allow
@@ -684,7 +682,7 @@ xfs_buf_item_init(
684 xfs_buf_t *bp, 682 xfs_buf_t *bp,
685 xfs_mount_t *mp) 683 xfs_mount_t *mp)
686{ 684{
687 xfs_log_item_t *lip; 685 xfs_log_item_t *lip = bp->b_fspriv;
688 xfs_buf_log_item_t *bip; 686 xfs_buf_log_item_t *bip;
689 int chunks; 687 int chunks;
690 int map_size; 688 int map_size;
@@ -696,12 +694,8 @@ xfs_buf_item_init(
696 * nothing to do here so return. 694 * nothing to do here so return.
697 */ 695 */
698 ASSERT(bp->b_target->bt_mount == mp); 696 ASSERT(bp->b_target->bt_mount == mp);
699 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 697 if (lip != NULL && lip->li_type == XFS_LI_BUF)
700 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 698 return;
701 if (lip->li_type == XFS_LI_BUF) {
702 return;
703 }
704 }
705 699
706 /* 700 /*
707 * chunks is the number of XFS_BLF_CHUNK size pieces 701 * chunks is the number of XFS_BLF_CHUNK size pieces
@@ -740,11 +734,9 @@ xfs_buf_item_init(
740 * Put the buf item into the list of items attached to the 734 * Put the buf item into the list of items attached to the
741 * buffer at the front. 735 * buffer at the front.
742 */ 736 */
743 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 737 if (bp->b_fspriv)
744 bip->bli_item.li_bio_list = 738 bip->bli_item.li_bio_list = bp->b_fspriv;
745 XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 739 bp->b_fspriv = bip;
746 }
747 XFS_BUF_SET_FSPRIVATE(bp, bip);
748} 740}
749 741
750 742
@@ -876,12 +868,11 @@ xfs_buf_item_relse(
876 868
877 trace_xfs_buf_item_relse(bp, _RET_IP_); 869 trace_xfs_buf_item_relse(bp, _RET_IP_);
878 870
879 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 871 bip = bp->b_fspriv;
880 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); 872 bp->b_fspriv = bip->bli_item.li_bio_list;
881 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && 873 if (bp->b_fspriv == NULL)
882 (XFS_BUF_IODONE_FUNC(bp) != NULL)) { 874 bp->b_iodone = NULL;
883 XFS_BUF_CLR_IODONE_FUNC(bp); 875
884 }
885 xfs_buf_rele(bp); 876 xfs_buf_rele(bp);
886 xfs_buf_item_free(bip); 877 xfs_buf_item_free(bip);
887} 878}
@@ -905,20 +896,20 @@ xfs_buf_attach_iodone(
905 xfs_log_item_t *head_lip; 896 xfs_log_item_t *head_lip;
906 897
907 ASSERT(XFS_BUF_ISBUSY(bp)); 898 ASSERT(XFS_BUF_ISBUSY(bp));
908 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 899 ASSERT(xfs_buf_islocked(bp));
909 900
910 lip->li_cb = cb; 901 lip->li_cb = cb;
911 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 902 head_lip = bp->b_fspriv;
912 head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 903 if (head_lip) {
913 lip->li_bio_list = head_lip->li_bio_list; 904 lip->li_bio_list = head_lip->li_bio_list;
914 head_lip->li_bio_list = lip; 905 head_lip->li_bio_list = lip;
915 } else { 906 } else {
916 XFS_BUF_SET_FSPRIVATE(bp, lip); 907 bp->b_fspriv = lip;
917 } 908 }
918 909
919 ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) || 910 ASSERT(bp->b_iodone == NULL ||
920 (XFS_BUF_IODONE_FUNC(bp) == NULL)); 911 bp->b_iodone == xfs_buf_iodone_callbacks);
921 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 912 bp->b_iodone = xfs_buf_iodone_callbacks;
922} 913}
923 914
924/* 915/*
@@ -939,8 +930,8 @@ xfs_buf_do_callbacks(
939{ 930{
940 struct xfs_log_item *lip; 931 struct xfs_log_item *lip;
941 932
942 while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { 933 while ((lip = bp->b_fspriv) != NULL) {
943 XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); 934 bp->b_fspriv = lip->li_bio_list;
944 ASSERT(lip->li_cb != NULL); 935 ASSERT(lip->li_cb != NULL);
945 /* 936 /*
946 * Clear the next pointer so we don't have any 937 * Clear the next pointer so we don't have any
@@ -1007,7 +998,7 @@ xfs_buf_iodone_callbacks(
1007 XFS_BUF_DONE(bp); 998 XFS_BUF_DONE(bp);
1008 XFS_BUF_SET_START(bp); 999 XFS_BUF_SET_START(bp);
1009 } 1000 }
1010 ASSERT(XFS_BUF_IODONE_FUNC(bp)); 1001 ASSERT(bp->b_iodone != NULL);
1011 trace_xfs_buf_item_iodone_async(bp, _RET_IP_); 1002 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
1012 xfs_buf_relse(bp); 1003 xfs_buf_relse(bp);
1013 return; 1004 return;
@@ -1026,8 +1017,8 @@ xfs_buf_iodone_callbacks(
1026 1017
1027do_callbacks: 1018do_callbacks:
1028 xfs_buf_do_callbacks(bp); 1019 xfs_buf_do_callbacks(bp);
1029 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1020 bp->b_fspriv = NULL;
1030 XFS_BUF_CLR_IODONE_FUNC(bp); 1021 bp->b_iodone = NULL;
1031 xfs_buf_ioend(bp, 0); 1022 xfs_buf_ioend(bp, 0);
1032} 1023}
1033 1024
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 6102ac6d1dff..2925726529f8 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -24,11 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2.h"
31#include "xfs_dir2_format.h"
32#include "xfs_dir2_priv.h"
32#include "xfs_dinode.h" 33#include "xfs_dinode.h"
33#include "xfs_inode.h" 34#include "xfs_inode.h"
34#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
@@ -36,10 +37,6 @@
36#include "xfs_bmap.h" 37#include "xfs_bmap.h"
37#include "xfs_attr.h" 38#include "xfs_attr.h"
38#include "xfs_attr_leaf.h" 39#include "xfs_attr_leaf.h"
39#include "xfs_dir2_data.h"
40#include "xfs_dir2_leaf.h"
41#include "xfs_dir2_block.h"
42#include "xfs_dir2_node.h"
43#include "xfs_error.h" 40#include "xfs_error.h"
44#include "xfs_trace.h" 41#include "xfs_trace.h"
45 42
@@ -89,7 +86,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
89 */ 86 */
90STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); 87STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
91STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); 88STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
92STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); 89STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
93STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, 90STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
94 xfs_da_state_blk_t *drop_blk, 91 xfs_da_state_blk_t *drop_blk,
95 xfs_da_state_blk_t *save_blk); 92 xfs_da_state_blk_t *save_blk);
@@ -321,11 +318,11 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
321 ASSERT(bp != NULL); 318 ASSERT(bp != NULL);
322 node = bp->data; 319 node = bp->data;
323 oldroot = blk1->bp->data; 320 oldroot = blk1->bp->data;
324 if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { 321 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
325 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - 322 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
326 (char *)oldroot); 323 (char *)oldroot);
327 } else { 324 } else {
328 ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 325 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
329 leaf = (xfs_dir2_leaf_t *)oldroot; 326 leaf = (xfs_dir2_leaf_t *)oldroot;
330 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - 327 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
331 (char *)leaf); 328 (char *)leaf);
@@ -352,7 +349,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
352 node->hdr.count = cpu_to_be16(2); 349 node->hdr.count = cpu_to_be16(2);
353 350
354#ifdef DEBUG 351#ifdef DEBUG
355 if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { 352 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
356 ASSERT(blk1->blkno >= mp->m_dirleafblk && 353 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
357 blk1->blkno < mp->m_dirfreeblk); 354 blk1->blkno < mp->m_dirfreeblk);
358 ASSERT(blk2->blkno >= mp->m_dirleafblk && 355 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -384,7 +381,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
384 int useextra; 381 int useextra;
385 382
386 node = oldblk->bp->data; 383 node = oldblk->bp->data;
387 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 384 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
388 385
389 /* 386 /*
390 * With V2 dirs the extra block is data or freespace. 387 * With V2 dirs the extra block is data or freespace.
@@ -483,8 +480,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
483 node1 = node2; 480 node1 = node2;
484 node2 = tmpnode; 481 node2 = tmpnode;
485 } 482 }
486 ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); 483 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
487 ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); 484 ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
488 count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2; 485 count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
489 if (count == 0) 486 if (count == 0)
490 return; 487 return;
@@ -578,7 +575,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
578 int tmp; 575 int tmp;
579 576
580 node = oldblk->bp->data; 577 node = oldblk->bp->data;
581 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 578 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
582 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 579 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
583 ASSERT(newblk->blkno != 0); 580 ASSERT(newblk->blkno != 0);
584 if (state->args->whichfork == XFS_DATA_FORK) 581 if (state->args->whichfork == XFS_DATA_FORK)
@@ -714,7 +711,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
714 ASSERT(args != NULL); 711 ASSERT(args != NULL);
715 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); 712 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
716 oldroot = root_blk->bp->data; 713 oldroot = root_blk->bp->data;
717 ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC); 714 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
718 ASSERT(!oldroot->hdr.info.forw); 715 ASSERT(!oldroot->hdr.info.forw);
719 ASSERT(!oldroot->hdr.info.back); 716 ASSERT(!oldroot->hdr.info.back);
720 717
@@ -737,10 +734,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
737 ASSERT(bp != NULL); 734 ASSERT(bp != NULL);
738 blkinfo = bp->data; 735 blkinfo = bp->data;
739 if (be16_to_cpu(oldroot->hdr.level) == 1) { 736 if (be16_to_cpu(oldroot->hdr.level) == 1) {
740 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC || 737 ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
741 be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); 738 blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
742 } else { 739 } else {
743 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); 740 ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
744 } 741 }
745 ASSERT(!blkinfo->forw); 742 ASSERT(!blkinfo->forw);
746 ASSERT(!blkinfo->back); 743 ASSERT(!blkinfo->back);
@@ -776,7 +773,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
776 */ 773 */
777 blk = &state->path.blk[ state->path.active-1 ]; 774 blk = &state->path.blk[ state->path.active-1 ];
778 info = blk->bp->data; 775 info = blk->bp->data;
779 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); 776 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
780 node = (xfs_da_intnode_t *)info; 777 node = (xfs_da_intnode_t *)info;
781 count = be16_to_cpu(node->hdr.count); 778 count = be16_to_cpu(node->hdr.count);
782 if (count > (state->node_ents >> 1)) { 779 if (count > (state->node_ents >> 1)) {
@@ -836,7 +833,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
836 count -= state->node_ents >> 2; 833 count -= state->node_ents >> 2;
837 count -= be16_to_cpu(node->hdr.count); 834 count -= be16_to_cpu(node->hdr.count);
838 node = bp->data; 835 node = bp->data;
839 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 836 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
840 count -= be16_to_cpu(node->hdr.count); 837 count -= be16_to_cpu(node->hdr.count);
841 xfs_da_brelse(state->args->trans, bp); 838 xfs_da_brelse(state->args->trans, bp);
842 if (count >= 0) 839 if (count >= 0)
@@ -911,7 +908,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
911 } 908 }
912 for (blk--, level--; level >= 0; blk--, level--) { 909 for (blk--, level--; level >= 0; blk--, level--) {
913 node = blk->bp->data; 910 node = blk->bp->data;
914 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 911 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
915 btree = &node->btree[ blk->index ]; 912 btree = &node->btree[ blk->index ];
916 if (be32_to_cpu(btree->hashval) == lasthash) 913 if (be32_to_cpu(btree->hashval) == lasthash)
917 break; 914 break;
@@ -979,8 +976,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
979 976
980 drop_node = drop_blk->bp->data; 977 drop_node = drop_blk->bp->data;
981 save_node = save_blk->bp->data; 978 save_node = save_blk->bp->data;
982 ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 979 ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
983 ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 980 ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
984 tp = state->args->trans; 981 tp = state->args->trans;
985 982
986 /* 983 /*
@@ -1278,8 +1275,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1278 1275
1279 node1 = node1_bp->data; 1276 node1 = node1_bp->data;
1280 node2 = node2_bp->data; 1277 node2 = node2_bp->data;
1281 ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && 1278 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
1282 (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); 1279 node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1283 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && 1280 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1284 ((be32_to_cpu(node2->btree[0].hashval) < 1281 ((be32_to_cpu(node2->btree[0].hashval) <
1285 be32_to_cpu(node1->btree[0].hashval)) || 1282 be32_to_cpu(node1->btree[0].hashval)) ||
@@ -1299,7 +1296,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1299 xfs_da_intnode_t *node; 1296 xfs_da_intnode_t *node;
1300 1297
1301 node = bp->data; 1298 node = bp->data;
1302 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 1299 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1303 if (count) 1300 if (count)
1304 *count = be16_to_cpu(node->hdr.count); 1301 *count = be16_to_cpu(node->hdr.count);
1305 if (!node->hdr.count) 1302 if (!node->hdr.count)
@@ -1412,7 +1409,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1412 for (blk = &path->blk[level]; level >= 0; blk--, level--) { 1409 for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1413 ASSERT(blk->bp != NULL); 1410 ASSERT(blk->bp != NULL);
1414 node = blk->bp->data; 1411 node = blk->bp->data;
1415 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 1412 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1416 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { 1413 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
1417 blk->index++; 1414 blk->index++;
1418 blkno = be32_to_cpu(node->btree[blk->index].before); 1415 blkno = be32_to_cpu(node->btree[blk->index].before);
@@ -1451,9 +1448,9 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1451 return(error); 1448 return(error);
1452 ASSERT(blk->bp != NULL); 1449 ASSERT(blk->bp != NULL);
1453 info = blk->bp->data; 1450 info = blk->bp->data;
1454 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || 1451 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1455 be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || 1452 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1456 be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); 1453 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1457 blk->magic = be16_to_cpu(info->magic); 1454 blk->magic = be16_to_cpu(info->magic);
1458 if (blk->magic == XFS_DA_NODE_MAGIC) { 1455 if (blk->magic == XFS_DA_NODE_MAGIC) {
1459 node = (xfs_da_intnode_t *)info; 1456 node = (xfs_da_intnode_t *)info;
@@ -1546,79 +1543,62 @@ const struct xfs_nameops xfs_default_nameops = {
1546 .compname = xfs_da_compname 1543 .compname = xfs_da_compname
1547}; 1544};
1548 1545
1549/*
1550 * Add a block to the btree ahead of the file.
1551 * Return the new block number to the caller.
1552 */
1553int 1546int
1554xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) 1547xfs_da_grow_inode_int(
1548 struct xfs_da_args *args,
1549 xfs_fileoff_t *bno,
1550 int count)
1555{ 1551{
1556 xfs_fileoff_t bno, b; 1552 struct xfs_trans *tp = args->trans;
1557 xfs_bmbt_irec_t map; 1553 struct xfs_inode *dp = args->dp;
1558 xfs_bmbt_irec_t *mapp; 1554 int w = args->whichfork;
1559 xfs_inode_t *dp; 1555 xfs_drfsbno_t nblks = dp->i_d.di_nblocks;
1560 int nmap, error, w, count, c, got, i, mapi; 1556 struct xfs_bmbt_irec map, *mapp;
1561 xfs_trans_t *tp; 1557 int nmap, error, got, i, mapi;
1562 xfs_mount_t *mp;
1563 xfs_drfsbno_t nblks;
1564 1558
1565 dp = args->dp;
1566 mp = dp->i_mount;
1567 w = args->whichfork;
1568 tp = args->trans;
1569 nblks = dp->i_d.di_nblocks;
1570
1571 /*
1572 * For new directories adjust the file offset and block count.
1573 */
1574 if (w == XFS_DATA_FORK) {
1575 bno = mp->m_dirleafblk;
1576 count = mp->m_dirblkfsbs;
1577 } else {
1578 bno = 0;
1579 count = 1;
1580 }
1581 /* 1559 /*
1582 * Find a spot in the file space to put the new block. 1560 * Find a spot in the file space to put the new block.
1583 */ 1561 */
1584 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) 1562 error = xfs_bmap_first_unused(tp, dp, count, bno, w);
1563 if (error)
1585 return error; 1564 return error;
1586 if (w == XFS_DATA_FORK) 1565
1587 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1588 /* 1566 /*
1589 * Try mapping it in one filesystem block. 1567 * Try mapping it in one filesystem block.
1590 */ 1568 */
1591 nmap = 1; 1569 nmap = 1;
1592 ASSERT(args->firstblock != NULL); 1570 ASSERT(args->firstblock != NULL);
1593 if ((error = xfs_bmapi(tp, dp, bno, count, 1571 error = xfs_bmapi(tp, dp, *bno, count,
1594 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| 1572 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1595 XFS_BMAPI_CONTIG, 1573 XFS_BMAPI_CONTIG,
1596 args->firstblock, args->total, &map, &nmap, 1574 args->firstblock, args->total, &map, &nmap,
1597 args->flist))) { 1575 args->flist);
1576 if (error)
1598 return error; 1577 return error;
1599 } 1578
1600 ASSERT(nmap <= 1); 1579 ASSERT(nmap <= 1);
1601 if (nmap == 1) { 1580 if (nmap == 1) {
1602 mapp = &map; 1581 mapp = &map;
1603 mapi = 1; 1582 mapi = 1;
1604 } 1583 } else if (nmap == 0 && count > 1) {
1605 /* 1584 xfs_fileoff_t b;
1606 * If we didn't get it and the block might work if fragmented, 1585 int c;
1607 * try without the CONTIG flag. Loop until we get it all. 1586
1608 */ 1587 /*
1609 else if (nmap == 0 && count > 1) { 1588 * If we didn't get it and the block might work if fragmented,
1589 * try without the CONTIG flag. Loop until we get it all.
1590 */
1610 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); 1591 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1611 for (b = bno, mapi = 0; b < bno + count; ) { 1592 for (b = *bno, mapi = 0; b < *bno + count; ) {
1612 nmap = MIN(XFS_BMAP_MAX_NMAP, count); 1593 nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1613 c = (int)(bno + count - b); 1594 c = (int)(*bno + count - b);
1614 if ((error = xfs_bmapi(tp, dp, b, c, 1595 error = xfs_bmapi(tp, dp, b, c,
1615 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| 1596 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
1616 XFS_BMAPI_METADATA, 1597 XFS_BMAPI_METADATA,
1617 args->firstblock, args->total, 1598 args->firstblock, args->total,
1618 &mapp[mapi], &nmap, args->flist))) { 1599 &mapp[mapi], &nmap, args->flist);
1619 kmem_free(mapp); 1600 if (error)
1620 return error; 1601 goto out_free_map;
1621 }
1622 if (nmap < 1) 1602 if (nmap < 1)
1623 break; 1603 break;
1624 mapi += nmap; 1604 mapi += nmap;
@@ -1629,24 +1609,53 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1629 mapi = 0; 1609 mapi = 0;
1630 mapp = NULL; 1610 mapp = NULL;
1631 } 1611 }
1612
1632 /* 1613 /*
1633 * Count the blocks we got, make sure it matches the total. 1614 * Count the blocks we got, make sure it matches the total.
1634 */ 1615 */
1635 for (i = 0, got = 0; i < mapi; i++) 1616 for (i = 0, got = 0; i < mapi; i++)
1636 got += mapp[i].br_blockcount; 1617 got += mapp[i].br_blockcount;
1637 if (got != count || mapp[0].br_startoff != bno || 1618 if (got != count || mapp[0].br_startoff != *bno ||
1638 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 1619 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1639 bno + count) { 1620 *bno + count) {
1640 if (mapp != &map) 1621 error = XFS_ERROR(ENOSPC);
1641 kmem_free(mapp); 1622 goto out_free_map;
1642 return XFS_ERROR(ENOSPC);
1643 } 1623 }
1644 if (mapp != &map) 1624
1645 kmem_free(mapp);
1646 /* account for newly allocated blocks in reserved blocks total */ 1625 /* account for newly allocated blocks in reserved blocks total */
1647 args->total -= dp->i_d.di_nblocks - nblks; 1626 args->total -= dp->i_d.di_nblocks - nblks;
1648 *new_blkno = (xfs_dablk_t)bno; 1627
1649 return 0; 1628out_free_map:
1629 if (mapp != &map)
1630 kmem_free(mapp);
1631 return error;
1632}
1633
1634/*
1635 * Add a block to the btree ahead of the file.
1636 * Return the new block number to the caller.
1637 */
1638int
1639xfs_da_grow_inode(
1640 struct xfs_da_args *args,
1641 xfs_dablk_t *new_blkno)
1642{
1643 xfs_fileoff_t bno;
1644 int count;
1645 int error;
1646
1647 if (args->whichfork == XFS_DATA_FORK) {
1648 bno = args->dp->i_mount->m_dirleafblk;
1649 count = args->dp->i_mount->m_dirblkfsbs;
1650 } else {
1651 bno = 0;
1652 count = 1;
1653 }
1654
1655 error = xfs_da_grow_inode_int(args, &bno, count);
1656 if (!error)
1657 *new_blkno = (xfs_dablk_t)bno;
1658 return error;
1650} 1659}
1651 1660
1652/* 1661/*
@@ -1704,12 +1713,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1704 /* 1713 /*
1705 * Get values from the moved block. 1714 * Get values from the moved block.
1706 */ 1715 */
1707 if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { 1716 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
1708 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 1717 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1709 dead_level = 0; 1718 dead_level = 0;
1710 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); 1719 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
1711 } else { 1720 } else {
1712 ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); 1721 ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1713 dead_node = (xfs_da_intnode_t *)dead_info; 1722 dead_node = (xfs_da_intnode_t *)dead_info;
1714 dead_level = be16_to_cpu(dead_node->hdr.level); 1723 dead_level = be16_to_cpu(dead_node->hdr.level);
1715 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); 1724 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
@@ -1768,8 +1777,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1768 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) 1777 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1769 goto done; 1778 goto done;
1770 par_node = par_buf->data; 1779 par_node = par_buf->data;
1771 if (unlikely( 1780 if (unlikely(par_node->hdr.info.magic !=
1772 be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || 1781 cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1773 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { 1782 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1774 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 1783 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1775 XFS_ERRLEVEL_LOW, mp); 1784 XFS_ERRLEVEL_LOW, mp);
@@ -1820,7 +1829,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1820 par_node = par_buf->data; 1829 par_node = par_buf->data;
1821 if (unlikely( 1830 if (unlikely(
1822 be16_to_cpu(par_node->hdr.level) != level || 1831 be16_to_cpu(par_node->hdr.level) != level ||
1823 be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { 1832 par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1824 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 1833 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1825 XFS_ERRLEVEL_LOW, mp); 1834 XFS_ERRLEVEL_LOW, mp);
1826 error = XFS_ERROR(EFSCORRUPTED); 1835 error = XFS_ERROR(EFSCORRUPTED);
@@ -1930,8 +1939,7 @@ xfs_da_do_buf(
1930 xfs_daddr_t *mappedbnop, 1939 xfs_daddr_t *mappedbnop,
1931 xfs_dabuf_t **bpp, 1940 xfs_dabuf_t **bpp,
1932 int whichfork, 1941 int whichfork,
1933 int caller, 1942 int caller)
1934 inst_t *ra)
1935{ 1943{
1936 xfs_buf_t *bp = NULL; 1944 xfs_buf_t *bp = NULL;
1937 xfs_buf_t **bplist; 1945 xfs_buf_t **bplist;
@@ -2070,25 +2078,22 @@ xfs_da_do_buf(
2070 * Build a dabuf structure. 2078 * Build a dabuf structure.
2071 */ 2079 */
2072 if (bplist) { 2080 if (bplist) {
2073 rbp = xfs_da_buf_make(nbplist, bplist, ra); 2081 rbp = xfs_da_buf_make(nbplist, bplist);
2074 } else if (bp) 2082 } else if (bp)
2075 rbp = xfs_da_buf_make(1, &bp, ra); 2083 rbp = xfs_da_buf_make(1, &bp);
2076 else 2084 else
2077 rbp = NULL; 2085 rbp = NULL;
2078 /* 2086 /*
2079 * For read_buf, check the magic number. 2087 * For read_buf, check the magic number.
2080 */ 2088 */
2081 if (caller == 1) { 2089 if (caller == 1) {
2082 xfs_dir2_data_t *data; 2090 xfs_dir2_data_hdr_t *hdr = rbp->data;
2083 xfs_dir2_free_t *free; 2091 xfs_dir2_free_t *free = rbp->data;
2084 xfs_da_blkinfo_t *info; 2092 xfs_da_blkinfo_t *info = rbp->data;
2085 uint magic, magic1; 2093 uint magic, magic1;
2086 2094
2087 info = rbp->data;
2088 data = rbp->data;
2089 free = rbp->data;
2090 magic = be16_to_cpu(info->magic); 2095 magic = be16_to_cpu(info->magic);
2091 magic1 = be32_to_cpu(data->hdr.magic); 2096 magic1 = be32_to_cpu(hdr->magic);
2092 if (unlikely( 2097 if (unlikely(
2093 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && 2098 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2094 (magic != XFS_ATTR_LEAF_MAGIC) && 2099 (magic != XFS_ATTR_LEAF_MAGIC) &&
@@ -2096,7 +2101,7 @@ xfs_da_do_buf(
2096 (magic != XFS_DIR2_LEAFN_MAGIC) && 2101 (magic != XFS_DIR2_LEAFN_MAGIC) &&
2097 (magic1 != XFS_DIR2_BLOCK_MAGIC) && 2102 (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2098 (magic1 != XFS_DIR2_DATA_MAGIC) && 2103 (magic1 != XFS_DIR2_DATA_MAGIC) &&
2099 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), 2104 (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
2100 mp, XFS_ERRTAG_DA_READ_BUF, 2105 mp, XFS_ERRTAG_DA_READ_BUF,
2101 XFS_RANDOM_DA_READ_BUF))) { 2106 XFS_RANDOM_DA_READ_BUF))) {
2102 trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); 2107 trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
@@ -2143,8 +2148,7 @@ xfs_da_get_buf(
2143 xfs_dabuf_t **bpp, 2148 xfs_dabuf_t **bpp,
2144 int whichfork) 2149 int whichfork)
2145{ 2150{
2146 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, 2151 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
2147 (inst_t *)__return_address);
2148} 2152}
2149 2153
2150/* 2154/*
@@ -2159,8 +2163,7 @@ xfs_da_read_buf(
2159 xfs_dabuf_t **bpp, 2163 xfs_dabuf_t **bpp,
2160 int whichfork) 2164 int whichfork)
2161{ 2165{
2162 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, 2166 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
2163 (inst_t *)__return_address);
2164} 2167}
2165 2168
2166/* 2169/*
@@ -2176,8 +2179,7 @@ xfs_da_reada_buf(
2176 xfs_daddr_t rval; 2179 xfs_daddr_t rval;
2177 2180
2178 rval = -1; 2181 rval = -1;
2179 if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3, 2182 if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
2180 (inst_t *)__return_address))
2181 return -1; 2183 return -1;
2182 else 2184 else
2183 return rval; 2185 return rval;
@@ -2235,17 +2237,12 @@ xfs_da_state_free(xfs_da_state_t *state)
2235 kmem_zone_free(xfs_da_state_zone, state); 2237 kmem_zone_free(xfs_da_state_zone, state);
2236} 2238}
2237 2239
2238#ifdef XFS_DABUF_DEBUG
2239xfs_dabuf_t *xfs_dabuf_global_list;
2240static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
2241#endif
2242
2243/* 2240/*
2244 * Create a dabuf. 2241 * Create a dabuf.
2245 */ 2242 */
2246/* ARGSUSED */ 2243/* ARGSUSED */
2247STATIC xfs_dabuf_t * 2244STATIC xfs_dabuf_t *
2248xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) 2245xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
2249{ 2246{
2250 xfs_buf_t *bp; 2247 xfs_buf_t *bp;
2251 xfs_dabuf_t *dabuf; 2248 xfs_dabuf_t *dabuf;
@@ -2257,11 +2254,6 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2257 else 2254 else
2258 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); 2255 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2259 dabuf->dirty = 0; 2256 dabuf->dirty = 0;
2260#ifdef XFS_DABUF_DEBUG
2261 dabuf->ra = ra;
2262 dabuf->target = XFS_BUF_TARGET(bps[0]);
2263 dabuf->blkno = XFS_BUF_ADDR(bps[0]);
2264#endif
2265 if (nbuf == 1) { 2257 if (nbuf == 1) {
2266 dabuf->nbuf = 1; 2258 dabuf->nbuf = 1;
2267 bp = bps[0]; 2259 bp = bps[0];
@@ -2281,23 +2273,6 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2281 XFS_BUF_COUNT(bp)); 2273 XFS_BUF_COUNT(bp));
2282 } 2274 }
2283 } 2275 }
2284#ifdef XFS_DABUF_DEBUG
2285 {
2286 xfs_dabuf_t *p;
2287
2288 spin_lock(&xfs_dabuf_global_lock);
2289 for (p = xfs_dabuf_global_list; p; p = p->next) {
2290 ASSERT(p->blkno != dabuf->blkno ||
2291 p->target != dabuf->target);
2292 }
2293 dabuf->prev = NULL;
2294 if (xfs_dabuf_global_list)
2295 xfs_dabuf_global_list->prev = dabuf;
2296 dabuf->next = xfs_dabuf_global_list;
2297 xfs_dabuf_global_list = dabuf;
2298 spin_unlock(&xfs_dabuf_global_lock);
2299 }
2300#endif
2301 return dabuf; 2276 return dabuf;
2302} 2277}
2303 2278
@@ -2333,25 +2308,12 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2333 ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); 2308 ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2334 if (dabuf->dirty) 2309 if (dabuf->dirty)
2335 xfs_da_buf_clean(dabuf); 2310 xfs_da_buf_clean(dabuf);
2336 if (dabuf->nbuf > 1) 2311 if (dabuf->nbuf > 1) {
2337 kmem_free(dabuf->data); 2312 kmem_free(dabuf->data);
2338#ifdef XFS_DABUF_DEBUG
2339 {
2340 spin_lock(&xfs_dabuf_global_lock);
2341 if (dabuf->prev)
2342 dabuf->prev->next = dabuf->next;
2343 else
2344 xfs_dabuf_global_list = dabuf->next;
2345 if (dabuf->next)
2346 dabuf->next->prev = dabuf->prev;
2347 spin_unlock(&xfs_dabuf_global_lock);
2348 }
2349 memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2350#endif
2351 if (dabuf->nbuf == 1)
2352 kmem_zone_free(xfs_dabuf_zone, dabuf);
2353 else
2354 kmem_free(dabuf); 2313 kmem_free(dabuf);
2314 } else {
2315 kmem_zone_free(xfs_dabuf_zone, dabuf);
2316 }
2355} 2317}
2356 2318
2357/* 2319/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index fe9f5a8c1d2a..dbf7c074ae73 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -145,22 +145,11 @@ typedef struct xfs_dabuf {
145 short dirty; /* data needs to be copied back */ 145 short dirty; /* data needs to be copied back */
146 short bbcount; /* how large is data in bbs */ 146 short bbcount; /* how large is data in bbs */
147 void *data; /* pointer for buffers' data */ 147 void *data; /* pointer for buffers' data */
148#ifdef XFS_DABUF_DEBUG
149 inst_t *ra; /* return address of caller to make */
150 struct xfs_dabuf *next; /* next in global chain */
151 struct xfs_dabuf *prev; /* previous in global chain */
152 struct xfs_buftarg *target; /* device for buffer */
153 xfs_daddr_t blkno; /* daddr first in bps[0] */
154#endif
155 struct xfs_buf *bps[1]; /* actually nbuf of these */ 148 struct xfs_buf *bps[1]; /* actually nbuf of these */
156} xfs_dabuf_t; 149} xfs_dabuf_t;
157#define XFS_DA_BUF_SIZE(n) \ 150#define XFS_DA_BUF_SIZE(n) \
158 (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) 151 (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
159 152
160#ifdef XFS_DABUF_DEBUG
161extern xfs_dabuf_t *xfs_dabuf_global_list;
162#endif
163
164/* 153/*
165 * Storage for holding state during Btree searches and split/join ops. 154 * Storage for holding state during Btree searches and split/join ops.
166 * 155 *
@@ -248,6 +237,8 @@ int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
248 * Utility routines. 237 * Utility routines.
249 */ 238 */
250int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); 239int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
240int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
241 int count);
251int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, 242int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
252 xfs_dablk_t bno, xfs_daddr_t mappedbno, 243 xfs_dablk_t bno, xfs_daddr_t mappedbno,
253 xfs_dabuf_t **bp, int whichfork); 244 xfs_dabuf_t **bp, int whichfork);
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index dba7a71cedf3..4580ce00aeb4 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -24,20 +24,17 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
36#include "xfs_bmap.h" 34#include "xfs_bmap.h"
37#include "xfs_dir2_data.h" 35#include "xfs_dir2.h"
38#include "xfs_dir2_leaf.h" 36#include "xfs_dir2_format.h"
39#include "xfs_dir2_block.h" 37#include "xfs_dir2_priv.h"
40#include "xfs_dir2_node.h"
41#include "xfs_error.h" 38#include "xfs_error.h"
42#include "xfs_vnodeops.h" 39#include "xfs_vnodeops.h"
43#include "xfs_trace.h" 40#include "xfs_trace.h"
@@ -122,15 +119,15 @@ int
122xfs_dir_isempty( 119xfs_dir_isempty(
123 xfs_inode_t *dp) 120 xfs_inode_t *dp)
124{ 121{
125 xfs_dir2_sf_t *sfp; 122 xfs_dir2_sf_hdr_t *sfp;
126 123
127 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 124 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
128 if (dp->i_d.di_size == 0) /* might happen during shutdown. */ 125 if (dp->i_d.di_size == 0) /* might happen during shutdown. */
129 return 1; 126 return 1;
130 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) 127 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
131 return 0; 128 return 0;
132 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 129 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
133 return !sfp->hdr.count; 130 return !sfp->count;
134} 131}
135 132
136/* 133/*
@@ -500,129 +497,34 @@ xfs_dir_canenter(
500 497
501/* 498/*
502 * Add a block to the directory. 499 * Add a block to the directory.
503 * This routine is for data and free blocks, not leaf/node blocks 500 *
504 * which are handled by xfs_da_grow_inode. 501 * This routine is for data and free blocks, not leaf/node blocks which are
502 * handled by xfs_da_grow_inode.
505 */ 503 */
506int 504int
507xfs_dir2_grow_inode( 505xfs_dir2_grow_inode(
508 xfs_da_args_t *args, 506 struct xfs_da_args *args,
509 int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ 507 int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
510 xfs_dir2_db_t *dbp) /* out: block number added */ 508 xfs_dir2_db_t *dbp) /* out: block number added */
511{ 509{
512 xfs_fileoff_t bno; /* directory offset of new block */ 510 struct xfs_inode *dp = args->dp;
513 int count; /* count of filesystem blocks */ 511 struct xfs_mount *mp = dp->i_mount;
514 xfs_inode_t *dp; /* incore directory inode */ 512 xfs_fileoff_t bno; /* directory offset of new block */
515 int error; 513 int count; /* count of filesystem blocks */
516 int got; /* blocks actually mapped */ 514 int error;
517 int i;
518 xfs_bmbt_irec_t map; /* single structure for bmap */
519 int mapi; /* mapping index */
520 xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */
521 xfs_mount_t *mp;
522 int nmap; /* number of bmap entries */
523 xfs_trans_t *tp;
524 xfs_drfsbno_t nblks;
525 515
526 trace_xfs_dir2_grow_inode(args, space); 516 trace_xfs_dir2_grow_inode(args, space);
527 517
528 dp = args->dp;
529 tp = args->trans;
530 mp = dp->i_mount;
531 nblks = dp->i_d.di_nblocks;
532 /* 518 /*
533 * Set lowest possible block in the space requested. 519 * Set lowest possible block in the space requested.
534 */ 520 */
535 bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); 521 bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
536 count = mp->m_dirblkfsbs; 522 count = mp->m_dirblkfsbs;
537 /*
538 * Find the first hole for our block.
539 */
540 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
541 return error;
542 nmap = 1;
543 ASSERT(args->firstblock != NULL);
544 /*
545 * Try mapping the new block contiguously (one extent).
546 */
547 if ((error = xfs_bmapi(tp, dp, bno, count,
548 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
549 args->firstblock, args->total, &map, &nmap,
550 args->flist)))
551 return error;
552 ASSERT(nmap <= 1);
553 if (nmap == 1) {
554 mapp = &map;
555 mapi = 1;
556 }
557 /*
558 * Didn't work and this is a multiple-fsb directory block.
559 * Try again with contiguous flag turned on.
560 */
561 else if (nmap == 0 && count > 1) {
562 xfs_fileoff_t b; /* current file offset */
563 523
564 /* 524 error = xfs_da_grow_inode_int(args, &bno, count);
565 * Space for maximum number of mappings. 525 if (error)
566 */ 526 return error;
567 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
568 /*
569 * Iterate until we get to the end of our block.
570 */
571 for (b = bno, mapi = 0; b < bno + count; ) {
572 int c; /* current fsb count */
573
574 /*
575 * Can't map more than MAX_NMAP at once.
576 */
577 nmap = MIN(XFS_BMAP_MAX_NMAP, count);
578 c = (int)(bno + count - b);
579 if ((error = xfs_bmapi(tp, dp, b, c,
580 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
581 args->firstblock, args->total,
582 &mapp[mapi], &nmap, args->flist))) {
583 kmem_free(mapp);
584 return error;
585 }
586 if (nmap < 1)
587 break;
588 /*
589 * Add this bunch into our table, go to the next offset.
590 */
591 mapi += nmap;
592 b = mapp[mapi - 1].br_startoff +
593 mapp[mapi - 1].br_blockcount;
594 }
595 }
596 /*
597 * Didn't work.
598 */
599 else {
600 mapi = 0;
601 mapp = NULL;
602 }
603 /*
604 * See how many fsb's we got.
605 */
606 for (i = 0, got = 0; i < mapi; i++)
607 got += mapp[i].br_blockcount;
608 /*
609 * Didn't get enough fsb's, or the first/last block's are wrong.
610 */
611 if (got != count || mapp[0].br_startoff != bno ||
612 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
613 bno + count) {
614 if (mapp != &map)
615 kmem_free(mapp);
616 return XFS_ERROR(ENOSPC);
617 }
618 /*
619 * Done with the temporary mapping table.
620 */
621 if (mapp != &map)
622 kmem_free(mapp);
623 527
624 /* account for newly allocated blocks in reserved blocks total */
625 args->total -= dp->i_d.di_nblocks - nblks;
626 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); 528 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
627 529
628 /* 530 /*
@@ -634,7 +536,7 @@ xfs_dir2_grow_inode(
634 size = XFS_FSB_TO_B(mp, bno + count); 536 size = XFS_FSB_TO_B(mp, bno + count);
635 if (size > dp->i_d.di_size) { 537 if (size > dp->i_d.di_size) {
636 dp->i_d.di_size = size; 538 dp->i_d.di_size = size;
637 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 539 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
638 } 540 }
639 } 541 }
640 return 0; 542 return 0;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 74a3b1057685..e937d9991c18 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -16,49 +16,14 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#ifndef __XFS_DIR2_H__ 18#ifndef __XFS_DIR2_H__
19#define __XFS_DIR2_H__ 19#define __XFS_DIR2_H__
20 20
21struct uio;
22struct xfs_dabuf;
23struct xfs_da_args;
24struct xfs_dir2_put_args;
25struct xfs_bmap_free; 21struct xfs_bmap_free;
22struct xfs_da_args;
26struct xfs_inode; 23struct xfs_inode;
27struct xfs_mount; 24struct xfs_mount;
28struct xfs_trans; 25struct xfs_trans;
29 26
30/*
31 * Directory version 2.
32 * There are 4 possible formats:
33 * shortform
34 * single block - data with embedded leaf at the end
35 * multiple data blocks, single leaf+freeindex block
36 * data blocks, node&leaf blocks (btree), freeindex blocks
37 *
38 * The shortform format is in xfs_dir2_sf.h.
39 * The single block format is in xfs_dir2_block.h.
40 * The data block format is in xfs_dir2_data.h.
41 * The leaf and freeindex block formats are in xfs_dir2_leaf.h.
42 * Node blocks are the same as the other version, in xfs_da_btree.h.
43 */
44
45/*
46 * Byte offset in data block and shortform entry.
47 */
48typedef __uint16_t xfs_dir2_data_off_t;
49#define NULLDATAOFF 0xffffU
50typedef uint xfs_dir2_data_aoff_t; /* argument form */
51
52/*
53 * Directory block number (logical dirblk in file)
54 */
55typedef __uint32_t xfs_dir2_db_t;
56
57/*
58 * Byte offset in a directory.
59 */
60typedef xfs_off_t xfs_dir2_off_t;
61
62extern struct xfs_name xfs_name_dotdot; 27extern struct xfs_name xfs_name_dotdot;
63 28
64/* 29/*
@@ -86,21 +51,10 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
86 struct xfs_bmap_free *flist, xfs_extlen_t tot); 51 struct xfs_bmap_free *flist, xfs_extlen_t tot);
87extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 52extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
88 struct xfs_name *name, uint resblks); 53 struct xfs_name *name, uint resblks);
89extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
90 54
91/* 55/*
92 * Utility routines for v2 directories. 56 * Direct call from the bmap code, bypassing the generic directory layer.
93 */ 57 */
94extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, 58extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
95 xfs_dir2_db_t *dbp);
96extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp,
97 int *vp);
98extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
99 int *vp);
100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
101 struct xfs_dabuf *bp);
102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
104 const unsigned char *name, int len);
105 59
106#endif /* __XFS_DIR2_H__ */ 60#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 580d99cef9e7..9245e029b8ea 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -23,17 +23,14 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
34#include "xfs_dir2_data.h" 32#include "xfs_dir2_format.h"
35#include "xfs_dir2_leaf.h" 33#include "xfs_dir2_priv.h"
36#include "xfs_dir2_block.h"
37#include "xfs_error.h" 34#include "xfs_error.h"
38#include "xfs_trace.h" 35#include "xfs_trace.h"
39 36
@@ -67,7 +64,7 @@ xfs_dir2_block_addname(
67 xfs_da_args_t *args) /* directory op arguments */ 64 xfs_da_args_t *args) /* directory op arguments */
68{ 65{
69 xfs_dir2_data_free_t *bf; /* bestfree table in block */ 66 xfs_dir2_data_free_t *bf; /* bestfree table in block */
70 xfs_dir2_block_t *block; /* directory block structure */ 67 xfs_dir2_data_hdr_t *hdr; /* block header */
71 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 68 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
72 xfs_dabuf_t *bp; /* buffer for block */ 69 xfs_dabuf_t *bp; /* buffer for block */
73 xfs_dir2_block_tail_t *btp; /* block tail */ 70 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -105,13 +102,13 @@ xfs_dir2_block_addname(
105 return error; 102 return error;
106 } 103 }
107 ASSERT(bp != NULL); 104 ASSERT(bp != NULL);
108 block = bp->data; 105 hdr = bp->data;
109 /* 106 /*
110 * Check the magic number, corrupted if wrong. 107 * Check the magic number, corrupted if wrong.
111 */ 108 */
112 if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { 109 if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) {
113 XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", 110 XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
114 XFS_ERRLEVEL_LOW, mp, block); 111 XFS_ERRLEVEL_LOW, mp, hdr);
115 xfs_da_brelse(tp, bp); 112 xfs_da_brelse(tp, bp);
116 return XFS_ERROR(EFSCORRUPTED); 113 return XFS_ERROR(EFSCORRUPTED);
117 } 114 }
@@ -119,8 +116,8 @@ xfs_dir2_block_addname(
119 /* 116 /*
120 * Set up pointers to parts of the block. 117 * Set up pointers to parts of the block.
121 */ 118 */
122 bf = block->hdr.bestfree; 119 bf = hdr->bestfree;
123 btp = xfs_dir2_block_tail_p(mp, block); 120 btp = xfs_dir2_block_tail_p(mp, hdr);
124 blp = xfs_dir2_block_leaf_p(btp); 121 blp = xfs_dir2_block_leaf_p(btp);
125 /* 122 /*
126 * No stale entries? Need space for entry and new leaf. 123 * No stale entries? Need space for entry and new leaf.
@@ -133,7 +130,7 @@ xfs_dir2_block_addname(
133 /* 130 /*
134 * Data object just before the first leaf entry. 131 * Data object just before the first leaf entry.
135 */ 132 */
136 enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); 133 enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
137 /* 134 /*
138 * If it's not free then can't do this add without cleaning up: 135 * If it's not free then can't do this add without cleaning up:
139 * the space before the first leaf entry needs to be free so it 136 * the space before the first leaf entry needs to be free so it
@@ -146,7 +143,7 @@ xfs_dir2_block_addname(
146 */ 143 */
147 else { 144 else {
148 dup = (xfs_dir2_data_unused_t *) 145 dup = (xfs_dir2_data_unused_t *)
149 ((char *)block + be16_to_cpu(bf[0].offset)); 146 ((char *)hdr + be16_to_cpu(bf[0].offset));
150 if (dup == enddup) { 147 if (dup == enddup) {
151 /* 148 /*
152 * It is the biggest freespace, is it too small 149 * It is the biggest freespace, is it too small
@@ -159,7 +156,7 @@ xfs_dir2_block_addname(
159 */ 156 */
160 if (be16_to_cpu(bf[1].length) >= len) 157 if (be16_to_cpu(bf[1].length) >= len)
161 dup = (xfs_dir2_data_unused_t *) 158 dup = (xfs_dir2_data_unused_t *)
162 ((char *)block + 159 ((char *)hdr +
163 be16_to_cpu(bf[1].offset)); 160 be16_to_cpu(bf[1].offset));
164 else 161 else
165 dup = NULL; 162 dup = NULL;
@@ -182,7 +179,7 @@ xfs_dir2_block_addname(
182 */ 179 */
183 else if (be16_to_cpu(bf[0].length) >= len) { 180 else if (be16_to_cpu(bf[0].length) >= len) {
184 dup = (xfs_dir2_data_unused_t *) 181 dup = (xfs_dir2_data_unused_t *)
185 ((char *)block + be16_to_cpu(bf[0].offset)); 182 ((char *)hdr + be16_to_cpu(bf[0].offset));
186 compact = 0; 183 compact = 0;
187 } 184 }
188 /* 185 /*
@@ -196,7 +193,7 @@ xfs_dir2_block_addname(
196 /* 193 /*
197 * Data object just before the first leaf entry. 194 * Data object just before the first leaf entry.
198 */ 195 */
199 dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); 196 dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
200 /* 197 /*
201 * If it's not free then the data will go where the 198 * If it's not free then the data will go where the
202 * leaf data starts now, if it works at all. 199 * leaf data starts now, if it works at all.
@@ -255,7 +252,8 @@ xfs_dir2_block_addname(
255 highstale = lfloghigh = -1; 252 highstale = lfloghigh = -1;
256 fromidx >= 0; 253 fromidx >= 0;
257 fromidx--) { 254 fromidx--) {
258 if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { 255 if (blp[fromidx].address ==
256 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
259 if (highstale == -1) 257 if (highstale == -1)
260 highstale = toidx; 258 highstale = toidx;
261 else { 259 else {
@@ -272,7 +270,7 @@ xfs_dir2_block_addname(
272 lfloghigh -= be32_to_cpu(btp->stale) - 1; 270 lfloghigh -= be32_to_cpu(btp->stale) - 1;
273 be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); 271 be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
274 xfs_dir2_data_make_free(tp, bp, 272 xfs_dir2_data_make_free(tp, bp,
275 (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), 273 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
276 (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), 274 (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
277 &needlog, &needscan); 275 &needlog, &needscan);
278 blp += be32_to_cpu(btp->stale) - 1; 276 blp += be32_to_cpu(btp->stale) - 1;
@@ -282,7 +280,7 @@ xfs_dir2_block_addname(
282 * This needs to happen before the next call to use_free. 280 * This needs to happen before the next call to use_free.
283 */ 281 */
284 if (needscan) { 282 if (needscan) {
285 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 283 xfs_dir2_data_freescan(mp, hdr, &needlog);
286 needscan = 0; 284 needscan = 0;
287 } 285 }
288 } 286 }
@@ -318,7 +316,7 @@ xfs_dir2_block_addname(
318 */ 316 */
319 xfs_dir2_data_use_free(tp, bp, enddup, 317 xfs_dir2_data_use_free(tp, bp, enddup,
320 (xfs_dir2_data_aoff_t) 318 (xfs_dir2_data_aoff_t)
321 ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - 319 ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
322 sizeof(*blp)), 320 sizeof(*blp)),
323 (xfs_dir2_data_aoff_t)sizeof(*blp), 321 (xfs_dir2_data_aoff_t)sizeof(*blp),
324 &needlog, &needscan); 322 &needlog, &needscan);
@@ -331,8 +329,7 @@ xfs_dir2_block_addname(
331 * This needs to happen before the next call to use_free. 329 * This needs to happen before the next call to use_free.
332 */ 330 */
333 if (needscan) { 331 if (needscan) {
334 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, 332 xfs_dir2_data_freescan(mp, hdr, &needlog);
335 &needlog);
336 needscan = 0; 333 needscan = 0;
337 } 334 }
338 /* 335 /*
@@ -353,12 +350,14 @@ xfs_dir2_block_addname(
353 else { 350 else {
354 for (lowstale = mid; 351 for (lowstale = mid;
355 lowstale >= 0 && 352 lowstale >= 0 &&
356 be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; 353 blp[lowstale].address !=
354 cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
357 lowstale--) 355 lowstale--)
358 continue; 356 continue;
359 for (highstale = mid + 1; 357 for (highstale = mid + 1;
360 highstale < be32_to_cpu(btp->count) && 358 highstale < be32_to_cpu(btp->count) &&
361 be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && 359 blp[highstale].address !=
360 cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
362 (lowstale < 0 || mid - lowstale > highstale - mid); 361 (lowstale < 0 || mid - lowstale > highstale - mid);
363 highstale++) 362 highstale++)
364 continue; 363 continue;
@@ -397,13 +396,13 @@ xfs_dir2_block_addname(
397 */ 396 */
398 blp[mid].hashval = cpu_to_be32(args->hashval); 397 blp[mid].hashval = cpu_to_be32(args->hashval);
399 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 398 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
400 (char *)dep - (char *)block)); 399 (char *)dep - (char *)hdr));
401 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); 400 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
402 /* 401 /*
403 * Mark space for the data entry used. 402 * Mark space for the data entry used.
404 */ 403 */
405 xfs_dir2_data_use_free(tp, bp, dup, 404 xfs_dir2_data_use_free(tp, bp, dup,
406 (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), 405 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
407 (xfs_dir2_data_aoff_t)len, &needlog, &needscan); 406 (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
408 /* 407 /*
409 * Create the new data entry. 408 * Create the new data entry.
@@ -412,12 +411,12 @@ xfs_dir2_block_addname(
412 dep->namelen = args->namelen; 411 dep->namelen = args->namelen;
413 memcpy(dep->name, args->name, args->namelen); 412 memcpy(dep->name, args->name, args->namelen);
414 tagp = xfs_dir2_data_entry_tag_p(dep); 413 tagp = xfs_dir2_data_entry_tag_p(dep);
415 *tagp = cpu_to_be16((char *)dep - (char *)block); 414 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
416 /* 415 /*
417 * Clean up the bestfree array and log the header, tail, and entry. 416 * Clean up the bestfree array and log the header, tail, and entry.
418 */ 417 */
419 if (needscan) 418 if (needscan)
420 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 419 xfs_dir2_data_freescan(mp, hdr, &needlog);
421 if (needlog) 420 if (needlog)
422 xfs_dir2_data_log_header(tp, bp); 421 xfs_dir2_data_log_header(tp, bp);
423 xfs_dir2_block_log_tail(tp, bp); 422 xfs_dir2_block_log_tail(tp, bp);
@@ -437,7 +436,7 @@ xfs_dir2_block_getdents(
437 xfs_off_t *offset, 436 xfs_off_t *offset,
438 filldir_t filldir) 437 filldir_t filldir)
439{ 438{
440 xfs_dir2_block_t *block; /* directory block structure */ 439 xfs_dir2_data_hdr_t *hdr; /* block header */
441 xfs_dabuf_t *bp; /* buffer for block */ 440 xfs_dabuf_t *bp; /* buffer for block */
442 xfs_dir2_block_tail_t *btp; /* block tail */ 441 xfs_dir2_block_tail_t *btp; /* block tail */
443 xfs_dir2_data_entry_t *dep; /* block data entry */ 442 xfs_dir2_data_entry_t *dep; /* block data entry */
@@ -470,13 +469,13 @@ xfs_dir2_block_getdents(
470 * We'll skip entries before this. 469 * We'll skip entries before this.
471 */ 470 */
472 wantoff = xfs_dir2_dataptr_to_off(mp, *offset); 471 wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
473 block = bp->data; 472 hdr = bp->data;
474 xfs_dir2_data_check(dp, bp); 473 xfs_dir2_data_check(dp, bp);
475 /* 474 /*
476 * Set up values for the loop. 475 * Set up values for the loop.
477 */ 476 */
478 btp = xfs_dir2_block_tail_p(mp, block); 477 btp = xfs_dir2_block_tail_p(mp, hdr);
479 ptr = (char *)block->u; 478 ptr = (char *)(hdr + 1);
480 endptr = (char *)xfs_dir2_block_leaf_p(btp); 479 endptr = (char *)xfs_dir2_block_leaf_p(btp);
481 480
482 /* 481 /*
@@ -502,11 +501,11 @@ xfs_dir2_block_getdents(
502 /* 501 /*
503 * The entry is before the desired starting point, skip it. 502 * The entry is before the desired starting point, skip it.
504 */ 503 */
505 if ((char *)dep - (char *)block < wantoff) 504 if ((char *)dep - (char *)hdr < wantoff)
506 continue; 505 continue;
507 506
508 cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 507 cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
509 (char *)dep - (char *)block); 508 (char *)dep - (char *)hdr);
510 509
511 /* 510 /*
512 * If it didn't fit, set the final offset to here & return. 511 * If it didn't fit, set the final offset to here & return.
@@ -540,17 +539,14 @@ xfs_dir2_block_log_leaf(
540 int first, /* index of first logged leaf */ 539 int first, /* index of first logged leaf */
541 int last) /* index of last logged leaf */ 540 int last) /* index of last logged leaf */
542{ 541{
543 xfs_dir2_block_t *block; /* directory block structure */ 542 xfs_dir2_data_hdr_t *hdr = bp->data;
544 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 543 xfs_dir2_leaf_entry_t *blp;
545 xfs_dir2_block_tail_t *btp; /* block tail */ 544 xfs_dir2_block_tail_t *btp;
546 xfs_mount_t *mp; /* filesystem mount point */
547 545
548 mp = tp->t_mountp; 546 btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
549 block = bp->data;
550 btp = xfs_dir2_block_tail_p(mp, block);
551 blp = xfs_dir2_block_leaf_p(btp); 547 blp = xfs_dir2_block_leaf_p(btp);
552 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), 548 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
553 (uint)((char *)&blp[last + 1] - (char *)block - 1)); 549 (uint)((char *)&blp[last + 1] - (char *)hdr - 1));
554} 550}
555 551
556/* 552/*
@@ -561,15 +557,12 @@ xfs_dir2_block_log_tail(
561 xfs_trans_t *tp, /* transaction structure */ 557 xfs_trans_t *tp, /* transaction structure */
562 xfs_dabuf_t *bp) /* block buffer */ 558 xfs_dabuf_t *bp) /* block buffer */
563{ 559{
564 xfs_dir2_block_t *block; /* directory block structure */ 560 xfs_dir2_data_hdr_t *hdr = bp->data;
565 xfs_dir2_block_tail_t *btp; /* block tail */ 561 xfs_dir2_block_tail_t *btp;
566 xfs_mount_t *mp; /* filesystem mount point */
567 562
568 mp = tp->t_mountp; 563 btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
569 block = bp->data; 564 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
570 btp = xfs_dir2_block_tail_p(mp, block); 565 (uint)((char *)(btp + 1) - (char *)hdr - 1));
571 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
572 (uint)((char *)(btp + 1) - (char *)block - 1));
573} 566}
574 567
575/* 568/*
@@ -580,7 +573,7 @@ int /* error */
580xfs_dir2_block_lookup( 573xfs_dir2_block_lookup(
581 xfs_da_args_t *args) /* dir lookup arguments */ 574 xfs_da_args_t *args) /* dir lookup arguments */
582{ 575{
583 xfs_dir2_block_t *block; /* block structure */ 576 xfs_dir2_data_hdr_t *hdr; /* block header */
584 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 577 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
585 xfs_dabuf_t *bp; /* block buffer */ 578 xfs_dabuf_t *bp; /* block buffer */
586 xfs_dir2_block_tail_t *btp; /* block tail */ 579 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -600,14 +593,14 @@ xfs_dir2_block_lookup(
600 return error; 593 return error;
601 dp = args->dp; 594 dp = args->dp;
602 mp = dp->i_mount; 595 mp = dp->i_mount;
603 block = bp->data; 596 hdr = bp->data;
604 xfs_dir2_data_check(dp, bp); 597 xfs_dir2_data_check(dp, bp);
605 btp = xfs_dir2_block_tail_p(mp, block); 598 btp = xfs_dir2_block_tail_p(mp, hdr);
606 blp = xfs_dir2_block_leaf_p(btp); 599 blp = xfs_dir2_block_leaf_p(btp);
607 /* 600 /*
608 * Get the offset from the leaf entry, to point to the data. 601 * Get the offset from the leaf entry, to point to the data.
609 */ 602 */
610 dep = (xfs_dir2_data_entry_t *)((char *)block + 603 dep = (xfs_dir2_data_entry_t *)((char *)hdr +
611 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 604 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
612 /* 605 /*
613 * Fill in inode number, CI name if appropriate, release the block. 606 * Fill in inode number, CI name if appropriate, release the block.
@@ -628,7 +621,7 @@ xfs_dir2_block_lookup_int(
628 int *entno) /* returned entry number */ 621 int *entno) /* returned entry number */
629{ 622{
630 xfs_dir2_dataptr_t addr; /* data entry address */ 623 xfs_dir2_dataptr_t addr; /* data entry address */
631 xfs_dir2_block_t *block; /* block structure */ 624 xfs_dir2_data_hdr_t *hdr; /* block header */
632 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 625 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
633 xfs_dabuf_t *bp; /* block buffer */ 626 xfs_dabuf_t *bp; /* block buffer */
634 xfs_dir2_block_tail_t *btp; /* block tail */ 627 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -654,9 +647,9 @@ xfs_dir2_block_lookup_int(
654 return error; 647 return error;
655 } 648 }
656 ASSERT(bp != NULL); 649 ASSERT(bp != NULL);
657 block = bp->data; 650 hdr = bp->data;
658 xfs_dir2_data_check(dp, bp); 651 xfs_dir2_data_check(dp, bp);
659 btp = xfs_dir2_block_tail_p(mp, block); 652 btp = xfs_dir2_block_tail_p(mp, hdr);
660 blp = xfs_dir2_block_leaf_p(btp); 653 blp = xfs_dir2_block_leaf_p(btp);
661 /* 654 /*
662 * Loop doing a binary search for our hash value. 655 * Loop doing a binary search for our hash value.
@@ -694,7 +687,7 @@ xfs_dir2_block_lookup_int(
694 * Get pointer to the entry from the leaf. 687 * Get pointer to the entry from the leaf.
695 */ 688 */
696 dep = (xfs_dir2_data_entry_t *) 689 dep = (xfs_dir2_data_entry_t *)
697 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); 690 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
698 /* 691 /*
699 * Compare name and if it's an exact match, return the index 692 * Compare name and if it's an exact match, return the index
700 * and buffer. If it's the first case-insensitive match, store 693 * and buffer. If it's the first case-insensitive match, store
@@ -733,7 +726,7 @@ int /* error */
733xfs_dir2_block_removename( 726xfs_dir2_block_removename(
734 xfs_da_args_t *args) /* directory operation args */ 727 xfs_da_args_t *args) /* directory operation args */
735{ 728{
736 xfs_dir2_block_t *block; /* block structure */ 729 xfs_dir2_data_hdr_t *hdr; /* block header */
737 xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ 730 xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */
738 xfs_dabuf_t *bp; /* block buffer */ 731 xfs_dabuf_t *bp; /* block buffer */
739 xfs_dir2_block_tail_t *btp; /* block tail */ 732 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -760,20 +753,20 @@ xfs_dir2_block_removename(
760 dp = args->dp; 753 dp = args->dp;
761 tp = args->trans; 754 tp = args->trans;
762 mp = dp->i_mount; 755 mp = dp->i_mount;
763 block = bp->data; 756 hdr = bp->data;
764 btp = xfs_dir2_block_tail_p(mp, block); 757 btp = xfs_dir2_block_tail_p(mp, hdr);
765 blp = xfs_dir2_block_leaf_p(btp); 758 blp = xfs_dir2_block_leaf_p(btp);
766 /* 759 /*
767 * Point to the data entry using the leaf entry. 760 * Point to the data entry using the leaf entry.
768 */ 761 */
769 dep = (xfs_dir2_data_entry_t *) 762 dep = (xfs_dir2_data_entry_t *)
770 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 763 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
771 /* 764 /*
772 * Mark the data entry's space free. 765 * Mark the data entry's space free.
773 */ 766 */
774 needlog = needscan = 0; 767 needlog = needscan = 0;
775 xfs_dir2_data_make_free(tp, bp, 768 xfs_dir2_data_make_free(tp, bp,
776 (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), 769 (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
777 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 770 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
778 /* 771 /*
779 * Fix up the block tail. 772 * Fix up the block tail.
@@ -789,15 +782,15 @@ xfs_dir2_block_removename(
789 * Fix up bestfree, log the header if necessary. 782 * Fix up bestfree, log the header if necessary.
790 */ 783 */
791 if (needscan) 784 if (needscan)
792 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 785 xfs_dir2_data_freescan(mp, hdr, &needlog);
793 if (needlog) 786 if (needlog)
794 xfs_dir2_data_log_header(tp, bp); 787 xfs_dir2_data_log_header(tp, bp);
795 xfs_dir2_data_check(dp, bp); 788 xfs_dir2_data_check(dp, bp);
796 /* 789 /*
797 * See if the size as a shortform is good enough. 790 * See if the size as a shortform is good enough.
798 */ 791 */
799 if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > 792 size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
800 XFS_IFORK_DSIZE(dp)) { 793 if (size > XFS_IFORK_DSIZE(dp)) {
801 xfs_da_buf_done(bp); 794 xfs_da_buf_done(bp);
802 return 0; 795 return 0;
803 } 796 }
@@ -815,7 +808,7 @@ int /* error */
815xfs_dir2_block_replace( 808xfs_dir2_block_replace(
816 xfs_da_args_t *args) /* directory operation args */ 809 xfs_da_args_t *args) /* directory operation args */
817{ 810{
818 xfs_dir2_block_t *block; /* block structure */ 811 xfs_dir2_data_hdr_t *hdr; /* block header */
819 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 812 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
820 xfs_dabuf_t *bp; /* block buffer */ 813 xfs_dabuf_t *bp; /* block buffer */
821 xfs_dir2_block_tail_t *btp; /* block tail */ 814 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -836,14 +829,14 @@ xfs_dir2_block_replace(
836 } 829 }
837 dp = args->dp; 830 dp = args->dp;
838 mp = dp->i_mount; 831 mp = dp->i_mount;
839 block = bp->data; 832 hdr = bp->data;
840 btp = xfs_dir2_block_tail_p(mp, block); 833 btp = xfs_dir2_block_tail_p(mp, hdr);
841 blp = xfs_dir2_block_leaf_p(btp); 834 blp = xfs_dir2_block_leaf_p(btp);
842 /* 835 /*
843 * Point to the data entry we need to change. 836 * Point to the data entry we need to change.
844 */ 837 */
845 dep = (xfs_dir2_data_entry_t *) 838 dep = (xfs_dir2_data_entry_t *)
846 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 839 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
847 ASSERT(be64_to_cpu(dep->inumber) != args->inumber); 840 ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
848 /* 841 /*
849 * Change the inode number to the new value. 842 * Change the inode number to the new value.
@@ -882,7 +875,7 @@ xfs_dir2_leaf_to_block(
882 xfs_dabuf_t *dbp) /* data buffer */ 875 xfs_dabuf_t *dbp) /* data buffer */
883{ 876{
884 __be16 *bestsp; /* leaf bests table */ 877 __be16 *bestsp; /* leaf bests table */
885 xfs_dir2_block_t *block; /* block structure */ 878 xfs_dir2_data_hdr_t *hdr; /* block header */
886 xfs_dir2_block_tail_t *btp; /* block tail */ 879 xfs_dir2_block_tail_t *btp; /* block tail */
887 xfs_inode_t *dp; /* incore directory inode */ 880 xfs_inode_t *dp; /* incore directory inode */
888 xfs_dir2_data_unused_t *dup; /* unused data entry */ 881 xfs_dir2_data_unused_t *dup; /* unused data entry */
@@ -906,7 +899,7 @@ xfs_dir2_leaf_to_block(
906 tp = args->trans; 899 tp = args->trans;
907 mp = dp->i_mount; 900 mp = dp->i_mount;
908 leaf = lbp->data; 901 leaf = lbp->data;
909 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 902 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
910 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 903 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
911 /* 904 /*
912 * If there are data blocks other than the first one, take this 905 * If there are data blocks other than the first one, take this
@@ -917,7 +910,7 @@ xfs_dir2_leaf_to_block(
917 while (dp->i_d.di_size > mp->m_dirblksize) { 910 while (dp->i_d.di_size > mp->m_dirblksize) {
918 bestsp = xfs_dir2_leaf_bests_p(ltp); 911 bestsp = xfs_dir2_leaf_bests_p(ltp);
919 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == 912 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
920 mp->m_dirblksize - (uint)sizeof(block->hdr)) { 913 mp->m_dirblksize - (uint)sizeof(*hdr)) {
921 if ((error = 914 if ((error =
922 xfs_dir2_leaf_trim_data(args, lbp, 915 xfs_dir2_leaf_trim_data(args, lbp,
923 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) 916 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -935,18 +928,18 @@ xfs_dir2_leaf_to_block(
935 XFS_DATA_FORK))) { 928 XFS_DATA_FORK))) {
936 goto out; 929 goto out;
937 } 930 }
938 block = dbp->data; 931 hdr = dbp->data;
939 ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC); 932 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
940 /* 933 /*
941 * Size of the "leaf" area in the block. 934 * Size of the "leaf" area in the block.
942 */ 935 */
943 size = (uint)sizeof(block->tail) + 936 size = (uint)sizeof(xfs_dir2_block_tail_t) +
944 (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 937 (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
945 /* 938 /*
946 * Look at the last data entry. 939 * Look at the last data entry.
947 */ 940 */
948 tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1; 941 tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
949 dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); 942 dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
950 /* 943 /*
951 * If it's not free or is too short we can't do it. 944 * If it's not free or is too short we can't do it.
952 */ 945 */
@@ -958,7 +951,7 @@ xfs_dir2_leaf_to_block(
958 /* 951 /*
959 * Start converting it to block form. 952 * Start converting it to block form.
960 */ 953 */
961 block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 954 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
962 needlog = 1; 955 needlog = 1;
963 needscan = 0; 956 needscan = 0;
964 /* 957 /*
@@ -969,7 +962,7 @@ xfs_dir2_leaf_to_block(
969 /* 962 /*
970 * Initialize the block tail. 963 * Initialize the block tail.
971 */ 964 */
972 btp = xfs_dir2_block_tail_p(mp, block); 965 btp = xfs_dir2_block_tail_p(mp, hdr);
973 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 966 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
974 btp->stale = 0; 967 btp->stale = 0;
975 xfs_dir2_block_log_tail(tp, dbp); 968 xfs_dir2_block_log_tail(tp, dbp);
@@ -978,7 +971,8 @@ xfs_dir2_leaf_to_block(
978 */ 971 */
979 lep = xfs_dir2_block_leaf_p(btp); 972 lep = xfs_dir2_block_leaf_p(btp);
980 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 973 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
981 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) 974 if (leaf->ents[from].address ==
975 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
982 continue; 976 continue;
983 lep[to++] = leaf->ents[from]; 977 lep[to++] = leaf->ents[from];
984 } 978 }
@@ -988,7 +982,7 @@ xfs_dir2_leaf_to_block(
988 * Scan the bestfree if we need it and log the data block header. 982 * Scan the bestfree if we need it and log the data block header.
989 */ 983 */
990 if (needscan) 984 if (needscan)
991 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 985 xfs_dir2_data_freescan(mp, hdr, &needlog);
992 if (needlog) 986 if (needlog)
993 xfs_dir2_data_log_header(tp, dbp); 987 xfs_dir2_data_log_header(tp, dbp);
994 /* 988 /*
@@ -1002,8 +996,8 @@ xfs_dir2_leaf_to_block(
1002 /* 996 /*
1003 * Now see if the resulting block can be shrunken to shortform. 997 * Now see if the resulting block can be shrunken to shortform.
1004 */ 998 */
1005 if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > 999 size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
1006 XFS_IFORK_DSIZE(dp)) { 1000 if (size > XFS_IFORK_DSIZE(dp)) {
1007 error = 0; 1001 error = 0;
1008 goto out; 1002 goto out;
1009 } 1003 }
@@ -1024,12 +1018,10 @@ xfs_dir2_sf_to_block(
1024 xfs_da_args_t *args) /* operation arguments */ 1018 xfs_da_args_t *args) /* operation arguments */
1025{ 1019{
1026 xfs_dir2_db_t blkno; /* dir-relative block # (0) */ 1020 xfs_dir2_db_t blkno; /* dir-relative block # (0) */
1027 xfs_dir2_block_t *block; /* block structure */ 1021 xfs_dir2_data_hdr_t *hdr; /* block header */
1028 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 1022 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
1029 xfs_dabuf_t *bp; /* block buffer */ 1023 xfs_dabuf_t *bp; /* block buffer */
1030 xfs_dir2_block_tail_t *btp; /* block tail pointer */ 1024 xfs_dir2_block_tail_t *btp; /* block tail pointer */
1031 char *buf; /* sf buffer */
1032 int buf_len;
1033 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 1025 xfs_dir2_data_entry_t *dep; /* data entry pointer */
1034 xfs_inode_t *dp; /* incore directory inode */ 1026 xfs_inode_t *dp; /* incore directory inode */
1035 int dummy; /* trash */ 1027 int dummy; /* trash */
@@ -1043,7 +1035,8 @@ xfs_dir2_sf_to_block(
1043 int newoffset; /* offset from current entry */ 1035 int newoffset; /* offset from current entry */
1044 int offset; /* target block offset */ 1036 int offset; /* target block offset */
1045 xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ 1037 xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */
1046 xfs_dir2_sf_t *sfp; /* shortform structure */ 1038 xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */
1039 xfs_dir2_sf_hdr_t *sfp; /* shortform header */
1047 __be16 *tagp; /* end of data entry */ 1040 __be16 *tagp; /* end of data entry */
1048 xfs_trans_t *tp; /* transaction pointer */ 1041 xfs_trans_t *tp; /* transaction pointer */
1049 struct xfs_name name; 1042 struct xfs_name name;
@@ -1061,32 +1054,30 @@ xfs_dir2_sf_to_block(
1061 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1054 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1062 return XFS_ERROR(EIO); 1055 return XFS_ERROR(EIO);
1063 } 1056 }
1057
1058 oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1059
1064 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1060 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1065 ASSERT(dp->i_df.if_u1.if_data != NULL); 1061 ASSERT(dp->i_df.if_u1.if_data != NULL);
1066 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1062 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
1067 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 1063
1068 /* 1064 /*
1069 * Copy the directory into the stack buffer. 1065 * Copy the directory into a temporary buffer.
1070 * Then pitch the incore inode data so we can make extents. 1066 * Then pitch the incore inode data so we can make extents.
1071 */ 1067 */
1068 sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
1069 memcpy(sfp, oldsfp, dp->i_df.if_bytes);
1072 1070
1073 buf_len = dp->i_df.if_bytes; 1071 xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
1074 buf = kmem_alloc(buf_len, KM_SLEEP);
1075
1076 memcpy(buf, sfp, buf_len);
1077 xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
1078 dp->i_d.di_size = 0; 1072 dp->i_d.di_size = 0;
1079 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1073 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1080 /* 1074
1081 * Reset pointer - old sfp is gone.
1082 */
1083 sfp = (xfs_dir2_sf_t *)buf;
1084 /* 1075 /*
1085 * Add block 0 to the inode. 1076 * Add block 0 to the inode.
1086 */ 1077 */
1087 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); 1078 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
1088 if (error) { 1079 if (error) {
1089 kmem_free(buf); 1080 kmem_free(sfp);
1090 return error; 1081 return error;
1091 } 1082 }
1092 /* 1083 /*
@@ -1094,21 +1085,21 @@ xfs_dir2_sf_to_block(
1094 */ 1085 */
1095 error = xfs_dir2_data_init(args, blkno, &bp); 1086 error = xfs_dir2_data_init(args, blkno, &bp);
1096 if (error) { 1087 if (error) {
1097 kmem_free(buf); 1088 kmem_free(sfp);
1098 return error; 1089 return error;
1099 } 1090 }
1100 block = bp->data; 1091 hdr = bp->data;
1101 block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 1092 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
1102 /* 1093 /*
1103 * Compute size of block "tail" area. 1094 * Compute size of block "tail" area.
1104 */ 1095 */
1105 i = (uint)sizeof(*btp) + 1096 i = (uint)sizeof(*btp) +
1106 (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); 1097 (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
1107 /* 1098 /*
1108 * The whole thing is initialized to free by the init routine. 1099 * The whole thing is initialized to free by the init routine.
1109 * Say we're using the leaf and tail area. 1100 * Say we're using the leaf and tail area.
1110 */ 1101 */
1111 dup = (xfs_dir2_data_unused_t *)block->u; 1102 dup = (xfs_dir2_data_unused_t *)(hdr + 1);
1112 needlog = needscan = 0; 1103 needlog = needscan = 0;
1113 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, 1104 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
1114 &needscan); 1105 &needscan);
@@ -1116,50 +1107,51 @@ xfs_dir2_sf_to_block(
1116 /* 1107 /*
1117 * Fill in the tail. 1108 * Fill in the tail.
1118 */ 1109 */
1119 btp = xfs_dir2_block_tail_p(mp, block); 1110 btp = xfs_dir2_block_tail_p(mp, hdr);
1120 btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ 1111 btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */
1121 btp->stale = 0; 1112 btp->stale = 0;
1122 blp = xfs_dir2_block_leaf_p(btp); 1113 blp = xfs_dir2_block_leaf_p(btp);
1123 endoffset = (uint)((char *)blp - (char *)block); 1114 endoffset = (uint)((char *)blp - (char *)hdr);
1124 /* 1115 /*
1125 * Remove the freespace, we'll manage it. 1116 * Remove the freespace, we'll manage it.
1126 */ 1117 */
1127 xfs_dir2_data_use_free(tp, bp, dup, 1118 xfs_dir2_data_use_free(tp, bp, dup,
1128 (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), 1119 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
1129 be16_to_cpu(dup->length), &needlog, &needscan); 1120 be16_to_cpu(dup->length), &needlog, &needscan);
1130 /* 1121 /*
1131 * Create entry for . 1122 * Create entry for .
1132 */ 1123 */
1133 dep = (xfs_dir2_data_entry_t *) 1124 dep = (xfs_dir2_data_entry_t *)
1134 ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); 1125 ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
1135 dep->inumber = cpu_to_be64(dp->i_ino); 1126 dep->inumber = cpu_to_be64(dp->i_ino);
1136 dep->namelen = 1; 1127 dep->namelen = 1;
1137 dep->name[0] = '.'; 1128 dep->name[0] = '.';
1138 tagp = xfs_dir2_data_entry_tag_p(dep); 1129 tagp = xfs_dir2_data_entry_tag_p(dep);
1139 *tagp = cpu_to_be16((char *)dep - (char *)block); 1130 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1140 xfs_dir2_data_log_entry(tp, bp, dep); 1131 xfs_dir2_data_log_entry(tp, bp, dep);
1141 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); 1132 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
1142 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1133 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1143 (char *)dep - (char *)block)); 1134 (char *)dep - (char *)hdr));
1144 /* 1135 /*
1145 * Create entry for .. 1136 * Create entry for ..
1146 */ 1137 */
1147 dep = (xfs_dir2_data_entry_t *) 1138 dep = (xfs_dir2_data_entry_t *)
1148 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); 1139 ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
1149 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); 1140 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
1150 dep->namelen = 2; 1141 dep->namelen = 2;
1151 dep->name[0] = dep->name[1] = '.'; 1142 dep->name[0] = dep->name[1] = '.';
1152 tagp = xfs_dir2_data_entry_tag_p(dep); 1143 tagp = xfs_dir2_data_entry_tag_p(dep);
1153 *tagp = cpu_to_be16((char *)dep - (char *)block); 1144 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1154 xfs_dir2_data_log_entry(tp, bp, dep); 1145 xfs_dir2_data_log_entry(tp, bp, dep);
1155 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1146 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1156 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1147 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1157 (char *)dep - (char *)block)); 1148 (char *)dep - (char *)hdr));
1158 offset = XFS_DIR2_DATA_FIRST_OFFSET; 1149 offset = XFS_DIR2_DATA_FIRST_OFFSET;
1159 /* 1150 /*
1160 * Loop over existing entries, stuff them in. 1151 * Loop over existing entries, stuff them in.
1161 */ 1152 */
1162 if ((i = 0) == sfp->hdr.count) 1153 i = 0;
1154 if (!sfp->count)
1163 sfep = NULL; 1155 sfep = NULL;
1164 else 1156 else
1165 sfep = xfs_dir2_sf_firstentry(sfp); 1157 sfep = xfs_dir2_sf_firstentry(sfp);
@@ -1179,43 +1171,40 @@ xfs_dir2_sf_to_block(
1179 * There should be a hole here, make one. 1171 * There should be a hole here, make one.
1180 */ 1172 */
1181 if (offset < newoffset) { 1173 if (offset < newoffset) {
1182 dup = (xfs_dir2_data_unused_t *) 1174 dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
1183 ((char *)block + offset);
1184 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 1175 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
1185 dup->length = cpu_to_be16(newoffset - offset); 1176 dup->length = cpu_to_be16(newoffset - offset);
1186 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( 1177 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
1187 ((char *)dup - (char *)block)); 1178 ((char *)dup - (char *)hdr));
1188 xfs_dir2_data_log_unused(tp, bp, dup); 1179 xfs_dir2_data_log_unused(tp, bp, dup);
1189 (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, 1180 xfs_dir2_data_freeinsert(hdr, dup, &dummy);
1190 dup, &dummy);
1191 offset += be16_to_cpu(dup->length); 1181 offset += be16_to_cpu(dup->length);
1192 continue; 1182 continue;
1193 } 1183 }
1194 /* 1184 /*
1195 * Copy a real entry. 1185 * Copy a real entry.
1196 */ 1186 */
1197 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); 1187 dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
1198 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, 1188 dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
1199 xfs_dir2_sf_inumberp(sfep)));
1200 dep->namelen = sfep->namelen; 1189 dep->namelen = sfep->namelen;
1201 memcpy(dep->name, sfep->name, dep->namelen); 1190 memcpy(dep->name, sfep->name, dep->namelen);
1202 tagp = xfs_dir2_data_entry_tag_p(dep); 1191 tagp = xfs_dir2_data_entry_tag_p(dep);
1203 *tagp = cpu_to_be16((char *)dep - (char *)block); 1192 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1204 xfs_dir2_data_log_entry(tp, bp, dep); 1193 xfs_dir2_data_log_entry(tp, bp, dep);
1205 name.name = sfep->name; 1194 name.name = sfep->name;
1206 name.len = sfep->namelen; 1195 name.len = sfep->namelen;
1207 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> 1196 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
1208 hashname(&name)); 1197 hashname(&name));
1209 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1198 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1210 (char *)dep - (char *)block)); 1199 (char *)dep - (char *)hdr));
1211 offset = (int)((char *)(tagp + 1) - (char *)block); 1200 offset = (int)((char *)(tagp + 1) - (char *)hdr);
1212 if (++i == sfp->hdr.count) 1201 if (++i == sfp->count)
1213 sfep = NULL; 1202 sfep = NULL;
1214 else 1203 else
1215 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 1204 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
1216 } 1205 }
1217 /* Done with the temporary buffer */ 1206 /* Done with the temporary buffer */
1218 kmem_free(buf); 1207 kmem_free(sfp);
1219 /* 1208 /*
1220 * Sort the leaf entries by hash value. 1209 * Sort the leaf entries by hash value.
1221 */ 1210 */
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
deleted file mode 100644
index 10e689676382..000000000000
--- a/fs/xfs/xfs_dir2_block.h
+++ /dev/null
@@ -1,92 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_BLOCK_H__
19#define __XFS_DIR2_BLOCK_H__
20
21/*
22 * xfs_dir2_block.h
23 * Directory version 2, single block format structures
24 */
25
26struct uio;
27struct xfs_dabuf;
28struct xfs_da_args;
29struct xfs_dir2_data_hdr;
30struct xfs_dir2_leaf_entry;
31struct xfs_inode;
32struct xfs_mount;
33struct xfs_trans;
34
35/*
36 * The single block format is as follows:
37 * xfs_dir2_data_hdr_t structure
38 * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
39 * xfs_dir2_leaf_entry_t structures
40 * xfs_dir2_block_tail_t structure
41 */
42
43#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */
44
45typedef struct xfs_dir2_block_tail {
46 __be32 count; /* count of leaf entries */
47 __be32 stale; /* count of stale lf entries */
48} xfs_dir2_block_tail_t;
49
50/*
51 * Generic single-block structure, for xfs_db.
52 */
53typedef struct xfs_dir2_block {
54 xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */
55 xfs_dir2_data_union_t u[1];
56 xfs_dir2_leaf_entry_t leaf[1];
57 xfs_dir2_block_tail_t tail;
58} xfs_dir2_block_t;
59
60/*
61 * Pointer to the leaf header embedded in a data block (1-block format)
62 */
63static inline xfs_dir2_block_tail_t *
64xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
65{
66 return (((xfs_dir2_block_tail_t *)
67 ((char *)(block) + (mp)->m_dirblksize)) - 1);
68}
69
70/*
71 * Pointer to the leaf entries embedded in a data block (1-block format)
72 */
73static inline struct xfs_dir2_leaf_entry *
74xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
75{
76 return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
77}
78
79/*
80 * Function declarations.
81 */
82extern int xfs_dir2_block_addname(struct xfs_da_args *args);
83extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
84 xfs_off_t *offset, filldir_t filldir);
85extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
86extern int xfs_dir2_block_removename(struct xfs_da_args *args);
87extern int xfs_dir2_block_replace(struct xfs_da_args *args);
88extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
89 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
90extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
91
92#endif /* __XFS_DIR2_BLOCK_H__ */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 921595b84f5b..5bbe2a8a023f 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -23,18 +23,18 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_dir2_data.h" 31#include "xfs_dir2_format.h"
34#include "xfs_dir2_leaf.h" 32#include "xfs_dir2_priv.h"
35#include "xfs_dir2_block.h"
36#include "xfs_error.h" 33#include "xfs_error.h"
37 34
35STATIC xfs_dir2_data_free_t *
36xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
37
38#ifdef DEBUG 38#ifdef DEBUG
39/* 39/*
40 * Check the consistency of the data block. 40 * Check the consistency of the data block.
@@ -50,7 +50,7 @@ xfs_dir2_data_check(
50 xfs_dir2_data_free_t *bf; /* bestfree table */ 50 xfs_dir2_data_free_t *bf; /* bestfree table */
51 xfs_dir2_block_tail_t *btp=NULL; /* block tail */ 51 xfs_dir2_block_tail_t *btp=NULL; /* block tail */
52 int count; /* count of entries found */ 52 int count; /* count of entries found */
53 xfs_dir2_data_t *d; /* data block pointer */ 53 xfs_dir2_data_hdr_t *hdr; /* data block header */
54 xfs_dir2_data_entry_t *dep; /* data entry */ 54 xfs_dir2_data_entry_t *dep; /* data entry */
55 xfs_dir2_data_free_t *dfp; /* bestfree entry */ 55 xfs_dir2_data_free_t *dfp; /* bestfree entry */
56 xfs_dir2_data_unused_t *dup; /* unused entry */ 56 xfs_dir2_data_unused_t *dup; /* unused entry */
@@ -66,17 +66,19 @@ xfs_dir2_data_check(
66 struct xfs_name name; 66 struct xfs_name name;
67 67
68 mp = dp->i_mount; 68 mp = dp->i_mount;
69 d = bp->data; 69 hdr = bp->data;
70 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 70 bf = hdr->bestfree;
71 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 71 p = (char *)(hdr + 1);
72 bf = d->hdr.bestfree; 72
73 p = (char *)d->u; 73 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
74 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 74 btp = xfs_dir2_block_tail_p(mp, hdr);
75 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
76 lep = xfs_dir2_block_leaf_p(btp); 75 lep = xfs_dir2_block_leaf_p(btp);
77 endp = (char *)lep; 76 endp = (char *)lep;
78 } else 77 } else {
79 endp = (char *)d + mp->m_dirblksize; 78 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
79 endp = (char *)hdr + mp->m_dirblksize;
80 }
81
80 count = lastfree = freeseen = 0; 82 count = lastfree = freeseen = 0;
81 /* 83 /*
82 * Account for zero bestfree entries. 84 * Account for zero bestfree entries.
@@ -108,8 +110,8 @@ xfs_dir2_data_check(
108 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 110 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
109 ASSERT(lastfree == 0); 111 ASSERT(lastfree == 0);
110 ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == 112 ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
111 (char *)dup - (char *)d); 113 (char *)dup - (char *)hdr);
112 dfp = xfs_dir2_data_freefind(d, dup); 114 dfp = xfs_dir2_data_freefind(hdr, dup);
113 if (dfp) { 115 if (dfp) {
114 i = (int)(dfp - bf); 116 i = (int)(dfp - bf);
115 ASSERT((freeseen & (1 << i)) == 0); 117 ASSERT((freeseen & (1 << i)) == 0);
@@ -132,13 +134,13 @@ xfs_dir2_data_check(
132 ASSERT(dep->namelen != 0); 134 ASSERT(dep->namelen != 0);
133 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); 135 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
134 ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == 136 ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
135 (char *)dep - (char *)d); 137 (char *)dep - (char *)hdr);
136 count++; 138 count++;
137 lastfree = 0; 139 lastfree = 0;
138 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 140 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
139 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 141 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
140 (xfs_dir2_data_aoff_t) 142 (xfs_dir2_data_aoff_t)
141 ((char *)dep - (char *)d)); 143 ((char *)dep - (char *)hdr));
142 name.name = dep->name; 144 name.name = dep->name;
143 name.len = dep->namelen; 145 name.len = dep->namelen;
144 hash = mp->m_dirnameops->hashname(&name); 146 hash = mp->m_dirnameops->hashname(&name);
@@ -155,9 +157,10 @@ xfs_dir2_data_check(
155 * Need to have seen all the entries and all the bestfree slots. 157 * Need to have seen all the entries and all the bestfree slots.
156 */ 158 */
157 ASSERT(freeseen == 7); 159 ASSERT(freeseen == 7);
158 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 160 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
159 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 161 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
160 if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR) 162 if (lep[i].address ==
163 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
161 stale++; 164 stale++;
162 if (i > 0) 165 if (i > 0)
163 ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); 166 ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval));
@@ -172,9 +175,9 @@ xfs_dir2_data_check(
172 * Given a data block and an unused entry from that block, 175 * Given a data block and an unused entry from that block,
173 * return the bestfree entry if any that corresponds to it. 176 * return the bestfree entry if any that corresponds to it.
174 */ 177 */
175xfs_dir2_data_free_t * 178STATIC xfs_dir2_data_free_t *
176xfs_dir2_data_freefind( 179xfs_dir2_data_freefind(
177 xfs_dir2_data_t *d, /* data block */ 180 xfs_dir2_data_hdr_t *hdr, /* data block */
178 xfs_dir2_data_unused_t *dup) /* data unused entry */ 181 xfs_dir2_data_unused_t *dup) /* data unused entry */
179{ 182{
180 xfs_dir2_data_free_t *dfp; /* bestfree entry */ 183 xfs_dir2_data_free_t *dfp; /* bestfree entry */
@@ -184,17 +187,17 @@ xfs_dir2_data_freefind(
184 int seenzero; /* saw a 0 bestfree entry */ 187 int seenzero; /* saw a 0 bestfree entry */
185#endif 188#endif
186 189
187 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d); 190 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
188#if defined(DEBUG) && defined(__KERNEL__) 191#if defined(DEBUG) && defined(__KERNEL__)
189 /* 192 /*
190 * Validate some consistency in the bestfree table. 193 * Validate some consistency in the bestfree table.
191 * Check order, non-overlapping entries, and if we find the 194 * Check order, non-overlapping entries, and if we find the
192 * one we're looking for it has to be exact. 195 * one we're looking for it has to be exact.
193 */ 196 */
194 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 197 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
195 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 198 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
196 for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; 199 for (dfp = &hdr->bestfree[0], seenzero = matched = 0;
197 dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; 200 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
198 dfp++) { 201 dfp++) {
199 if (!dfp->offset) { 202 if (!dfp->offset) {
200 ASSERT(!dfp->length); 203 ASSERT(!dfp->length);
@@ -210,7 +213,7 @@ xfs_dir2_data_freefind(
210 else 213 else
211 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); 214 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
212 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); 215 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
213 if (dfp > &d->hdr.bestfree[0]) 216 if (dfp > &hdr->bestfree[0])
214 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); 217 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
215 } 218 }
216#endif 219#endif
@@ -219,13 +222,13 @@ xfs_dir2_data_freefind(
219 * it can't be there since they're sorted. 222 * it can't be there since they're sorted.
220 */ 223 */
221 if (be16_to_cpu(dup->length) < 224 if (be16_to_cpu(dup->length) <
222 be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) 225 be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
223 return NULL; 226 return NULL;
224 /* 227 /*
225 * Look at the three bestfree entries for our guy. 228 * Look at the three bestfree entries for our guy.
226 */ 229 */
227 for (dfp = &d->hdr.bestfree[0]; 230 for (dfp = &hdr->bestfree[0];
228 dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; 231 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
229 dfp++) { 232 dfp++) {
230 if (!dfp->offset) 233 if (!dfp->offset)
231 return NULL; 234 return NULL;
@@ -243,7 +246,7 @@ xfs_dir2_data_freefind(
243 */ 246 */
244xfs_dir2_data_free_t * /* entry inserted */ 247xfs_dir2_data_free_t * /* entry inserted */
245xfs_dir2_data_freeinsert( 248xfs_dir2_data_freeinsert(
246 xfs_dir2_data_t *d, /* data block pointer */ 249 xfs_dir2_data_hdr_t *hdr, /* data block pointer */
247 xfs_dir2_data_unused_t *dup, /* unused space */ 250 xfs_dir2_data_unused_t *dup, /* unused space */
248 int *loghead) /* log the data header (out) */ 251 int *loghead) /* log the data header (out) */
249{ 252{
@@ -251,12 +254,13 @@ xfs_dir2_data_freeinsert(
251 xfs_dir2_data_free_t new; /* new bestfree entry */ 254 xfs_dir2_data_free_t new; /* new bestfree entry */
252 255
253#ifdef __KERNEL__ 256#ifdef __KERNEL__
254 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 257 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
255 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 258 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
256#endif 259#endif
257 dfp = d->hdr.bestfree; 260 dfp = hdr->bestfree;
258 new.length = dup->length; 261 new.length = dup->length;
259 new.offset = cpu_to_be16((char *)dup - (char *)d); 262 new.offset = cpu_to_be16((char *)dup - (char *)hdr);
263
260 /* 264 /*
261 * Insert at position 0, 1, or 2; or not at all. 265 * Insert at position 0, 1, or 2; or not at all.
262 */ 266 */
@@ -286,36 +290,36 @@ xfs_dir2_data_freeinsert(
286 */ 290 */
287STATIC void 291STATIC void
288xfs_dir2_data_freeremove( 292xfs_dir2_data_freeremove(
289 xfs_dir2_data_t *d, /* data block pointer */ 293 xfs_dir2_data_hdr_t *hdr, /* data block header */
290 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ 294 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */
291 int *loghead) /* out: log data header */ 295 int *loghead) /* out: log data header */
292{ 296{
293#ifdef __KERNEL__ 297#ifdef __KERNEL__
294 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 298 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
295 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 299 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
296#endif 300#endif
297 /* 301 /*
298 * It's the first entry, slide the next 2 up. 302 * It's the first entry, slide the next 2 up.
299 */ 303 */
300 if (dfp == &d->hdr.bestfree[0]) { 304 if (dfp == &hdr->bestfree[0]) {
301 d->hdr.bestfree[0] = d->hdr.bestfree[1]; 305 hdr->bestfree[0] = hdr->bestfree[1];
302 d->hdr.bestfree[1] = d->hdr.bestfree[2]; 306 hdr->bestfree[1] = hdr->bestfree[2];
303 } 307 }
304 /* 308 /*
305 * It's the second entry, slide the 3rd entry up. 309 * It's the second entry, slide the 3rd entry up.
306 */ 310 */
307 else if (dfp == &d->hdr.bestfree[1]) 311 else if (dfp == &hdr->bestfree[1])
308 d->hdr.bestfree[1] = d->hdr.bestfree[2]; 312 hdr->bestfree[1] = hdr->bestfree[2];
309 /* 313 /*
310 * Must be the last entry. 314 * Must be the last entry.
311 */ 315 */
312 else 316 else
313 ASSERT(dfp == &d->hdr.bestfree[2]); 317 ASSERT(dfp == &hdr->bestfree[2]);
314 /* 318 /*
315 * Clear the 3rd entry, must be zero now. 319 * Clear the 3rd entry, must be zero now.
316 */ 320 */
317 d->hdr.bestfree[2].length = 0; 321 hdr->bestfree[2].length = 0;
318 d->hdr.bestfree[2].offset = 0; 322 hdr->bestfree[2].offset = 0;
319 *loghead = 1; 323 *loghead = 1;
320} 324}
321 325
@@ -325,7 +329,7 @@ xfs_dir2_data_freeremove(
325void 329void
326xfs_dir2_data_freescan( 330xfs_dir2_data_freescan(
327 xfs_mount_t *mp, /* filesystem mount point */ 331 xfs_mount_t *mp, /* filesystem mount point */
328 xfs_dir2_data_t *d, /* data block pointer */ 332 xfs_dir2_data_hdr_t *hdr, /* data block header */
329 int *loghead) /* out: log data header */ 333 int *loghead) /* out: log data header */
330{ 334{
331 xfs_dir2_block_tail_t *btp; /* block tail */ 335 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -335,23 +339,23 @@ xfs_dir2_data_freescan(
335 char *p; /* current entry pointer */ 339 char *p; /* current entry pointer */
336 340
337#ifdef __KERNEL__ 341#ifdef __KERNEL__
338 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 342 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
339 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 343 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
340#endif 344#endif
341 /* 345 /*
342 * Start by clearing the table. 346 * Start by clearing the table.
343 */ 347 */
344 memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree)); 348 memset(hdr->bestfree, 0, sizeof(hdr->bestfree));
345 *loghead = 1; 349 *loghead = 1;
346 /* 350 /*
347 * Set up pointers. 351 * Set up pointers.
348 */ 352 */
349 p = (char *)d->u; 353 p = (char *)(hdr + 1);
350 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 354 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
351 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); 355 btp = xfs_dir2_block_tail_p(mp, hdr);
352 endp = (char *)xfs_dir2_block_leaf_p(btp); 356 endp = (char *)xfs_dir2_block_leaf_p(btp);
353 } else 357 } else
354 endp = (char *)d + mp->m_dirblksize; 358 endp = (char *)hdr + mp->m_dirblksize;
355 /* 359 /*
356 * Loop over the block's entries. 360 * Loop over the block's entries.
357 */ 361 */
@@ -361,9 +365,9 @@ xfs_dir2_data_freescan(
361 * If it's a free entry, insert it. 365 * If it's a free entry, insert it.
362 */ 366 */
363 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 367 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
364 ASSERT((char *)dup - (char *)d == 368 ASSERT((char *)dup - (char *)hdr ==
365 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); 369 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
366 xfs_dir2_data_freeinsert(d, dup, loghead); 370 xfs_dir2_data_freeinsert(hdr, dup, loghead);
367 p += be16_to_cpu(dup->length); 371 p += be16_to_cpu(dup->length);
368 } 372 }
369 /* 373 /*
@@ -371,7 +375,7 @@ xfs_dir2_data_freescan(
371 */ 375 */
372 else { 376 else {
373 dep = (xfs_dir2_data_entry_t *)p; 377 dep = (xfs_dir2_data_entry_t *)p;
374 ASSERT((char *)dep - (char *)d == 378 ASSERT((char *)dep - (char *)hdr ==
375 be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep))); 379 be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
376 p += xfs_dir2_data_entsize(dep->namelen); 380 p += xfs_dir2_data_entsize(dep->namelen);
377 } 381 }
@@ -389,7 +393,7 @@ xfs_dir2_data_init(
389 xfs_dabuf_t **bpp) /* output block buffer */ 393 xfs_dabuf_t **bpp) /* output block buffer */
390{ 394{
391 xfs_dabuf_t *bp; /* block buffer */ 395 xfs_dabuf_t *bp; /* block buffer */
392 xfs_dir2_data_t *d; /* pointer to block */ 396 xfs_dir2_data_hdr_t *hdr; /* data block header */
393 xfs_inode_t *dp; /* incore directory inode */ 397 xfs_inode_t *dp; /* incore directory inode */
394 xfs_dir2_data_unused_t *dup; /* unused entry pointer */ 398 xfs_dir2_data_unused_t *dup; /* unused entry pointer */
395 int error; /* error return value */ 399 int error; /* error return value */
@@ -410,26 +414,28 @@ xfs_dir2_data_init(
410 return error; 414 return error;
411 } 415 }
412 ASSERT(bp != NULL); 416 ASSERT(bp != NULL);
417
413 /* 418 /*
414 * Initialize the header. 419 * Initialize the header.
415 */ 420 */
416 d = bp->data; 421 hdr = bp->data;
417 d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 422 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
418 d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr)); 423 hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
419 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { 424 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
420 d->hdr.bestfree[i].length = 0; 425 hdr->bestfree[i].length = 0;
421 d->hdr.bestfree[i].offset = 0; 426 hdr->bestfree[i].offset = 0;
422 } 427 }
428
423 /* 429 /*
424 * Set up an unused entry for the block's body. 430 * Set up an unused entry for the block's body.
425 */ 431 */
426 dup = &d->u[0].unused; 432 dup = (xfs_dir2_data_unused_t *)(hdr + 1);
427 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 433 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
428 434
429 t=mp->m_dirblksize - (uint)sizeof(d->hdr); 435 t = mp->m_dirblksize - (uint)sizeof(*hdr);
430 d->hdr.bestfree[0].length = cpu_to_be16(t); 436 hdr->bestfree[0].length = cpu_to_be16(t);
431 dup->length = cpu_to_be16(t); 437 dup->length = cpu_to_be16(t);
432 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d); 438 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
433 /* 439 /*
434 * Log it and return it. 440 * Log it and return it.
435 */ 441 */
@@ -448,14 +454,14 @@ xfs_dir2_data_log_entry(
448 xfs_dabuf_t *bp, /* block buffer */ 454 xfs_dabuf_t *bp, /* block buffer */
449 xfs_dir2_data_entry_t *dep) /* data entry pointer */ 455 xfs_dir2_data_entry_t *dep) /* data entry pointer */
450{ 456{
451 xfs_dir2_data_t *d; /* data block pointer */ 457 xfs_dir2_data_hdr_t *hdr = bp->data;
458
459 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
460 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
452 461
453 d = bp->data; 462 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
454 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
455 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
456 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
457 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - 463 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
458 (char *)d - 1)); 464 (char *)hdr - 1));
459} 465}
460 466
461/* 467/*
@@ -466,13 +472,12 @@ xfs_dir2_data_log_header(
466 xfs_trans_t *tp, /* transaction pointer */ 472 xfs_trans_t *tp, /* transaction pointer */
467 xfs_dabuf_t *bp) /* block buffer */ 473 xfs_dabuf_t *bp) /* block buffer */
468{ 474{
469 xfs_dir2_data_t *d; /* data block pointer */ 475 xfs_dir2_data_hdr_t *hdr = bp->data;
470 476
471 d = bp->data; 477 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
472 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 478 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
473 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 479
474 xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), 480 xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
475 (uint)(sizeof(d->hdr) - 1));
476} 481}
477 482
478/* 483/*
@@ -484,23 +489,23 @@ xfs_dir2_data_log_unused(
484 xfs_dabuf_t *bp, /* block buffer */ 489 xfs_dabuf_t *bp, /* block buffer */
485 xfs_dir2_data_unused_t *dup) /* data unused pointer */ 490 xfs_dir2_data_unused_t *dup) /* data unused pointer */
486{ 491{
487 xfs_dir2_data_t *d; /* data block pointer */ 492 xfs_dir2_data_hdr_t *hdr = bp->data;
493
494 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
495 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
488 496
489 d = bp->data;
490 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
491 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
492 /* 497 /*
493 * Log the first part of the unused entry. 498 * Log the first part of the unused entry.
494 */ 499 */
495 xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d), 500 xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
496 (uint)((char *)&dup->length + sizeof(dup->length) - 501 (uint)((char *)&dup->length + sizeof(dup->length) -
497 1 - (char *)d)); 502 1 - (char *)hdr));
498 /* 503 /*
499 * Log the end (tag) of the unused entry. 504 * Log the end (tag) of the unused entry.
500 */ 505 */
501 xfs_da_log_buf(tp, bp, 506 xfs_da_log_buf(tp, bp,
502 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d), 507 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
503 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d + 508 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
504 sizeof(xfs_dir2_data_off_t) - 1)); 509 sizeof(xfs_dir2_data_off_t) - 1));
505} 510}
506 511
@@ -517,7 +522,7 @@ xfs_dir2_data_make_free(
517 int *needlogp, /* out: log header */ 522 int *needlogp, /* out: log header */
518 int *needscanp) /* out: regen bestfree */ 523 int *needscanp) /* out: regen bestfree */
519{ 524{
520 xfs_dir2_data_t *d; /* data block pointer */ 525 xfs_dir2_data_hdr_t *hdr; /* data block pointer */
521 xfs_dir2_data_free_t *dfp; /* bestfree pointer */ 526 xfs_dir2_data_free_t *dfp; /* bestfree pointer */
522 char *endptr; /* end of data area */ 527 char *endptr; /* end of data area */
523 xfs_mount_t *mp; /* filesystem mount point */ 528 xfs_mount_t *mp; /* filesystem mount point */
@@ -527,28 +532,29 @@ xfs_dir2_data_make_free(
527 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ 532 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
528 533
529 mp = tp->t_mountp; 534 mp = tp->t_mountp;
530 d = bp->data; 535 hdr = bp->data;
536
531 /* 537 /*
532 * Figure out where the end of the data area is. 538 * Figure out where the end of the data area is.
533 */ 539 */
534 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC) 540 if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC))
535 endptr = (char *)d + mp->m_dirblksize; 541 endptr = (char *)hdr + mp->m_dirblksize;
536 else { 542 else {
537 xfs_dir2_block_tail_t *btp; /* block tail */ 543 xfs_dir2_block_tail_t *btp; /* block tail */
538 544
539 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 545 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
540 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); 546 btp = xfs_dir2_block_tail_p(mp, hdr);
541 endptr = (char *)xfs_dir2_block_leaf_p(btp); 547 endptr = (char *)xfs_dir2_block_leaf_p(btp);
542 } 548 }
543 /* 549 /*
544 * If this isn't the start of the block, then back up to 550 * If this isn't the start of the block, then back up to
545 * the previous entry and see if it's free. 551 * the previous entry and see if it's free.
546 */ 552 */
547 if (offset > sizeof(d->hdr)) { 553 if (offset > sizeof(*hdr)) {
548 __be16 *tagp; /* tag just before us */ 554 __be16 *tagp; /* tag just before us */
549 555
550 tagp = (__be16 *)((char *)d + offset) - 1; 556 tagp = (__be16 *)((char *)hdr + offset) - 1;
551 prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp)); 557 prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
552 if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) 558 if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
553 prevdup = NULL; 559 prevdup = NULL;
554 } else 560 } else
@@ -557,9 +563,9 @@ xfs_dir2_data_make_free(
557 * If this isn't the end of the block, see if the entry after 563 * If this isn't the end of the block, see if the entry after
558 * us is free. 564 * us is free.
559 */ 565 */
560 if ((char *)d + offset + len < endptr) { 566 if ((char *)hdr + offset + len < endptr) {
561 postdup = 567 postdup =
562 (xfs_dir2_data_unused_t *)((char *)d + offset + len); 568 (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
563 if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) 569 if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
564 postdup = NULL; 570 postdup = NULL;
565 } else 571 } else
@@ -576,21 +582,21 @@ xfs_dir2_data_make_free(
576 /* 582 /*
577 * See if prevdup and/or postdup are in bestfree table. 583 * See if prevdup and/or postdup are in bestfree table.
578 */ 584 */
579 dfp = xfs_dir2_data_freefind(d, prevdup); 585 dfp = xfs_dir2_data_freefind(hdr, prevdup);
580 dfp2 = xfs_dir2_data_freefind(d, postdup); 586 dfp2 = xfs_dir2_data_freefind(hdr, postdup);
581 /* 587 /*
582 * We need a rescan unless there are exactly 2 free entries 588 * We need a rescan unless there are exactly 2 free entries
583 * namely our two. Then we know what's happening, otherwise 589 * namely our two. Then we know what's happening, otherwise
584 * since the third bestfree is there, there might be more 590 * since the third bestfree is there, there might be more
585 * entries. 591 * entries.
586 */ 592 */
587 needscan = (d->hdr.bestfree[2].length != 0); 593 needscan = (hdr->bestfree[2].length != 0);
588 /* 594 /*
589 * Fix up the new big freespace. 595 * Fix up the new big freespace.
590 */ 596 */
591 be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); 597 be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
592 *xfs_dir2_data_unused_tag_p(prevdup) = 598 *xfs_dir2_data_unused_tag_p(prevdup) =
593 cpu_to_be16((char *)prevdup - (char *)d); 599 cpu_to_be16((char *)prevdup - (char *)hdr);
594 xfs_dir2_data_log_unused(tp, bp, prevdup); 600 xfs_dir2_data_log_unused(tp, bp, prevdup);
595 if (!needscan) { 601 if (!needscan) {
596 /* 602 /*
@@ -600,18 +606,18 @@ xfs_dir2_data_make_free(
600 * Remove entry 1 first then entry 0. 606 * Remove entry 1 first then entry 0.
601 */ 607 */
602 ASSERT(dfp && dfp2); 608 ASSERT(dfp && dfp2);
603 if (dfp == &d->hdr.bestfree[1]) { 609 if (dfp == &hdr->bestfree[1]) {
604 dfp = &d->hdr.bestfree[0]; 610 dfp = &hdr->bestfree[0];
605 ASSERT(dfp2 == dfp); 611 ASSERT(dfp2 == dfp);
606 dfp2 = &d->hdr.bestfree[1]; 612 dfp2 = &hdr->bestfree[1];
607 } 613 }
608 xfs_dir2_data_freeremove(d, dfp2, needlogp); 614 xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
609 xfs_dir2_data_freeremove(d, dfp, needlogp); 615 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
610 /* 616 /*
611 * Now insert the new entry. 617 * Now insert the new entry.
612 */ 618 */
613 dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); 619 dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
614 ASSERT(dfp == &d->hdr.bestfree[0]); 620 ASSERT(dfp == &hdr->bestfree[0]);
615 ASSERT(dfp->length == prevdup->length); 621 ASSERT(dfp->length == prevdup->length);
616 ASSERT(!dfp[1].length); 622 ASSERT(!dfp[1].length);
617 ASSERT(!dfp[2].length); 623 ASSERT(!dfp[2].length);
@@ -621,10 +627,10 @@ xfs_dir2_data_make_free(
621 * The entry before us is free, merge with it. 627 * The entry before us is free, merge with it.
622 */ 628 */
623 else if (prevdup) { 629 else if (prevdup) {
624 dfp = xfs_dir2_data_freefind(d, prevdup); 630 dfp = xfs_dir2_data_freefind(hdr, prevdup);
625 be16_add_cpu(&prevdup->length, len); 631 be16_add_cpu(&prevdup->length, len);
626 *xfs_dir2_data_unused_tag_p(prevdup) = 632 *xfs_dir2_data_unused_tag_p(prevdup) =
627 cpu_to_be16((char *)prevdup - (char *)d); 633 cpu_to_be16((char *)prevdup - (char *)hdr);
628 xfs_dir2_data_log_unused(tp, bp, prevdup); 634 xfs_dir2_data_log_unused(tp, bp, prevdup);
629 /* 635 /*
630 * If the previous entry was in the table, the new entry 636 * If the previous entry was in the table, the new entry
@@ -632,27 +638,27 @@ xfs_dir2_data_make_free(
632 * the old one and add the new one. 638 * the old one and add the new one.
633 */ 639 */
634 if (dfp) { 640 if (dfp) {
635 xfs_dir2_data_freeremove(d, dfp, needlogp); 641 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
636 (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp); 642 xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
637 } 643 }
638 /* 644 /*
639 * Otherwise we need a scan if the new entry is big enough. 645 * Otherwise we need a scan if the new entry is big enough.
640 */ 646 */
641 else { 647 else {
642 needscan = be16_to_cpu(prevdup->length) > 648 needscan = be16_to_cpu(prevdup->length) >
643 be16_to_cpu(d->hdr.bestfree[2].length); 649 be16_to_cpu(hdr->bestfree[2].length);
644 } 650 }
645 } 651 }
646 /* 652 /*
647 * The following entry is free, merge with it. 653 * The following entry is free, merge with it.
648 */ 654 */
649 else if (postdup) { 655 else if (postdup) {
650 dfp = xfs_dir2_data_freefind(d, postdup); 656 dfp = xfs_dir2_data_freefind(hdr, postdup);
651 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 657 newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
652 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 658 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
653 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); 659 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
654 *xfs_dir2_data_unused_tag_p(newdup) = 660 *xfs_dir2_data_unused_tag_p(newdup) =
655 cpu_to_be16((char *)newdup - (char *)d); 661 cpu_to_be16((char *)newdup - (char *)hdr);
656 xfs_dir2_data_log_unused(tp, bp, newdup); 662 xfs_dir2_data_log_unused(tp, bp, newdup);
657 /* 663 /*
658 * If the following entry was in the table, the new entry 664 * If the following entry was in the table, the new entry
@@ -660,28 +666,28 @@ xfs_dir2_data_make_free(
660 * the old one and add the new one. 666 * the old one and add the new one.
661 */ 667 */
662 if (dfp) { 668 if (dfp) {
663 xfs_dir2_data_freeremove(d, dfp, needlogp); 669 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
664 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); 670 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
665 } 671 }
666 /* 672 /*
667 * Otherwise we need a scan if the new entry is big enough. 673 * Otherwise we need a scan if the new entry is big enough.
668 */ 674 */
669 else { 675 else {
670 needscan = be16_to_cpu(newdup->length) > 676 needscan = be16_to_cpu(newdup->length) >
671 be16_to_cpu(d->hdr.bestfree[2].length); 677 be16_to_cpu(hdr->bestfree[2].length);
672 } 678 }
673 } 679 }
674 /* 680 /*
675 * Neither neighbor is free. Make a new entry. 681 * Neither neighbor is free. Make a new entry.
676 */ 682 */
677 else { 683 else {
678 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 684 newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
679 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 685 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
680 newdup->length = cpu_to_be16(len); 686 newdup->length = cpu_to_be16(len);
681 *xfs_dir2_data_unused_tag_p(newdup) = 687 *xfs_dir2_data_unused_tag_p(newdup) =
682 cpu_to_be16((char *)newdup - (char *)d); 688 cpu_to_be16((char *)newdup - (char *)hdr);
683 xfs_dir2_data_log_unused(tp, bp, newdup); 689 xfs_dir2_data_log_unused(tp, bp, newdup);
684 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); 690 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
685 } 691 }
686 *needscanp = needscan; 692 *needscanp = needscan;
687} 693}
@@ -699,7 +705,7 @@ xfs_dir2_data_use_free(
699 int *needlogp, /* out: need to log header */ 705 int *needlogp, /* out: need to log header */
700 int *needscanp) /* out: need regen bestfree */ 706 int *needscanp) /* out: need regen bestfree */
701{ 707{
702 xfs_dir2_data_t *d; /* data block */ 708 xfs_dir2_data_hdr_t *hdr; /* data block header */
703 xfs_dir2_data_free_t *dfp; /* bestfree pointer */ 709 xfs_dir2_data_free_t *dfp; /* bestfree pointer */
704 int matchback; /* matches end of freespace */ 710 int matchback; /* matches end of freespace */
705 int matchfront; /* matches start of freespace */ 711 int matchfront; /* matches start of freespace */
@@ -708,24 +714,24 @@ xfs_dir2_data_use_free(
708 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ 714 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
709 int oldlen; /* old unused entry's length */ 715 int oldlen; /* old unused entry's length */
710 716
711 d = bp->data; 717 hdr = bp->data;
712 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 718 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
713 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 719 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
714 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); 720 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
715 ASSERT(offset >= (char *)dup - (char *)d); 721 ASSERT(offset >= (char *)dup - (char *)hdr);
716 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); 722 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
717 ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); 723 ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
718 /* 724 /*
719 * Look up the entry in the bestfree table. 725 * Look up the entry in the bestfree table.
720 */ 726 */
721 dfp = xfs_dir2_data_freefind(d, dup); 727 dfp = xfs_dir2_data_freefind(hdr, dup);
722 oldlen = be16_to_cpu(dup->length); 728 oldlen = be16_to_cpu(dup->length);
723 ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); 729 ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length));
724 /* 730 /*
725 * Check for alignment with front and back of the entry. 731 * Check for alignment with front and back of the entry.
726 */ 732 */
727 matchfront = (char *)dup - (char *)d == offset; 733 matchfront = (char *)dup - (char *)hdr == offset;
728 matchback = (char *)dup + oldlen - (char *)d == offset + len; 734 matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
729 ASSERT(*needscanp == 0); 735 ASSERT(*needscanp == 0);
730 needscan = 0; 736 needscan = 0;
731 /* 737 /*
@@ -734,9 +740,9 @@ xfs_dir2_data_use_free(
734 */ 740 */
735 if (matchfront && matchback) { 741 if (matchfront && matchback) {
736 if (dfp) { 742 if (dfp) {
737 needscan = (d->hdr.bestfree[2].offset != 0); 743 needscan = (hdr->bestfree[2].offset != 0);
738 if (!needscan) 744 if (!needscan)
739 xfs_dir2_data_freeremove(d, dfp, needlogp); 745 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
740 } 746 }
741 } 747 }
742 /* 748 /*
@@ -744,27 +750,27 @@ xfs_dir2_data_use_free(
744 * Make a new entry with the remaining freespace. 750 * Make a new entry with the remaining freespace.
745 */ 751 */
746 else if (matchfront) { 752 else if (matchfront) {
747 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 753 newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
748 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 754 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
749 newdup->length = cpu_to_be16(oldlen - len); 755 newdup->length = cpu_to_be16(oldlen - len);
750 *xfs_dir2_data_unused_tag_p(newdup) = 756 *xfs_dir2_data_unused_tag_p(newdup) =
751 cpu_to_be16((char *)newdup - (char *)d); 757 cpu_to_be16((char *)newdup - (char *)hdr);
752 xfs_dir2_data_log_unused(tp, bp, newdup); 758 xfs_dir2_data_log_unused(tp, bp, newdup);
753 /* 759 /*
754 * If it was in the table, remove it and add the new one. 760 * If it was in the table, remove it and add the new one.
755 */ 761 */
756 if (dfp) { 762 if (dfp) {
757 xfs_dir2_data_freeremove(d, dfp, needlogp); 763 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
758 dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); 764 dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
759 ASSERT(dfp != NULL); 765 ASSERT(dfp != NULL);
760 ASSERT(dfp->length == newdup->length); 766 ASSERT(dfp->length == newdup->length);
761 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); 767 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
762 /* 768 /*
763 * If we got inserted at the last slot, 769 * If we got inserted at the last slot,
764 * that means we don't know if there was a better 770 * that means we don't know if there was a better
765 * choice for the last slot, or not. Rescan. 771 * choice for the last slot, or not. Rescan.
766 */ 772 */
767 needscan = dfp == &d->hdr.bestfree[2]; 773 needscan = dfp == &hdr->bestfree[2];
768 } 774 }
769 } 775 }
770 /* 776 /*
@@ -773,25 +779,25 @@ xfs_dir2_data_use_free(
773 */ 779 */
774 else if (matchback) { 780 else if (matchback) {
775 newdup = dup; 781 newdup = dup;
776 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 782 newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
777 *xfs_dir2_data_unused_tag_p(newdup) = 783 *xfs_dir2_data_unused_tag_p(newdup) =
778 cpu_to_be16((char *)newdup - (char *)d); 784 cpu_to_be16((char *)newdup - (char *)hdr);
779 xfs_dir2_data_log_unused(tp, bp, newdup); 785 xfs_dir2_data_log_unused(tp, bp, newdup);
780 /* 786 /*
781 * If it was in the table, remove it and add the new one. 787 * If it was in the table, remove it and add the new one.
782 */ 788 */
783 if (dfp) { 789 if (dfp) {
784 xfs_dir2_data_freeremove(d, dfp, needlogp); 790 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
785 dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); 791 dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
786 ASSERT(dfp != NULL); 792 ASSERT(dfp != NULL);
787 ASSERT(dfp->length == newdup->length); 793 ASSERT(dfp->length == newdup->length);
788 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); 794 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
789 /* 795 /*
790 * If we got inserted at the last slot, 796 * If we got inserted at the last slot,
791 * that means we don't know if there was a better 797 * that means we don't know if there was a better
792 * choice for the last slot, or not. Rescan. 798 * choice for the last slot, or not. Rescan.
793 */ 799 */
794 needscan = dfp == &d->hdr.bestfree[2]; 800 needscan = dfp == &hdr->bestfree[2];
795 } 801 }
796 } 802 }
797 /* 803 /*
@@ -800,15 +806,15 @@ xfs_dir2_data_use_free(
800 */ 806 */
801 else { 807 else {
802 newdup = dup; 808 newdup = dup;
803 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 809 newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
804 *xfs_dir2_data_unused_tag_p(newdup) = 810 *xfs_dir2_data_unused_tag_p(newdup) =
805 cpu_to_be16((char *)newdup - (char *)d); 811 cpu_to_be16((char *)newdup - (char *)hdr);
806 xfs_dir2_data_log_unused(tp, bp, newdup); 812 xfs_dir2_data_log_unused(tp, bp, newdup);
807 newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 813 newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
808 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 814 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
809 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); 815 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
810 *xfs_dir2_data_unused_tag_p(newdup2) = 816 *xfs_dir2_data_unused_tag_p(newdup2) =
811 cpu_to_be16((char *)newdup2 - (char *)d); 817 cpu_to_be16((char *)newdup2 - (char *)hdr);
812 xfs_dir2_data_log_unused(tp, bp, newdup2); 818 xfs_dir2_data_log_unused(tp, bp, newdup2);
813 /* 819 /*
814 * If the old entry was in the table, we need to scan 820 * If the old entry was in the table, we need to scan
@@ -819,13 +825,12 @@ xfs_dir2_data_use_free(
819 * the 2 new will work. 825 * the 2 new will work.
820 */ 826 */
821 if (dfp) { 827 if (dfp) {
822 needscan = (d->hdr.bestfree[2].length != 0); 828 needscan = (hdr->bestfree[2].length != 0);
823 if (!needscan) { 829 if (!needscan) {
824 xfs_dir2_data_freeremove(d, dfp, needlogp); 830 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
825 (void)xfs_dir2_data_freeinsert(d, newdup, 831 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
826 needlogp); 832 xfs_dir2_data_freeinsert(hdr, newdup2,
827 (void)xfs_dir2_data_freeinsert(d, newdup2, 833 needlogp);
828 needlogp);
829 } 834 }
830 } 835 }
831 } 836 }
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
deleted file mode 100644
index efbc290c7fec..000000000000
--- a/fs/xfs/xfs_dir2_data.h
+++ /dev/null
@@ -1,184 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_DATA_H__
19#define __XFS_DIR2_DATA_H__
20
21/*
22 * Directory format 2, data block structures.
23 */
24
25struct xfs_dabuf;
26struct xfs_da_args;
27struct xfs_inode;
28struct xfs_trans;
29
30/*
31 * Constants.
32 */
33#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */
34#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
35#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
36#define XFS_DIR2_DATA_FREE_TAG 0xffff
37#define XFS_DIR2_DATA_FD_COUNT 3
38
39/*
40 * Directory address space divided into sections,
41 * spaces separated by 32GB.
42 */
43#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
44#define XFS_DIR2_DATA_SPACE 0
45#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
46#define XFS_DIR2_DATA_FIRSTDB(mp) \
47 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
48
49/*
50 * Offsets of . and .. in data space (always block 0)
51 */
52#define XFS_DIR2_DATA_DOT_OFFSET \
53 ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
54#define XFS_DIR2_DATA_DOTDOT_OFFSET \
55 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
56#define XFS_DIR2_DATA_FIRST_OFFSET \
57 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
58
59/*
60 * Structures.
61 */
62
63/*
64 * Describe a free area in the data block.
65 * The freespace will be formatted as a xfs_dir2_data_unused_t.
66 */
67typedef struct xfs_dir2_data_free {
68 __be16 offset; /* start of freespace */
69 __be16 length; /* length of freespace */
70} xfs_dir2_data_free_t;
71
72/*
73 * Header for the data blocks.
74 * Always at the beginning of a directory-sized block.
75 * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
76 */
77typedef struct xfs_dir2_data_hdr {
78 __be32 magic; /* XFS_DIR2_DATA_MAGIC */
79 /* or XFS_DIR2_BLOCK_MAGIC */
80 xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
81} xfs_dir2_data_hdr_t;
82
83/*
84 * Active entry in a data block. Aligned to 8 bytes.
85 * Tag appears as the last 2 bytes.
86 */
87typedef struct xfs_dir2_data_entry {
88 __be64 inumber; /* inode number */
89 __u8 namelen; /* name length */
90 __u8 name[1]; /* name bytes, no null */
91 /* variable offset */
92 __be16 tag; /* starting offset of us */
93} xfs_dir2_data_entry_t;
94
95/*
96 * Unused entry in a data block. Aligned to 8 bytes.
97 * Tag appears as the last 2 bytes.
98 */
99typedef struct xfs_dir2_data_unused {
100 __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
101 __be16 length; /* total free length */
102 /* variable offset */
103 __be16 tag; /* starting offset of us */
104} xfs_dir2_data_unused_t;
105
106typedef union {
107 xfs_dir2_data_entry_t entry;
108 xfs_dir2_data_unused_t unused;
109} xfs_dir2_data_union_t;
110
111/*
112 * Generic data block structure, for xfs_db.
113 */
114typedef struct xfs_dir2_data {
115 xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */
116 xfs_dir2_data_union_t u[1];
117} xfs_dir2_data_t;
118
119/*
120 * Macros.
121 */
122
123/*
124 * Size of a data entry.
125 */
126static inline int xfs_dir2_data_entsize(int n)
127{
128 return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
129 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
130}
131
132/*
133 * Pointer to an entry's tag word.
134 */
135static inline __be16 *
136xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
137{
138 return (__be16 *)((char *)dep +
139 xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
140}
141
142/*
143 * Pointer to a freespace's tag word.
144 */
145static inline __be16 *
146xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
147{
148 return (__be16 *)((char *)dup +
149 be16_to_cpu(dup->length) - sizeof(__be16));
150}
151
152/*
153 * Function declarations.
154 */
155#ifdef DEBUG
156extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
157#else
158#define xfs_dir2_data_check(dp,bp)
159#endif
160extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
161 xfs_dir2_data_unused_t *dup);
162extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
163 xfs_dir2_data_unused_t *dup, int *loghead);
164extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
165 int *loghead);
166extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
167 struct xfs_dabuf **bpp);
168extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
169 xfs_dir2_data_entry_t *dep);
170extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
171 struct xfs_dabuf *bp);
172extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
173 xfs_dir2_data_unused_t *dup);
174extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
175 xfs_dir2_data_aoff_t offset,
176 xfs_dir2_data_aoff_t len, int *needlogp,
177 int *needscanp);
178extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
179 xfs_dir2_data_unused_t *dup,
180 xfs_dir2_data_aoff_t offset,
181 xfs_dir2_data_aoff_t len, int *needlogp,
182 int *needscanp);
183
184#endif /* __XFS_DIR2_DATA_H__ */
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
new file mode 100644
index 000000000000..07270981f48f
--- /dev/null
+++ b/fs/xfs/xfs_dir2_format.h
@@ -0,0 +1,597 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_FORMAT_H__
19#define __XFS_DIR2_FORMAT_H__
20
21/*
22 * Directory version 2.
23 *
24 * There are 4 possible formats:
25 * - shortform - embedded into the inode
26 * - single block - data with embedded leaf at the end
27 * - multiple data blocks, single leaf+freeindex block
28 * - data blocks, node and leaf blocks (btree), freeindex blocks
29 *
30 * Note: many node blocks structures and constants are shared with the attr
31 * code and defined in xfs_da_btree.h.
32 */
33
34#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */
35#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */
36#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */
37
38/*
39 * Byte offset in data block and shortform entry.
40 */
41typedef __uint16_t xfs_dir2_data_off_t;
42#define NULLDATAOFF 0xffffU
43typedef uint xfs_dir2_data_aoff_t; /* argument form */
44
45/*
46 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
47 * Only need 16 bits, this is the byte offset into the single block form.
48 */
49typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
50
51/*
52 * Offset in data space of a data entry.
53 */
54typedef __uint32_t xfs_dir2_dataptr_t;
55#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
56#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
57
58/*
59 * Byte offset in a directory.
60 */
61typedef xfs_off_t xfs_dir2_off_t;
62
63/*
64 * Directory block number (logical dirblk in file)
65 */
66typedef __uint32_t xfs_dir2_db_t;
67
68/*
69 * Inode number stored as 8 8-bit values.
70 */
71typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
72
73/*
74 * Inode number stored as 4 8-bit values.
75 * Works a lot of the time, when all the inode numbers in a directory
76 * fit in 32 bits.
77 */
78typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
79
80typedef union {
81 xfs_dir2_ino8_t i8;
82 xfs_dir2_ino4_t i4;
83} xfs_dir2_inou_t;
84#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
85
86/*
87 * Directory layout when stored internal to an inode.
88 *
89 * Small directories are packed as tightly as possible so as to fit into the
90 * literal area of the inode. These "shortform" directories consist of a
91 * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
92 * structures. Due the different inode number storage size and the variable
93 * length name field in the xfs_dir2_sf_entry all these structure are
94 * variable length, and the accessors in this file should be used to iterate
95 * over them.
96 */
97typedef struct xfs_dir2_sf_hdr {
98 __uint8_t count; /* count of entries */
99 __uint8_t i8count; /* count of 8-byte inode #s */
100 xfs_dir2_inou_t parent; /* parent dir inode number */
101} __arch_pack xfs_dir2_sf_hdr_t;
102
103typedef struct xfs_dir2_sf_entry {
104 __u8 namelen; /* actual name length */
105 xfs_dir2_sf_off_t offset; /* saved offset */
106 __u8 name[]; /* name, variable size */
107 /*
108 * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
109 * variable offset after the name.
110 */
111} __arch_pack xfs_dir2_sf_entry_t;
112
113static inline int xfs_dir2_sf_hdr_size(int i8count)
114{
115 return sizeof(struct xfs_dir2_sf_hdr) -
116 (i8count == 0) *
117 (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t));
118}
119
120static inline xfs_dir2_data_aoff_t
121xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
122{
123 return get_unaligned_be16(&sfep->offset.i);
124}
125
126static inline void
127xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
128{
129 put_unaligned_be16(off, &sfep->offset.i);
130}
131
132static inline int
133xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len)
134{
135 return sizeof(struct xfs_dir2_sf_entry) + /* namelen + offset */
136 len + /* name */
137 (hdr->i8count ? /* ino */
138 sizeof(xfs_dir2_ino8_t) :
139 sizeof(xfs_dir2_ino4_t));
140}
141
142static inline struct xfs_dir2_sf_entry *
143xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
144{
145 return (struct xfs_dir2_sf_entry *)
146 ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
147}
148
149static inline struct xfs_dir2_sf_entry *
150xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
151 struct xfs_dir2_sf_entry *sfep)
152{
153 return (struct xfs_dir2_sf_entry *)
154 ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
155}
156
157
158/*
159 * Data block structures.
160 *
161 * A pure data block looks like the following drawing on disk:
162 *
163 * +-------------------------------------------------+
164 * | xfs_dir2_data_hdr_t |
165 * +-------------------------------------------------+
166 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
167 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
168 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
169 * | ... |
170 * +-------------------------------------------------+
171 * | unused space |
172 * +-------------------------------------------------+
173 *
174 * As all the entries are variable size structures the accessors below should
175 * be used to iterate over them.
176 *
177 * In addition to the pure data blocks for the data and node formats,
178 * most structures are also used for the combined data/freespace "block"
179 * format below.
180 */
181
182#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
183#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
184#define XFS_DIR2_DATA_FREE_TAG 0xffff
185#define XFS_DIR2_DATA_FD_COUNT 3
186
187/*
188 * Directory address space divided into sections,
189 * spaces separated by 32GB.
190 */
191#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
192#define XFS_DIR2_DATA_SPACE 0
193#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
194#define XFS_DIR2_DATA_FIRSTDB(mp) \
195 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
196
197/*
198 * Offsets of . and .. in data space (always block 0)
199 */
200#define XFS_DIR2_DATA_DOT_OFFSET \
201 ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
202#define XFS_DIR2_DATA_DOTDOT_OFFSET \
203 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
204#define XFS_DIR2_DATA_FIRST_OFFSET \
205 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
206
207/*
208 * Describe a free area in the data block.
209 *
210 * The freespace will be formatted as a xfs_dir2_data_unused_t.
211 */
212typedef struct xfs_dir2_data_free {
213 __be16 offset; /* start of freespace */
214 __be16 length; /* length of freespace */
215} xfs_dir2_data_free_t;
216
217/*
218 * Header for the data blocks.
219 *
220 * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
221 */
222typedef struct xfs_dir2_data_hdr {
223 __be32 magic; /* XFS_DIR2_DATA_MAGIC or */
224 /* XFS_DIR2_BLOCK_MAGIC */
225 xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
226} xfs_dir2_data_hdr_t;
227
228/*
229 * Active entry in a data block.
230 *
231 * Aligned to 8 bytes. After the variable length name field there is a
232 * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p.
233 */
234typedef struct xfs_dir2_data_entry {
235 __be64 inumber; /* inode number */
236 __u8 namelen; /* name length */
237 __u8 name[]; /* name bytes, no null */
238 /* __be16 tag; */ /* starting offset of us */
239} xfs_dir2_data_entry_t;
240
241/*
242 * Unused entry in a data block.
243 *
244 * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed
245 * using xfs_dir2_data_unused_tag_p.
246 */
247typedef struct xfs_dir2_data_unused {
248 __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
249 __be16 length; /* total free length */
250 /* variable offset */
251 __be16 tag; /* starting offset of us */
252} xfs_dir2_data_unused_t;
253
254/*
255 * Size of a data entry.
256 */
257static inline int xfs_dir2_data_entsize(int n)
258{
259 return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n +
260 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
261}
262
263/*
264 * Pointer to an entry's tag word.
265 */
266static inline __be16 *
267xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep)
268{
269 return (__be16 *)((char *)dep +
270 xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
271}
272
273/*
274 * Pointer to a freespace's tag word.
275 */
276static inline __be16 *
277xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
278{
279 return (__be16 *)((char *)dup +
280 be16_to_cpu(dup->length) - sizeof(__be16));
281}
282
283/*
284 * Leaf block structures.
285 *
286 * A pure leaf block looks like the following drawing on disk:
287 *
288 * +---------------------------+
289 * | xfs_dir2_leaf_hdr_t |
290 * +---------------------------+
291 * | xfs_dir2_leaf_entry_t |
292 * | xfs_dir2_leaf_entry_t |
293 * | xfs_dir2_leaf_entry_t |
294 * | xfs_dir2_leaf_entry_t |
295 * | ... |
296 * +---------------------------+
297 * | xfs_dir2_data_off_t |
298 * | xfs_dir2_data_off_t |
299 * | xfs_dir2_data_off_t |
300 * | ... |
301 * +---------------------------+
302 * | xfs_dir2_leaf_tail_t |
303 * +---------------------------+
304 *
305 * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
306 * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
307 * for directories with separate leaf nodes and free space blocks
308 * (magic = XFS_DIR2_LEAFN_MAGIC).
309 *
310 * As all the entries are variable size structures the accessors below should
311 * be used to iterate over them.
312 */
313
314/*
315 * Offset of the leaf/node space. First block in this space
316 * is the btree root.
317 */
318#define XFS_DIR2_LEAF_SPACE 1
319#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
320#define XFS_DIR2_LEAF_FIRSTDB(mp) \
321 xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
322
323/*
324 * Leaf block header.
325 */
326typedef struct xfs_dir2_leaf_hdr {
327 xfs_da_blkinfo_t info; /* header for da routines */
328 __be16 count; /* count of entries */
329 __be16 stale; /* count of stale entries */
330} xfs_dir2_leaf_hdr_t;
331
332/*
333 * Leaf block entry.
334 */
335typedef struct xfs_dir2_leaf_entry {
336 __be32 hashval; /* hash value of name */
337 __be32 address; /* address of data entry */
338} xfs_dir2_leaf_entry_t;
339
340/*
341 * Leaf block tail.
342 */
343typedef struct xfs_dir2_leaf_tail {
344 __be32 bestcount;
345} xfs_dir2_leaf_tail_t;
346
347/*
348 * Leaf block.
349 */
350typedef struct xfs_dir2_leaf {
351 xfs_dir2_leaf_hdr_t hdr; /* leaf header */
352 xfs_dir2_leaf_entry_t ents[]; /* entries */
353} xfs_dir2_leaf_t;
354
355/*
356 * DB blocks here are logical directory block numbers, not filesystem blocks.
357 */
358
359static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
360{
361 return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) /
362 (uint)sizeof(struct xfs_dir2_leaf_entry);
363}
364
365/*
366 * Get address of the bestcount field in the single-leaf block.
367 */
368static inline struct xfs_dir2_leaf_tail *
369xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
370{
371 return (struct xfs_dir2_leaf_tail *)
372 ((char *)lp + mp->m_dirblksize -
373 sizeof(struct xfs_dir2_leaf_tail));
374}
375
376/*
377 * Get address of the bests array in the single-leaf block.
378 */
379static inline __be16 *
380xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
381{
382 return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
383}
384
385/*
386 * Convert dataptr to byte in file space
387 */
388static inline xfs_dir2_off_t
389xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
390{
391 return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
392}
393
394/*
395 * Convert byte in file space to dataptr. It had better be aligned.
396 */
397static inline xfs_dir2_dataptr_t
398xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
399{
400 return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
401}
402
403/*
404 * Convert byte in space to (DB) block
405 */
406static inline xfs_dir2_db_t
407xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
408{
409 return (xfs_dir2_db_t)
410 (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
411}
412
413/*
414 * Convert dataptr to a block number
415 */
416static inline xfs_dir2_db_t
417xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
418{
419 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
420}
421
422/*
423 * Convert byte in space to offset in a block
424 */
425static inline xfs_dir2_data_aoff_t
426xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
427{
428 return (xfs_dir2_data_aoff_t)(by &
429 ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
430}
431
432/*
433 * Convert dataptr to a byte offset in a block
434 */
435static inline xfs_dir2_data_aoff_t
436xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
437{
438 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
439}
440
441/*
442 * Convert block and offset to byte in space
443 */
444static inline xfs_dir2_off_t
445xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
446 xfs_dir2_data_aoff_t o)
447{
448 return ((xfs_dir2_off_t)db <<
449 (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
450}
451
452/*
453 * Convert block (DB) to block (dablk)
454 */
455static inline xfs_dablk_t
456xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
457{
458 return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
459}
460
461/*
462 * Convert byte in space to (DA) block
463 */
464static inline xfs_dablk_t
465xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
466{
467 return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
468}
469
470/*
471 * Convert block and offset to dataptr
472 */
473static inline xfs_dir2_dataptr_t
474xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
475 xfs_dir2_data_aoff_t o)
476{
477 return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
478}
479
480/*
481 * Convert block (dablk) to block (DB)
482 */
483static inline xfs_dir2_db_t
484xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
485{
486 return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
487}
488
489/*
490 * Convert block (dablk) to byte offset in space
491 */
492static inline xfs_dir2_off_t
493xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
494{
495 return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
496}
497
498/*
499 * Free space block defintions for the node format.
500 */
501
502/*
503 * Offset of the freespace index.
504 */
505#define XFS_DIR2_FREE_SPACE 2
506#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
507#define XFS_DIR2_FREE_FIRSTDB(mp) \
508 xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
509
510typedef struct xfs_dir2_free_hdr {
511 __be32 magic; /* XFS_DIR2_FREE_MAGIC */
512 __be32 firstdb; /* db of first entry */
513 __be32 nvalid; /* count of valid entries */
514 __be32 nused; /* count of used entries */
515} xfs_dir2_free_hdr_t;
516
517typedef struct xfs_dir2_free {
518 xfs_dir2_free_hdr_t hdr; /* block header */
519 __be16 bests[]; /* best free counts */
520 /* unused entries are -1 */
521} xfs_dir2_free_t;
522
523static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp)
524{
525 return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
526 sizeof(xfs_dir2_data_off_t);
527}
528
529/*
530 * Convert data space db to the corresponding free db.
531 */
532static inline xfs_dir2_db_t
533xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
534{
535 return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
536}
537
538/*
539 * Convert data space db to the corresponding index in a free db.
540 */
541static inline int
542xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
543{
544 return db % xfs_dir2_free_max_bests(mp);
545}
546
547/*
548 * Single block format.
549 *
550 * The single block format looks like the following drawing on disk:
551 *
552 * +-------------------------------------------------+
553 * | xfs_dir2_data_hdr_t |
554 * +-------------------------------------------------+
555 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
556 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
557 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t :
558 * | ... |
559 * +-------------------------------------------------+
560 * | unused space |
561 * +-------------------------------------------------+
562 * | ... |
563 * | xfs_dir2_leaf_entry_t |
564 * | xfs_dir2_leaf_entry_t |
565 * +-------------------------------------------------+
566 * | xfs_dir2_block_tail_t |
567 * +-------------------------------------------------+
568 *
569 * As all the entries are variable size structures the accessors below should
570 * be used to iterate over them.
571 */
572
573typedef struct xfs_dir2_block_tail {
574 __be32 count; /* count of leaf entries */
575 __be32 stale; /* count of stale lf entries */
576} xfs_dir2_block_tail_t;
577
578/*
579 * Pointer to the leaf header embedded in a data block (1-block format)
580 */
581static inline struct xfs_dir2_block_tail *
582xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
583{
584 return ((struct xfs_dir2_block_tail *)
585 ((char *)hdr + mp->m_dirblksize)) - 1;
586}
587
588/*
589 * Pointer to the leaf entries embedded in a data block (1-block format)
590 */
591static inline struct xfs_dir2_leaf_entry *
592xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
593{
594 return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
595}
596
597#endif /* __XFS_DIR2_FORMAT_H__ */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae891223be90..ca2386d82cdf 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -24,18 +24,14 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_dinode.h" 30#include "xfs_dinode.h"
33#include "xfs_inode.h" 31#include "xfs_inode.h"
34#include "xfs_bmap.h" 32#include "xfs_bmap.h"
35#include "xfs_dir2_data.h" 33#include "xfs_dir2_format.h"
36#include "xfs_dir2_leaf.h" 34#include "xfs_dir2_priv.h"
37#include "xfs_dir2_block.h"
38#include "xfs_dir2_node.h"
39#include "xfs_error.h" 35#include "xfs_error.h"
40#include "xfs_trace.h" 36#include "xfs_trace.h"
41 37
@@ -64,7 +60,7 @@ xfs_dir2_block_to_leaf(
64{ 60{
65 __be16 *bestsp; /* leaf's bestsp entries */ 61 __be16 *bestsp; /* leaf's bestsp entries */
66 xfs_dablk_t blkno; /* leaf block's bno */ 62 xfs_dablk_t blkno; /* leaf block's bno */
67 xfs_dir2_block_t *block; /* block structure */ 63 xfs_dir2_data_hdr_t *hdr; /* block header */
68 xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ 64 xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */
69 xfs_dir2_block_tail_t *btp; /* block's tail */ 65 xfs_dir2_block_tail_t *btp; /* block's tail */
70 xfs_inode_t *dp; /* incore directory inode */ 66 xfs_inode_t *dp; /* incore directory inode */
@@ -101,9 +97,9 @@ xfs_dir2_block_to_leaf(
101 } 97 }
102 ASSERT(lbp != NULL); 98 ASSERT(lbp != NULL);
103 leaf = lbp->data; 99 leaf = lbp->data;
104 block = dbp->data; 100 hdr = dbp->data;
105 xfs_dir2_data_check(dp, dbp); 101 xfs_dir2_data_check(dp, dbp);
106 btp = xfs_dir2_block_tail_p(mp, block); 102 btp = xfs_dir2_block_tail_p(mp, hdr);
107 blp = xfs_dir2_block_leaf_p(btp); 103 blp = xfs_dir2_block_leaf_p(btp);
108 /* 104 /*
109 * Set the counts in the leaf header. 105 * Set the counts in the leaf header.
@@ -123,23 +119,23 @@ xfs_dir2_block_to_leaf(
123 * tail be free. 119 * tail be free.
124 */ 120 */
125 xfs_dir2_data_make_free(tp, dbp, 121 xfs_dir2_data_make_free(tp, dbp,
126 (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), 122 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
127 (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize - 123 (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
128 (char *)blp), 124 (char *)blp),
129 &needlog, &needscan); 125 &needlog, &needscan);
130 /* 126 /*
131 * Fix up the block header, make it a data block. 127 * Fix up the block header, make it a data block.
132 */ 128 */
133 block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 129 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
134 if (needscan) 130 if (needscan)
135 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 131 xfs_dir2_data_freescan(mp, hdr, &needlog);
136 /* 132 /*
137 * Set up leaf tail and bests table. 133 * Set up leaf tail and bests table.
138 */ 134 */
139 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 135 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
140 ltp->bestcount = cpu_to_be32(1); 136 ltp->bestcount = cpu_to_be32(1);
141 bestsp = xfs_dir2_leaf_bests_p(ltp); 137 bestsp = xfs_dir2_leaf_bests_p(ltp);
142 bestsp[0] = block->hdr.bestfree[0].length; 138 bestsp[0] = hdr->bestfree[0].length;
143 /* 139 /*
144 * Log the data header and leaf bests table. 140 * Log the data header and leaf bests table.
145 */ 141 */
@@ -152,6 +148,131 @@ xfs_dir2_block_to_leaf(
152 return 0; 148 return 0;
153} 149}
154 150
151STATIC void
152xfs_dir2_leaf_find_stale(
153 struct xfs_dir2_leaf *leaf,
154 int index,
155 int *lowstale,
156 int *highstale)
157{
158 /*
159 * Find the first stale entry before our index, if any.
160 */
161 for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
162 if (leaf->ents[*lowstale].address ==
163 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
164 break;
165 }
166
167 /*
168 * Find the first stale entry at or after our index, if any.
169 * Stop if the result would require moving more entries than using
170 * lowstale.
171 */
172 for (*highstale = index;
173 *highstale < be16_to_cpu(leaf->hdr.count);
174 ++*highstale) {
175 if (leaf->ents[*highstale].address ==
176 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
177 break;
178 if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
179 break;
180 }
181}
182
183struct xfs_dir2_leaf_entry *
184xfs_dir2_leaf_find_entry(
185 xfs_dir2_leaf_t *leaf, /* leaf structure */
186 int index, /* leaf table position */
187 int compact, /* need to compact leaves */
188 int lowstale, /* index of prev stale leaf */
189 int highstale, /* index of next stale leaf */
190 int *lfloglow, /* low leaf logging index */
191 int *lfloghigh) /* high leaf logging index */
192{
193 if (!leaf->hdr.stale) {
194 xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
195
196 /*
197 * Now we need to make room to insert the leaf entry.
198 *
199 * If there are no stale entries, just insert a hole at index.
200 */
201 lep = &leaf->ents[index];
202 if (index < be16_to_cpu(leaf->hdr.count))
203 memmove(lep + 1, lep,
204 (be16_to_cpu(leaf->hdr.count) - index) *
205 sizeof(*lep));
206
207 /*
208 * Record low and high logging indices for the leaf.
209 */
210 *lfloglow = index;
211 *lfloghigh = be16_to_cpu(leaf->hdr.count);
212 be16_add_cpu(&leaf->hdr.count, 1);
213 return lep;
214 }
215
216 /*
217 * There are stale entries.
218 *
219 * We will use one of them for the new entry. It's probably not at
220 * the right location, so we'll have to shift some up or down first.
221 *
222 * If we didn't compact before, we need to find the nearest stale
223 * entries before and after our insertion point.
224 */
225 if (compact == 0)
226 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
227
228 /*
229 * If the low one is better, use it.
230 */
231 if (lowstale >= 0 &&
232 (highstale == be16_to_cpu(leaf->hdr.count) ||
233 index - lowstale - 1 < highstale - index)) {
234 ASSERT(index - lowstale - 1 >= 0);
235 ASSERT(leaf->ents[lowstale].address ==
236 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
237
238 /*
239 * Copy entries up to cover the stale entry and make room
240 * for the new entry.
241 */
242 if (index - lowstale - 1 > 0) {
243 memmove(&leaf->ents[lowstale],
244 &leaf->ents[lowstale + 1],
245 (index - lowstale - 1) *
246 sizeof(xfs_dir2_leaf_entry_t));
247 }
248 *lfloglow = MIN(lowstale, *lfloglow);
249 *lfloghigh = MAX(index - 1, *lfloghigh);
250 be16_add_cpu(&leaf->hdr.stale, -1);
251 return &leaf->ents[index - 1];
252 }
253
254 /*
255 * The high one is better, so use that one.
256 */
257 ASSERT(highstale - index >= 0);
258 ASSERT(leaf->ents[highstale].address ==
259 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
260
261 /*
262 * Copy entries down to cover the stale entry and make room for the
263 * new entry.
264 */
265 if (highstale - index > 0) {
266 memmove(&leaf->ents[index + 1],
267 &leaf->ents[index],
268 (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
269 }
270 *lfloglow = MIN(index, *lfloglow);
271 *lfloghigh = MAX(highstale, *lfloghigh);
272 be16_add_cpu(&leaf->hdr.stale, -1);
273 return &leaf->ents[index];
274}
275
155/* 276/*
156 * Add an entry to a leaf form directory. 277 * Add an entry to a leaf form directory.
157 */ 278 */
@@ -161,7 +282,7 @@ xfs_dir2_leaf_addname(
161{ 282{
162 __be16 *bestsp; /* freespace table in leaf */ 283 __be16 *bestsp; /* freespace table in leaf */
163 int compact; /* need to compact leaves */ 284 int compact; /* need to compact leaves */
164 xfs_dir2_data_t *data; /* data block structure */ 285 xfs_dir2_data_hdr_t *hdr; /* data block header */
165 xfs_dabuf_t *dbp; /* data block buffer */ 286 xfs_dabuf_t *dbp; /* data block buffer */
166 xfs_dir2_data_entry_t *dep; /* data block entry */ 287 xfs_dir2_data_entry_t *dep; /* data block entry */
167 xfs_inode_t *dp; /* incore directory inode */ 288 xfs_inode_t *dp; /* incore directory inode */
@@ -225,7 +346,7 @@ xfs_dir2_leaf_addname(
225 continue; 346 continue;
226 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); 347 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
227 ASSERT(i < be32_to_cpu(ltp->bestcount)); 348 ASSERT(i < be32_to_cpu(ltp->bestcount));
228 ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); 349 ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
229 if (be16_to_cpu(bestsp[i]) >= length) { 350 if (be16_to_cpu(bestsp[i]) >= length) {
230 use_block = i; 351 use_block = i;
231 break; 352 break;
@@ -239,7 +360,8 @@ xfs_dir2_leaf_addname(
239 /* 360 /*
240 * Remember a block we see that's missing. 361 * Remember a block we see that's missing.
241 */ 362 */
242 if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1) 363 if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
364 use_block == -1)
243 use_block = i; 365 use_block = i;
244 else if (be16_to_cpu(bestsp[i]) >= length) { 366 else if (be16_to_cpu(bestsp[i]) >= length) {
245 use_block = i; 367 use_block = i;
@@ -250,14 +372,17 @@ xfs_dir2_leaf_addname(
250 /* 372 /*
251 * How many bytes do we need in the leaf block? 373 * How many bytes do we need in the leaf block?
252 */ 374 */
253 needbytes = 375 needbytes = 0;
254 (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) + 376 if (!leaf->hdr.stale)
255 (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0])); 377 needbytes += sizeof(xfs_dir2_leaf_entry_t);
378 if (use_block == -1)
379 needbytes += sizeof(xfs_dir2_data_off_t);
380
256 /* 381 /*
257 * Now kill use_block if it refers to a missing block, so we 382 * Now kill use_block if it refers to a missing block, so we
258 * can use it as an indication of allocation needed. 383 * can use it as an indication of allocation needed.
259 */ 384 */
260 if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF) 385 if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
261 use_block = -1; 386 use_block = -1;
262 /* 387 /*
263 * If we don't have enough free bytes but we can make enough 388 * If we don't have enough free bytes but we can make enough
@@ -369,8 +494,8 @@ xfs_dir2_leaf_addname(
369 */ 494 */
370 else 495 else
371 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 496 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
372 data = dbp->data; 497 hdr = dbp->data;
373 bestsp[use_block] = data->hdr.bestfree[0].length; 498 bestsp[use_block] = hdr->bestfree[0].length;
374 grown = 1; 499 grown = 1;
375 } 500 }
376 /* 501 /*
@@ -384,7 +509,7 @@ xfs_dir2_leaf_addname(
384 xfs_da_brelse(tp, lbp); 509 xfs_da_brelse(tp, lbp);
385 return error; 510 return error;
386 } 511 }
387 data = dbp->data; 512 hdr = dbp->data;
388 grown = 0; 513 grown = 0;
389 } 514 }
390 xfs_dir2_data_check(dp, dbp); 515 xfs_dir2_data_check(dp, dbp);
@@ -392,14 +517,14 @@ xfs_dir2_leaf_addname(
392 * Point to the biggest freespace in our data block. 517 * Point to the biggest freespace in our data block.
393 */ 518 */
394 dup = (xfs_dir2_data_unused_t *) 519 dup = (xfs_dir2_data_unused_t *)
395 ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); 520 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
396 ASSERT(be16_to_cpu(dup->length) >= length); 521 ASSERT(be16_to_cpu(dup->length) >= length);
397 needscan = needlog = 0; 522 needscan = needlog = 0;
398 /* 523 /*
399 * Mark the initial part of our freespace in use for the new entry. 524 * Mark the initial part of our freespace in use for the new entry.
400 */ 525 */
401 xfs_dir2_data_use_free(tp, dbp, dup, 526 xfs_dir2_data_use_free(tp, dbp, dup,
402 (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, 527 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
403 &needlog, &needscan); 528 &needlog, &needscan);
404 /* 529 /*
405 * Initialize our new entry (at last). 530 * Initialize our new entry (at last).
@@ -409,12 +534,12 @@ xfs_dir2_leaf_addname(
409 dep->namelen = args->namelen; 534 dep->namelen = args->namelen;
410 memcpy(dep->name, args->name, dep->namelen); 535 memcpy(dep->name, args->name, dep->namelen);
411 tagp = xfs_dir2_data_entry_tag_p(dep); 536 tagp = xfs_dir2_data_entry_tag_p(dep);
412 *tagp = cpu_to_be16((char *)dep - (char *)data); 537 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
413 /* 538 /*
414 * Need to scan fix up the bestfree table. 539 * Need to scan fix up the bestfree table.
415 */ 540 */
416 if (needscan) 541 if (needscan)
417 xfs_dir2_data_freescan(mp, data, &needlog); 542 xfs_dir2_data_freescan(mp, hdr, &needlog);
418 /* 543 /*
419 * Need to log the data block's header. 544 * Need to log the data block's header.
420 */ 545 */
@@ -425,107 +550,15 @@ xfs_dir2_leaf_addname(
425 * If the bests table needs to be changed, do it. 550 * If the bests table needs to be changed, do it.
426 * Log the change unless we've already done that. 551 * Log the change unless we've already done that.
427 */ 552 */
428 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) { 553 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
429 bestsp[use_block] = data->hdr.bestfree[0].length; 554 bestsp[use_block] = hdr->bestfree[0].length;
430 if (!grown) 555 if (!grown)
431 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 556 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
432 } 557 }
433 /* 558
434 * Now we need to make room to insert the leaf entry. 559 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
435 * If there are no stale entries, we just insert a hole at index. 560 highstale, &lfloglow, &lfloghigh);
436 */ 561
437 if (!leaf->hdr.stale) {
438 /*
439 * lep is still good as the index leaf entry.
440 */
441 if (index < be16_to_cpu(leaf->hdr.count))
442 memmove(lep + 1, lep,
443 (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
444 /*
445 * Record low and high logging indices for the leaf.
446 */
447 lfloglow = index;
448 lfloghigh = be16_to_cpu(leaf->hdr.count);
449 be16_add_cpu(&leaf->hdr.count, 1);
450 }
451 /*
452 * There are stale entries.
453 * We will use one of them for the new entry.
454 * It's probably not at the right location, so we'll have to
455 * shift some up or down first.
456 */
457 else {
458 /*
459 * If we didn't compact before, we need to find the nearest
460 * stale entries before and after our insertion point.
461 */
462 if (compact == 0) {
463 /*
464 * Find the first stale entry before the insertion
465 * point, if any.
466 */
467 for (lowstale = index - 1;
468 lowstale >= 0 &&
469 be32_to_cpu(leaf->ents[lowstale].address) !=
470 XFS_DIR2_NULL_DATAPTR;
471 lowstale--)
472 continue;
473 /*
474 * Find the next stale entry at or after the insertion
475 * point, if any. Stop if we go so far that the
476 * lowstale entry would be better.
477 */
478 for (highstale = index;
479 highstale < be16_to_cpu(leaf->hdr.count) &&
480 be32_to_cpu(leaf->ents[highstale].address) !=
481 XFS_DIR2_NULL_DATAPTR &&
482 (lowstale < 0 ||
483 index - lowstale - 1 >= highstale - index);
484 highstale++)
485 continue;
486 }
487 /*
488 * If the low one is better, use it.
489 */
490 if (lowstale >= 0 &&
491 (highstale == be16_to_cpu(leaf->hdr.count) ||
492 index - lowstale - 1 < highstale - index)) {
493 ASSERT(index - lowstale - 1 >= 0);
494 ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
495 XFS_DIR2_NULL_DATAPTR);
496 /*
497 * Copy entries up to cover the stale entry
498 * and make room for the new entry.
499 */
500 if (index - lowstale - 1 > 0)
501 memmove(&leaf->ents[lowstale],
502 &leaf->ents[lowstale + 1],
503 (index - lowstale - 1) * sizeof(*lep));
504 lep = &leaf->ents[index - 1];
505 lfloglow = MIN(lowstale, lfloglow);
506 lfloghigh = MAX(index - 1, lfloghigh);
507 }
508 /*
509 * The high one is better, so use that one.
510 */
511 else {
512 ASSERT(highstale - index >= 0);
513 ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
514 XFS_DIR2_NULL_DATAPTR);
515 /*
516 * Copy entries down to cover the stale entry
517 * and make room for the new entry.
518 */
519 if (highstale - index > 0)
520 memmove(&leaf->ents[index + 1],
521 &leaf->ents[index],
522 (highstale - index) * sizeof(*lep));
523 lep = &leaf->ents[index];
524 lfloglow = MIN(index, lfloglow);
525 lfloghigh = MAX(highstale, lfloghigh);
526 }
527 be16_add_cpu(&leaf->hdr.stale, -1);
528 }
529 /* 562 /*
530 * Fill in the new leaf entry. 563 * Fill in the new leaf entry.
531 */ 564 */
@@ -562,7 +595,7 @@ xfs_dir2_leaf_check(
562 595
563 leaf = bp->data; 596 leaf = bp->data;
564 mp = dp->i_mount; 597 mp = dp->i_mount;
565 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 598 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
566 /* 599 /*
567 * This value is not restrictive enough. 600 * This value is not restrictive enough.
568 * Should factor in the size of the bests table as well. 601 * Should factor in the size of the bests table as well.
@@ -582,7 +615,7 @@ xfs_dir2_leaf_check(
582 if (i + 1 < be16_to_cpu(leaf->hdr.count)) 615 if (i + 1 < be16_to_cpu(leaf->hdr.count))
583 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= 616 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
584 be32_to_cpu(leaf->ents[i + 1].hashval)); 617 be32_to_cpu(leaf->ents[i + 1].hashval));
585 if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) 618 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
586 stale++; 619 stale++;
587 } 620 }
588 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); 621 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -611,7 +644,8 @@ xfs_dir2_leaf_compact(
611 * Compress out the stale entries in place. 644 * Compress out the stale entries in place.
612 */ 645 */
613 for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { 646 for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
614 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) 647 if (leaf->ents[from].address ==
648 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
615 continue; 649 continue;
616 /* 650 /*
617 * Only actually copy the entries that are different. 651 * Only actually copy the entries that are different.
@@ -663,24 +697,9 @@ xfs_dir2_leaf_compact_x1(
663 leaf = bp->data; 697 leaf = bp->data;
664 ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); 698 ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
665 index = *indexp; 699 index = *indexp;
666 /* 700
667 * Find the first stale entry before our index, if any. 701 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
668 */ 702
669 for (lowstale = index - 1;
670 lowstale >= 0 &&
671 be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
672 lowstale--)
673 continue;
674 /*
675 * Find the first stale entry at or after our index, if any.
676 * Stop if the answer would be worse than lowstale.
677 */
678 for (highstale = index;
679 highstale < be16_to_cpu(leaf->hdr.count) &&
680 be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
681 (lowstale < 0 || index - lowstale > highstale - index);
682 highstale++)
683 continue;
684 /* 703 /*
685 * Pick the better of lowstale and highstale. 704 * Pick the better of lowstale and highstale.
686 */ 705 */
@@ -701,7 +720,8 @@ xfs_dir2_leaf_compact_x1(
701 if (index == from) 720 if (index == from)
702 newindex = to; 721 newindex = to;
703 if (from != keepstale && 722 if (from != keepstale &&
704 be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) { 723 leaf->ents[from].address ==
724 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
705 if (from == to) 725 if (from == to)
706 *lowlogp = to; 726 *lowlogp = to;
707 continue; 727 continue;
@@ -760,7 +780,7 @@ xfs_dir2_leaf_getdents(
760 int byteoff; /* offset in current block */ 780 int byteoff; /* offset in current block */
761 xfs_dir2_db_t curdb; /* db for current block */ 781 xfs_dir2_db_t curdb; /* db for current block */
762 xfs_dir2_off_t curoff; /* current overall offset */ 782 xfs_dir2_off_t curoff; /* current overall offset */
763 xfs_dir2_data_t *data; /* data block structure */ 783 xfs_dir2_data_hdr_t *hdr; /* data block header */
764 xfs_dir2_data_entry_t *dep; /* data entry */ 784 xfs_dir2_data_entry_t *dep; /* data entry */
765 xfs_dir2_data_unused_t *dup; /* unused entry */ 785 xfs_dir2_data_unused_t *dup; /* unused entry */
766 int error = 0; /* error return value */ 786 int error = 0; /* error return value */
@@ -1018,23 +1038,23 @@ xfs_dir2_leaf_getdents(
1018 else if (curoff > newoff) 1038 else if (curoff > newoff)
1019 ASSERT(xfs_dir2_byte_to_db(mp, curoff) == 1039 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1020 curdb); 1040 curdb);
1021 data = bp->data; 1041 hdr = bp->data;
1022 xfs_dir2_data_check(dp, bp); 1042 xfs_dir2_data_check(dp, bp);
1023 /* 1043 /*
1024 * Find our position in the block. 1044 * Find our position in the block.
1025 */ 1045 */
1026 ptr = (char *)&data->u; 1046 ptr = (char *)(hdr + 1);
1027 byteoff = xfs_dir2_byte_to_off(mp, curoff); 1047 byteoff = xfs_dir2_byte_to_off(mp, curoff);
1028 /* 1048 /*
1029 * Skip past the header. 1049 * Skip past the header.
1030 */ 1050 */
1031 if (byteoff == 0) 1051 if (byteoff == 0)
1032 curoff += (uint)sizeof(data->hdr); 1052 curoff += (uint)sizeof(*hdr);
1033 /* 1053 /*
1034 * Skip past entries until we reach our offset. 1054 * Skip past entries until we reach our offset.
1035 */ 1055 */
1036 else { 1056 else {
1037 while ((char *)ptr - (char *)data < byteoff) { 1057 while ((char *)ptr - (char *)hdr < byteoff) {
1038 dup = (xfs_dir2_data_unused_t *)ptr; 1058 dup = (xfs_dir2_data_unused_t *)ptr;
1039 1059
1040 if (be16_to_cpu(dup->freetag) 1060 if (be16_to_cpu(dup->freetag)
@@ -1055,8 +1075,8 @@ xfs_dir2_leaf_getdents(
1055 curoff = 1075 curoff =
1056 xfs_dir2_db_off_to_byte(mp, 1076 xfs_dir2_db_off_to_byte(mp,
1057 xfs_dir2_byte_to_db(mp, curoff), 1077 xfs_dir2_byte_to_db(mp, curoff),
1058 (char *)ptr - (char *)data); 1078 (char *)ptr - (char *)hdr);
1059 if (ptr >= (char *)data + mp->m_dirblksize) { 1079 if (ptr >= (char *)hdr + mp->m_dirblksize) {
1060 continue; 1080 continue;
1061 } 1081 }
1062 } 1082 }
@@ -1179,7 +1199,7 @@ xfs_dir2_leaf_log_bests(
1179 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ 1199 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1180 1200
1181 leaf = bp->data; 1201 leaf = bp->data;
1182 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1202 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1183 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); 1203 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1184 firstb = xfs_dir2_leaf_bests_p(ltp) + first; 1204 firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1185 lastb = xfs_dir2_leaf_bests_p(ltp) + last; 1205 lastb = xfs_dir2_leaf_bests_p(ltp) + last;
@@ -1202,8 +1222,8 @@ xfs_dir2_leaf_log_ents(
1202 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1222 xfs_dir2_leaf_t *leaf; /* leaf structure */
1203 1223
1204 leaf = bp->data; 1224 leaf = bp->data;
1205 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || 1225 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1206 be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1226 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1207 firstlep = &leaf->ents[first]; 1227 firstlep = &leaf->ents[first];
1208 lastlep = &leaf->ents[last]; 1228 lastlep = &leaf->ents[last];
1209 xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), 1229 xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
@@ -1221,8 +1241,8 @@ xfs_dir2_leaf_log_header(
1221 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1241 xfs_dir2_leaf_t *leaf; /* leaf structure */
1222 1242
1223 leaf = bp->data; 1243 leaf = bp->data;
1224 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || 1244 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1225 be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1245 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1226 xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), 1246 xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1227 (uint)(sizeof(leaf->hdr) - 1)); 1247 (uint)(sizeof(leaf->hdr) - 1));
1228} 1248}
@@ -1241,7 +1261,7 @@ xfs_dir2_leaf_log_tail(
1241 1261
1242 mp = tp->t_mountp; 1262 mp = tp->t_mountp;
1243 leaf = bp->data; 1263 leaf = bp->data;
1244 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1264 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1245 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1265 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1246 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), 1266 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1247 (uint)(mp->m_dirblksize - 1)); 1267 (uint)(mp->m_dirblksize - 1));
@@ -1437,7 +1457,7 @@ xfs_dir2_leaf_removename(
1437 xfs_da_args_t *args) /* operation arguments */ 1457 xfs_da_args_t *args) /* operation arguments */
1438{ 1458{
1439 __be16 *bestsp; /* leaf block best freespace */ 1459 __be16 *bestsp; /* leaf block best freespace */
1440 xfs_dir2_data_t *data; /* data block structure */ 1460 xfs_dir2_data_hdr_t *hdr; /* data block header */
1441 xfs_dir2_db_t db; /* data block number */ 1461 xfs_dir2_db_t db; /* data block number */
1442 xfs_dabuf_t *dbp; /* data block buffer */ 1462 xfs_dabuf_t *dbp; /* data block buffer */
1443 xfs_dir2_data_entry_t *dep; /* data entry structure */ 1463 xfs_dir2_data_entry_t *dep; /* data entry structure */
@@ -1467,7 +1487,7 @@ xfs_dir2_leaf_removename(
1467 tp = args->trans; 1487 tp = args->trans;
1468 mp = dp->i_mount; 1488 mp = dp->i_mount;
1469 leaf = lbp->data; 1489 leaf = lbp->data;
1470 data = dbp->data; 1490 hdr = dbp->data;
1471 xfs_dir2_data_check(dp, dbp); 1491 xfs_dir2_data_check(dp, dbp);
1472 /* 1492 /*
1473 * Point to the leaf entry, use that to point to the data entry. 1493 * Point to the leaf entry, use that to point to the data entry.
@@ -1475,9 +1495,9 @@ xfs_dir2_leaf_removename(
1475 lep = &leaf->ents[index]; 1495 lep = &leaf->ents[index];
1476 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); 1496 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1477 dep = (xfs_dir2_data_entry_t *) 1497 dep = (xfs_dir2_data_entry_t *)
1478 ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); 1498 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1479 needscan = needlog = 0; 1499 needscan = needlog = 0;
1480 oldbest = be16_to_cpu(data->hdr.bestfree[0].length); 1500 oldbest = be16_to_cpu(hdr->bestfree[0].length);
1481 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1501 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1482 bestsp = xfs_dir2_leaf_bests_p(ltp); 1502 bestsp = xfs_dir2_leaf_bests_p(ltp);
1483 ASSERT(be16_to_cpu(bestsp[db]) == oldbest); 1503 ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1485,7 +1505,7 @@ xfs_dir2_leaf_removename(
1485 * Mark the former data entry unused. 1505 * Mark the former data entry unused.
1486 */ 1506 */
1487 xfs_dir2_data_make_free(tp, dbp, 1507 xfs_dir2_data_make_free(tp, dbp,
1488 (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), 1508 (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1489 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 1509 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1490 /* 1510 /*
1491 * We just mark the leaf entry stale by putting a null in it. 1511 * We just mark the leaf entry stale by putting a null in it.
@@ -1499,23 +1519,23 @@ xfs_dir2_leaf_removename(
1499 * log the data block header if necessary. 1519 * log the data block header if necessary.
1500 */ 1520 */
1501 if (needscan) 1521 if (needscan)
1502 xfs_dir2_data_freescan(mp, data, &needlog); 1522 xfs_dir2_data_freescan(mp, hdr, &needlog);
1503 if (needlog) 1523 if (needlog)
1504 xfs_dir2_data_log_header(tp, dbp); 1524 xfs_dir2_data_log_header(tp, dbp);
1505 /* 1525 /*
1506 * If the longest freespace in the data block has changed, 1526 * If the longest freespace in the data block has changed,
1507 * put the new value in the bests table and log that. 1527 * put the new value in the bests table and log that.
1508 */ 1528 */
1509 if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) { 1529 if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
1510 bestsp[db] = data->hdr.bestfree[0].length; 1530 bestsp[db] = hdr->bestfree[0].length;
1511 xfs_dir2_leaf_log_bests(tp, lbp, db, db); 1531 xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1512 } 1532 }
1513 xfs_dir2_data_check(dp, dbp); 1533 xfs_dir2_data_check(dp, dbp);
1514 /* 1534 /*
1515 * If the data block is now empty then get rid of the data block. 1535 * If the data block is now empty then get rid of the data block.
1516 */ 1536 */
1517 if (be16_to_cpu(data->hdr.bestfree[0].length) == 1537 if (be16_to_cpu(hdr->bestfree[0].length) ==
1518 mp->m_dirblksize - (uint)sizeof(data->hdr)) { 1538 mp->m_dirblksize - (uint)sizeof(*hdr)) {
1519 ASSERT(db != mp->m_dirdatablk); 1539 ASSERT(db != mp->m_dirdatablk);
1520 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { 1540 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1521 /* 1541 /*
@@ -1542,7 +1562,7 @@ xfs_dir2_leaf_removename(
1542 * Look for the last active entry (i). 1562 * Look for the last active entry (i).
1543 */ 1563 */
1544 for (i = db - 1; i > 0; i--) { 1564 for (i = db - 1; i > 0; i--) {
1545 if (be16_to_cpu(bestsp[i]) != NULLDATAOFF) 1565 if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
1546 break; 1566 break;
1547 } 1567 }
1548 /* 1568 /*
@@ -1686,9 +1706,6 @@ xfs_dir2_leaf_trim_data(
1686 xfs_dir2_db_t db) /* data block number */ 1706 xfs_dir2_db_t db) /* data block number */
1687{ 1707{
1688 __be16 *bestsp; /* leaf bests table */ 1708 __be16 *bestsp; /* leaf bests table */
1689#ifdef DEBUG
1690 xfs_dir2_data_t *data; /* data block structure */
1691#endif
1692 xfs_dabuf_t *dbp; /* data block buffer */ 1709 xfs_dabuf_t *dbp; /* data block buffer */
1693 xfs_inode_t *dp; /* incore directory inode */ 1710 xfs_inode_t *dp; /* incore directory inode */
1694 int error; /* error return value */ 1711 int error; /* error return value */
@@ -1707,20 +1724,21 @@ xfs_dir2_leaf_trim_data(
1707 XFS_DATA_FORK))) { 1724 XFS_DATA_FORK))) {
1708 return error; 1725 return error;
1709 } 1726 }
1710#ifdef DEBUG
1711 data = dbp->data;
1712 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
1713#endif
1714 /* this seems to be an error
1715 * data is only valid if DEBUG is defined?
1716 * RMC 09/08/1999
1717 */
1718 1727
1719 leaf = lbp->data; 1728 leaf = lbp->data;
1720 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1729 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1721 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == 1730
1722 mp->m_dirblksize - (uint)sizeof(data->hdr)); 1731#ifdef DEBUG
1732{
1733 struct xfs_dir2_data_hdr *hdr = dbp->data;
1734
1735 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
1736 ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
1737 mp->m_dirblksize - (uint)sizeof(*hdr));
1723 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); 1738 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1739}
1740#endif
1741
1724 /* 1742 /*
1725 * Get rid of the data block. 1743 * Get rid of the data block.
1726 */ 1744 */
@@ -1740,6 +1758,20 @@ xfs_dir2_leaf_trim_data(
1740 return 0; 1758 return 0;
1741} 1759}
1742 1760
1761static inline size_t
1762xfs_dir2_leaf_size(
1763 struct xfs_dir2_leaf_hdr *hdr,
1764 int counts)
1765{
1766 int entries;
1767
1768 entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
1769 return sizeof(xfs_dir2_leaf_hdr_t) +
1770 entries * sizeof(xfs_dir2_leaf_entry_t) +
1771 counts * sizeof(xfs_dir2_data_off_t) +
1772 sizeof(xfs_dir2_leaf_tail_t);
1773}
1774
1743/* 1775/*
1744 * Convert node form directory to leaf form directory. 1776 * Convert node form directory to leaf form directory.
1745 * The root of the node form dir needs to already be a LEAFN block. 1777 * The root of the node form dir needs to already be a LEAFN block.
@@ -1810,7 +1842,7 @@ xfs_dir2_node_to_leaf(
1810 return 0; 1842 return 0;
1811 lbp = state->path.blk[0].bp; 1843 lbp = state->path.blk[0].bp;
1812 leaf = lbp->data; 1844 leaf = lbp->data;
1813 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1845 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1814 /* 1846 /*
1815 * Read the freespace block. 1847 * Read the freespace block.
1816 */ 1848 */
@@ -1819,20 +1851,19 @@ xfs_dir2_node_to_leaf(
1819 return error; 1851 return error;
1820 } 1852 }
1821 free = fbp->data; 1853 free = fbp->data;
1822 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1854 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1823 ASSERT(!free->hdr.firstdb); 1855 ASSERT(!free->hdr.firstdb);
1856
1824 /* 1857 /*
1825 * Now see if the leafn and free data will fit in a leaf1. 1858 * Now see if the leafn and free data will fit in a leaf1.
1826 * If not, release the buffer and give up. 1859 * If not, release the buffer and give up.
1827 */ 1860 */
1828 if ((uint)sizeof(leaf->hdr) + 1861 if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
1829 (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) + 1862 mp->m_dirblksize) {
1830 be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
1831 (uint)sizeof(leaf->tail) >
1832 mp->m_dirblksize) {
1833 xfs_da_brelse(tp, fbp); 1863 xfs_da_brelse(tp, fbp);
1834 return 0; 1864 return 0;
1835 } 1865 }
1866
1836 /* 1867 /*
1837 * If the leaf has any stale entries in it, compress them out. 1868 * If the leaf has any stale entries in it, compress them out.
1838 * The compact routine will log the header. 1869 * The compact routine will log the header.
@@ -1851,7 +1882,7 @@ xfs_dir2_node_to_leaf(
1851 * Set up the leaf bests table. 1882 * Set up the leaf bests table.
1852 */ 1883 */
1853 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, 1884 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1854 be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); 1885 be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
1855 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 1886 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1856 xfs_dir2_leaf_log_tail(tp, lbp); 1887 xfs_dir2_leaf_log_tail(tp, lbp);
1857 xfs_dir2_leaf_check(dp, lbp); 1888 xfs_dir2_leaf_check(dp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
deleted file mode 100644
index 6c9539f06987..000000000000
--- a/fs/xfs/xfs_dir2_leaf.h
+++ /dev/null
@@ -1,253 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_LEAF_H__
19#define __XFS_DIR2_LEAF_H__
20
21struct uio;
22struct xfs_dabuf;
23struct xfs_da_args;
24struct xfs_inode;
25struct xfs_mount;
26struct xfs_trans;
27
28/*
29 * Offset of the leaf/node space. First block in this space
30 * is the btree root.
31 */
32#define XFS_DIR2_LEAF_SPACE 1
33#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
34#define XFS_DIR2_LEAF_FIRSTDB(mp) \
35 xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
36
37/*
38 * Offset in data space of a data entry.
39 */
40typedef __uint32_t xfs_dir2_dataptr_t;
41#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
42#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
43
44/*
45 * Leaf block header.
46 */
47typedef struct xfs_dir2_leaf_hdr {
48 xfs_da_blkinfo_t info; /* header for da routines */
49 __be16 count; /* count of entries */
50 __be16 stale; /* count of stale entries */
51} xfs_dir2_leaf_hdr_t;
52
53/*
54 * Leaf block entry.
55 */
56typedef struct xfs_dir2_leaf_entry {
57 __be32 hashval; /* hash value of name */
58 __be32 address; /* address of data entry */
59} xfs_dir2_leaf_entry_t;
60
61/*
62 * Leaf block tail.
63 */
64typedef struct xfs_dir2_leaf_tail {
65 __be32 bestcount;
66} xfs_dir2_leaf_tail_t;
67
68/*
69 * Leaf block.
70 * bests and tail are at the end of the block for single-leaf only
71 * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
72 */
73typedef struct xfs_dir2_leaf {
74 xfs_dir2_leaf_hdr_t hdr; /* leaf header */
75 xfs_dir2_leaf_entry_t ents[1]; /* entries */
76 /* ... */
77 xfs_dir2_data_off_t bests[1]; /* best free counts */
78 xfs_dir2_leaf_tail_t tail; /* leaf tail */
79} xfs_dir2_leaf_t;
80
81/*
82 * DB blocks here are logical directory block numbers, not filesystem blocks.
83 */
84
85static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
86{
87 return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
88 (uint)sizeof(xfs_dir2_leaf_entry_t));
89}
90
91/*
92 * Get address of the bestcount field in the single-leaf block.
93 */
94static inline xfs_dir2_leaf_tail_t *
95xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
96{
97 return (xfs_dir2_leaf_tail_t *)
98 ((char *)(lp) + (mp)->m_dirblksize -
99 (uint)sizeof(xfs_dir2_leaf_tail_t));
100}
101
102/*
103 * Get address of the bests array in the single-leaf block.
104 */
105static inline __be16 *
106xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
107{
108 return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
109}
110
111/*
112 * Convert dataptr to byte in file space
113 */
114static inline xfs_dir2_off_t
115xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
116{
117 return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG;
118}
119
120/*
121 * Convert byte in file space to dataptr. It had better be aligned.
122 */
123static inline xfs_dir2_dataptr_t
124xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
125{
126 return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG);
127}
128
129/*
130 * Convert byte in space to (DB) block
131 */
132static inline xfs_dir2_db_t
133xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
134{
135 return (xfs_dir2_db_t)((by) >> \
136 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog));
137}
138
139/*
140 * Convert dataptr to a block number
141 */
142static inline xfs_dir2_db_t
143xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
144{
145 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
146}
147
148/*
149 * Convert byte in space to offset in a block
150 */
151static inline xfs_dir2_data_aoff_t
152xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
153{
154 return (xfs_dir2_data_aoff_t)((by) & \
155 ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1));
156}
157
158/*
159 * Convert dataptr to a byte offset in a block
160 */
161static inline xfs_dir2_data_aoff_t
162xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
163{
164 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
165}
166
167/*
168 * Convert block and offset to byte in space
169 */
170static inline xfs_dir2_off_t
171xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
172 xfs_dir2_data_aoff_t o)
173{
174 return ((xfs_dir2_off_t)(db) << \
175 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o);
176}
177
178/*
179 * Convert block (DB) to block (dablk)
180 */
181static inline xfs_dablk_t
182xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
183{
184 return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog);
185}
186
187/*
188 * Convert byte in space to (DA) block
189 */
190static inline xfs_dablk_t
191xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
192{
193 return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
194}
195
196/*
197 * Convert block and offset to dataptr
198 */
199static inline xfs_dir2_dataptr_t
200xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
201 xfs_dir2_data_aoff_t o)
202{
203 return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
204}
205
206/*
207 * Convert block (dablk) to block (DB)
208 */
209static inline xfs_dir2_db_t
210xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
211{
212 return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog);
213}
214
215/*
216 * Convert block (dablk) to byte offset in space
217 */
218static inline xfs_dir2_off_t
219xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
220{
221 return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
222}
223
224/*
225 * Function declarations.
226 */
227extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
228 struct xfs_dabuf *dbp);
229extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
230extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
231 struct xfs_dabuf *bp);
232extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
233 int *lowstalep, int *highstalep,
234 int *lowlogp, int *highlogp);
235extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
236 size_t bufsize, xfs_off_t *offset,
237 filldir_t filldir);
238extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
239 struct xfs_dabuf **bpp, int magic);
240extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
241 int first, int last);
242extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
243 struct xfs_dabuf *bp);
244extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
245extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
246extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
247extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
248 struct xfs_dabuf *lbp);
249extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
250 struct xfs_dabuf *lbp, xfs_dir2_db_t db);
251extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
252
253#endif /* __XFS_DIR2_LEAF_H__ */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index a0aab7d3294f..084b3247d636 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -23,18 +23,14 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_bmap.h" 31#include "xfs_bmap.h"
34#include "xfs_dir2_data.h" 32#include "xfs_dir2_format.h"
35#include "xfs_dir2_leaf.h" 33#include "xfs_dir2_priv.h"
36#include "xfs_dir2_block.h"
37#include "xfs_dir2_node.h"
38#include "xfs_error.h" 34#include "xfs_error.h"
39#include "xfs_trace.h" 35#include "xfs_trace.h"
40 36
@@ -73,7 +69,7 @@ xfs_dir2_free_log_bests(
73 xfs_dir2_free_t *free; /* freespace structure */ 69 xfs_dir2_free_t *free; /* freespace structure */
74 70
75 free = bp->data; 71 free = bp->data;
76 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 72 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
77 xfs_da_log_buf(tp, bp, 73 xfs_da_log_buf(tp, bp,
78 (uint)((char *)&free->bests[first] - (char *)free), 74 (uint)((char *)&free->bests[first] - (char *)free),
79 (uint)((char *)&free->bests[last] - (char *)free + 75 (uint)((char *)&free->bests[last] - (char *)free +
@@ -91,7 +87,7 @@ xfs_dir2_free_log_header(
91 xfs_dir2_free_t *free; /* freespace structure */ 87 xfs_dir2_free_t *free; /* freespace structure */
92 88
93 free = bp->data; 89 free = bp->data;
94 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 90 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
95 xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), 91 xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
96 (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); 92 (uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
97} 93}
@@ -244,89 +240,13 @@ xfs_dir2_leafn_add(
244 lfloglow = be16_to_cpu(leaf->hdr.count); 240 lfloglow = be16_to_cpu(leaf->hdr.count);
245 lfloghigh = -1; 241 lfloghigh = -1;
246 } 242 }
247 /* 243
248 * No stale entries, just insert a space for the new entry.
249 */
250 if (!leaf->hdr.stale) {
251 lep = &leaf->ents[index];
252 if (index < be16_to_cpu(leaf->hdr.count))
253 memmove(lep + 1, lep,
254 (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
255 lfloglow = index;
256 lfloghigh = be16_to_cpu(leaf->hdr.count);
257 be16_add_cpu(&leaf->hdr.count, 1);
258 }
259 /*
260 * There are stale entries. We'll use one for the new entry.
261 */
262 else {
263 /*
264 * If we didn't do a compact then we need to figure out
265 * which stale entry will be used.
266 */
267 if (compact == 0) {
268 /*
269 * Find first stale entry before our insertion point.
270 */
271 for (lowstale = index - 1;
272 lowstale >= 0 &&
273 be32_to_cpu(leaf->ents[lowstale].address) !=
274 XFS_DIR2_NULL_DATAPTR;
275 lowstale--)
276 continue;
277 /*
278 * Find next stale entry after insertion point.
279 * Stop looking if the answer would be worse than
280 * lowstale already found.
281 */
282 for (highstale = index;
283 highstale < be16_to_cpu(leaf->hdr.count) &&
284 be32_to_cpu(leaf->ents[highstale].address) !=
285 XFS_DIR2_NULL_DATAPTR &&
286 (lowstale < 0 ||
287 index - lowstale - 1 >= highstale - index);
288 highstale++)
289 continue;
290 }
291 /*
292 * Using the low stale entry.
293 * Shift entries up toward the stale slot.
294 */
295 if (lowstale >= 0 &&
296 (highstale == be16_to_cpu(leaf->hdr.count) ||
297 index - lowstale - 1 < highstale - index)) {
298 ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
299 XFS_DIR2_NULL_DATAPTR);
300 ASSERT(index - lowstale - 1 >= 0);
301 if (index - lowstale - 1 > 0)
302 memmove(&leaf->ents[lowstale],
303 &leaf->ents[lowstale + 1],
304 (index - lowstale - 1) * sizeof(*lep));
305 lep = &leaf->ents[index - 1];
306 lfloglow = MIN(lowstale, lfloglow);
307 lfloghigh = MAX(index - 1, lfloghigh);
308 }
309 /*
310 * Using the high stale entry.
311 * Shift entries down toward the stale slot.
312 */
313 else {
314 ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
315 XFS_DIR2_NULL_DATAPTR);
316 ASSERT(highstale - index >= 0);
317 if (highstale - index > 0)
318 memmove(&leaf->ents[index + 1],
319 &leaf->ents[index],
320 (highstale - index) * sizeof(*lep));
321 lep = &leaf->ents[index];
322 lfloglow = MIN(index, lfloglow);
323 lfloghigh = MAX(highstale, lfloghigh);
324 }
325 be16_add_cpu(&leaf->hdr.stale, -1);
326 }
327 /* 244 /*
328 * Insert the new entry, log everything. 245 * Insert the new entry, log everything.
329 */ 246 */
247 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
248 highstale, &lfloglow, &lfloghigh);
249
330 lep->hashval = cpu_to_be32(args->hashval); 250 lep->hashval = cpu_to_be32(args->hashval);
331 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, 251 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
332 args->blkno, args->index)); 252 args->blkno, args->index));
@@ -352,14 +272,14 @@ xfs_dir2_leafn_check(
352 272
353 leaf = bp->data; 273 leaf = bp->data;
354 mp = dp->i_mount; 274 mp = dp->i_mount;
355 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 275 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
356 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); 276 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
357 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { 277 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
358 if (i + 1 < be16_to_cpu(leaf->hdr.count)) { 278 if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
359 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= 279 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
360 be32_to_cpu(leaf->ents[i + 1].hashval)); 280 be32_to_cpu(leaf->ents[i + 1].hashval));
361 } 281 }
362 if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) 282 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
363 stale++; 283 stale++;
364 } 284 }
365 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); 285 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -378,7 +298,7 @@ xfs_dir2_leafn_lasthash(
378 xfs_dir2_leaf_t *leaf; /* leaf structure */ 298 xfs_dir2_leaf_t *leaf; /* leaf structure */
379 299
380 leaf = bp->data; 300 leaf = bp->data;
381 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 301 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
382 if (count) 302 if (count)
383 *count = be16_to_cpu(leaf->hdr.count); 303 *count = be16_to_cpu(leaf->hdr.count);
384 if (!leaf->hdr.count) 304 if (!leaf->hdr.count)
@@ -417,7 +337,7 @@ xfs_dir2_leafn_lookup_for_addname(
417 tp = args->trans; 337 tp = args->trans;
418 mp = dp->i_mount; 338 mp = dp->i_mount;
419 leaf = bp->data; 339 leaf = bp->data;
420 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 340 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
421#ifdef __KERNEL__ 341#ifdef __KERNEL__
422 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 342 ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
423#endif 343#endif
@@ -434,7 +354,7 @@ xfs_dir2_leafn_lookup_for_addname(
434 curbp = state->extrablk.bp; 354 curbp = state->extrablk.bp;
435 curfdb = state->extrablk.blkno; 355 curfdb = state->extrablk.blkno;
436 free = curbp->data; 356 free = curbp->data;
437 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 357 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
438 } 358 }
439 length = xfs_dir2_data_entsize(args->namelen); 359 length = xfs_dir2_data_entsize(args->namelen);
440 /* 360 /*
@@ -488,7 +408,7 @@ xfs_dir2_leafn_lookup_for_addname(
488 ASSERT(be32_to_cpu(free->hdr.magic) == 408 ASSERT(be32_to_cpu(free->hdr.magic) ==
489 XFS_DIR2_FREE_MAGIC); 409 XFS_DIR2_FREE_MAGIC);
490 ASSERT((be32_to_cpu(free->hdr.firstdb) % 410 ASSERT((be32_to_cpu(free->hdr.firstdb) %
491 XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); 411 xfs_dir2_free_max_bests(mp)) == 0);
492 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); 412 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
493 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) + 413 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
494 be32_to_cpu(free->hdr.nvalid)); 414 be32_to_cpu(free->hdr.nvalid));
@@ -500,7 +420,8 @@ xfs_dir2_leafn_lookup_for_addname(
500 /* 420 /*
501 * If it has room, return it. 421 * If it has room, return it.
502 */ 422 */
503 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { 423 if (unlikely(free->bests[fi] ==
424 cpu_to_be16(NULLDATAOFF))) {
504 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", 425 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
505 XFS_ERRLEVEL_LOW, mp); 426 XFS_ERRLEVEL_LOW, mp);
506 if (curfdb != newfdb) 427 if (curfdb != newfdb)
@@ -561,7 +482,7 @@ xfs_dir2_leafn_lookup_for_entry(
561 tp = args->trans; 482 tp = args->trans;
562 mp = dp->i_mount; 483 mp = dp->i_mount;
563 leaf = bp->data; 484 leaf = bp->data;
564 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 485 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
565#ifdef __KERNEL__ 486#ifdef __KERNEL__
566 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 487 ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
567#endif 488#endif
@@ -742,7 +663,8 @@ xfs_dir2_leafn_moveents(
742 int i; /* temp leaf index */ 663 int i; /* temp leaf index */
743 664
744 for (i = start_s, stale = 0; i < start_s + count; i++) { 665 for (i = start_s, stale = 0; i < start_s + count; i++) {
745 if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR) 666 if (leaf_s->ents[i].address ==
667 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
746 stale++; 668 stale++;
747 } 669 }
748 } else 670 } else
@@ -789,8 +711,8 @@ xfs_dir2_leafn_order(
789 711
790 leaf1 = leaf1_bp->data; 712 leaf1 = leaf1_bp->data;
791 leaf2 = leaf2_bp->data; 713 leaf2 = leaf2_bp->data;
792 ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 714 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
793 ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 715 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
794 if (be16_to_cpu(leaf1->hdr.count) > 0 && 716 if (be16_to_cpu(leaf1->hdr.count) > 0 &&
795 be16_to_cpu(leaf2->hdr.count) > 0 && 717 be16_to_cpu(leaf2->hdr.count) > 0 &&
796 (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || 718 (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
@@ -918,7 +840,7 @@ xfs_dir2_leafn_remove(
918 xfs_da_state_blk_t *dblk, /* data block */ 840 xfs_da_state_blk_t *dblk, /* data block */
919 int *rval) /* resulting block needs join */ 841 int *rval) /* resulting block needs join */
920{ 842{
921 xfs_dir2_data_t *data; /* data block structure */ 843 xfs_dir2_data_hdr_t *hdr; /* data block header */
922 xfs_dir2_db_t db; /* data block number */ 844 xfs_dir2_db_t db; /* data block number */
923 xfs_dabuf_t *dbp; /* data block buffer */ 845 xfs_dabuf_t *dbp; /* data block buffer */
924 xfs_dir2_data_entry_t *dep; /* data block entry */ 846 xfs_dir2_data_entry_t *dep; /* data block entry */
@@ -938,7 +860,7 @@ xfs_dir2_leafn_remove(
938 tp = args->trans; 860 tp = args->trans;
939 mp = dp->i_mount; 861 mp = dp->i_mount;
940 leaf = bp->data; 862 leaf = bp->data;
941 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 863 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
942 /* 864 /*
943 * Point to the entry we're removing. 865 * Point to the entry we're removing.
944 */ 866 */
@@ -963,9 +885,9 @@ xfs_dir2_leafn_remove(
963 * in the data block in case it changes. 885 * in the data block in case it changes.
964 */ 886 */
965 dbp = dblk->bp; 887 dbp = dblk->bp;
966 data = dbp->data; 888 hdr = dbp->data;
967 dep = (xfs_dir2_data_entry_t *)((char *)data + off); 889 dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
968 longest = be16_to_cpu(data->hdr.bestfree[0].length); 890 longest = be16_to_cpu(hdr->bestfree[0].length);
969 needlog = needscan = 0; 891 needlog = needscan = 0;
970 xfs_dir2_data_make_free(tp, dbp, off, 892 xfs_dir2_data_make_free(tp, dbp, off,
971 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 893 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -974,7 +896,7 @@ xfs_dir2_leafn_remove(
974 * Log the data block header if needed. 896 * Log the data block header if needed.
975 */ 897 */
976 if (needscan) 898 if (needscan)
977 xfs_dir2_data_freescan(mp, data, &needlog); 899 xfs_dir2_data_freescan(mp, hdr, &needlog);
978 if (needlog) 900 if (needlog)
979 xfs_dir2_data_log_header(tp, dbp); 901 xfs_dir2_data_log_header(tp, dbp);
980 xfs_dir2_data_check(dp, dbp); 902 xfs_dir2_data_check(dp, dbp);
@@ -982,7 +904,7 @@ xfs_dir2_leafn_remove(
982 * If the longest data block freespace changes, need to update 904 * If the longest data block freespace changes, need to update
983 * the corresponding freeblock entry. 905 * the corresponding freeblock entry.
984 */ 906 */
985 if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) { 907 if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
986 int error; /* error return value */ 908 int error; /* error return value */
987 xfs_dabuf_t *fbp; /* freeblock buffer */ 909 xfs_dabuf_t *fbp; /* freeblock buffer */
988 xfs_dir2_db_t fdb; /* freeblock block number */ 910 xfs_dir2_db_t fdb; /* freeblock block number */
@@ -1000,27 +922,27 @@ xfs_dir2_leafn_remove(
1000 return error; 922 return error;
1001 } 923 }
1002 free = fbp->data; 924 free = fbp->data;
1003 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 925 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1004 ASSERT(be32_to_cpu(free->hdr.firstdb) == 926 ASSERT(be32_to_cpu(free->hdr.firstdb) ==
1005 XFS_DIR2_MAX_FREE_BESTS(mp) * 927 xfs_dir2_free_max_bests(mp) *
1006 (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); 928 (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
1007 /* 929 /*
1008 * Calculate which entry we need to fix. 930 * Calculate which entry we need to fix.
1009 */ 931 */
1010 findex = xfs_dir2_db_to_fdindex(mp, db); 932 findex = xfs_dir2_db_to_fdindex(mp, db);
1011 longest = be16_to_cpu(data->hdr.bestfree[0].length); 933 longest = be16_to_cpu(hdr->bestfree[0].length);
1012 /* 934 /*
1013 * If the data block is now empty we can get rid of it 935 * If the data block is now empty we can get rid of it
1014 * (usually). 936 * (usually).
1015 */ 937 */
1016 if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) { 938 if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) {
1017 /* 939 /*
1018 * Try to punch out the data block. 940 * Try to punch out the data block.
1019 */ 941 */
1020 error = xfs_dir2_shrink_inode(args, db, dbp); 942 error = xfs_dir2_shrink_inode(args, db, dbp);
1021 if (error == 0) { 943 if (error == 0) {
1022 dblk->bp = NULL; 944 dblk->bp = NULL;
1023 data = NULL; 945 hdr = NULL;
1024 } 946 }
1025 /* 947 /*
1026 * We can get ENOSPC if there's no space reservation. 948 * We can get ENOSPC if there's no space reservation.
@@ -1036,7 +958,7 @@ xfs_dir2_leafn_remove(
1036 * If we got rid of the data block, we can eliminate that entry 958 * If we got rid of the data block, we can eliminate that entry
1037 * in the free block. 959 * in the free block.
1038 */ 960 */
1039 if (data == NULL) { 961 if (hdr == NULL) {
1040 /* 962 /*
1041 * One less used entry in the free table. 963 * One less used entry in the free table.
1042 */ 964 */
@@ -1052,7 +974,8 @@ xfs_dir2_leafn_remove(
1052 int i; /* free entry index */ 974 int i; /* free entry index */
1053 975
1054 for (i = findex - 1; 976 for (i = findex - 1;
1055 i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF; 977 i >= 0 &&
978 free->bests[i] == cpu_to_be16(NULLDATAOFF);
1056 i--) 979 i--)
1057 continue; 980 continue;
1058 free->hdr.nvalid = cpu_to_be32(i + 1); 981 free->hdr.nvalid = cpu_to_be32(i + 1);
@@ -1209,7 +1132,7 @@ xfs_dir2_leafn_toosmall(
1209 */ 1132 */
1210 blk = &state->path.blk[state->path.active - 1]; 1133 blk = &state->path.blk[state->path.active - 1];
1211 info = blk->bp->data; 1134 info = blk->bp->data;
1212 ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC); 1135 ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1213 leaf = (xfs_dir2_leaf_t *)info; 1136 leaf = (xfs_dir2_leaf_t *)info;
1214 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1137 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1215 bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); 1138 bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
@@ -1268,7 +1191,7 @@ xfs_dir2_leafn_toosmall(
1268 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1191 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1269 bytes = state->blocksize - (state->blocksize >> 2); 1192 bytes = state->blocksize - (state->blocksize >> 2);
1270 leaf = bp->data; 1193 leaf = bp->data;
1271 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1194 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1272 count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1195 count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1273 bytes -= count * (uint)sizeof(leaf->ents[0]); 1196 bytes -= count * (uint)sizeof(leaf->ents[0]);
1274 /* 1197 /*
@@ -1327,8 +1250,8 @@ xfs_dir2_leafn_unbalance(
1327 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); 1250 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1328 drop_leaf = drop_blk->bp->data; 1251 drop_leaf = drop_blk->bp->data;
1329 save_leaf = save_blk->bp->data; 1252 save_leaf = save_blk->bp->data;
1330 ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1253 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1331 ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1254 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1332 /* 1255 /*
1333 * If there are any stale leaf entries, take this opportunity 1256 * If there are any stale leaf entries, take this opportunity
1334 * to purge them. 1257 * to purge them.
@@ -1432,7 +1355,7 @@ xfs_dir2_node_addname_int(
1432 xfs_da_args_t *args, /* operation arguments */ 1355 xfs_da_args_t *args, /* operation arguments */
1433 xfs_da_state_blk_t *fblk) /* optional freespace block */ 1356 xfs_da_state_blk_t *fblk) /* optional freespace block */
1434{ 1357{
1435 xfs_dir2_data_t *data; /* data block structure */ 1358 xfs_dir2_data_hdr_t *hdr; /* data block header */
1436 xfs_dir2_db_t dbno; /* data block number */ 1359 xfs_dir2_db_t dbno; /* data block number */
1437 xfs_dabuf_t *dbp; /* data block buffer */ 1360 xfs_dabuf_t *dbp; /* data block buffer */
1438 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 1361 xfs_dir2_data_entry_t *dep; /* data entry pointer */
@@ -1469,7 +1392,7 @@ xfs_dir2_node_addname_int(
1469 */ 1392 */
1470 ifbno = fblk->blkno; 1393 ifbno = fblk->blkno;
1471 free = fbp->data; 1394 free = fbp->data;
1472 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1395 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1473 findex = fblk->index; 1396 findex = fblk->index;
1474 /* 1397 /*
1475 * This means the free entry showed that the data block had 1398 * This means the free entry showed that the data block had
@@ -1553,7 +1476,7 @@ xfs_dir2_node_addname_int(
1553 continue; 1476 continue;
1554 } 1477 }
1555 free = fbp->data; 1478 free = fbp->data;
1556 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1479 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1557 findex = 0; 1480 findex = 0;
1558 } 1481 }
1559 /* 1482 /*
@@ -1680,12 +1603,12 @@ xfs_dir2_node_addname_int(
1680 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); 1603 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
1681 free->hdr.firstdb = cpu_to_be32( 1604 free->hdr.firstdb = cpu_to_be32(
1682 (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * 1605 (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1683 XFS_DIR2_MAX_FREE_BESTS(mp)); 1606 xfs_dir2_free_max_bests(mp));
1684 free->hdr.nvalid = 0; 1607 free->hdr.nvalid = 0;
1685 free->hdr.nused = 0; 1608 free->hdr.nused = 0;
1686 } else { 1609 } else {
1687 free = fbp->data; 1610 free = fbp->data;
1688 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1611 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1689 } 1612 }
1690 1613
1691 /* 1614 /*
@@ -1697,7 +1620,7 @@ xfs_dir2_node_addname_int(
1697 * freespace block, extend that table. 1620 * freespace block, extend that table.
1698 */ 1621 */
1699 if (findex >= be32_to_cpu(free->hdr.nvalid)) { 1622 if (findex >= be32_to_cpu(free->hdr.nvalid)) {
1700 ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); 1623 ASSERT(findex < xfs_dir2_free_max_bests(mp));
1701 free->hdr.nvalid = cpu_to_be32(findex + 1); 1624 free->hdr.nvalid = cpu_to_be32(findex + 1);
1702 /* 1625 /*
1703 * Tag new entry so nused will go up. 1626 * Tag new entry so nused will go up.
@@ -1708,7 +1631,7 @@ xfs_dir2_node_addname_int(
1708 * If this entry was for an empty data block 1631 * If this entry was for an empty data block
1709 * (this should always be true) then update the header. 1632 * (this should always be true) then update the header.
1710 */ 1633 */
1711 if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { 1634 if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) {
1712 be32_add_cpu(&free->hdr.nused, 1); 1635 be32_add_cpu(&free->hdr.nused, 1);
1713 xfs_dir2_free_log_header(tp, fbp); 1636 xfs_dir2_free_log_header(tp, fbp);
1714 } 1637 }
@@ -1717,8 +1640,8 @@ xfs_dir2_node_addname_int(
1717 * We haven't allocated the data entry yet so this will 1640 * We haven't allocated the data entry yet so this will
1718 * change again. 1641 * change again.
1719 */ 1642 */
1720 data = dbp->data; 1643 hdr = dbp->data;
1721 free->bests[findex] = data->hdr.bestfree[0].length; 1644 free->bests[findex] = hdr->bestfree[0].length;
1722 logfree = 1; 1645 logfree = 1;
1723 } 1646 }
1724 /* 1647 /*
@@ -1743,21 +1666,21 @@ xfs_dir2_node_addname_int(
1743 xfs_da_buf_done(fbp); 1666 xfs_da_buf_done(fbp);
1744 return error; 1667 return error;
1745 } 1668 }
1746 data = dbp->data; 1669 hdr = dbp->data;
1747 logfree = 0; 1670 logfree = 0;
1748 } 1671 }
1749 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length); 1672 ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
1750 /* 1673 /*
1751 * Point to the existing unused space. 1674 * Point to the existing unused space.
1752 */ 1675 */
1753 dup = (xfs_dir2_data_unused_t *) 1676 dup = (xfs_dir2_data_unused_t *)
1754 ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); 1677 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
1755 needscan = needlog = 0; 1678 needscan = needlog = 0;
1756 /* 1679 /*
1757 * Mark the first part of the unused space, inuse for us. 1680 * Mark the first part of the unused space, inuse for us.
1758 */ 1681 */
1759 xfs_dir2_data_use_free(tp, dbp, dup, 1682 xfs_dir2_data_use_free(tp, dbp, dup,
1760 (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, 1683 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
1761 &needlog, &needscan); 1684 &needlog, &needscan);
1762 /* 1685 /*
1763 * Fill in the new entry and log it. 1686 * Fill in the new entry and log it.
@@ -1767,13 +1690,13 @@ xfs_dir2_node_addname_int(
1767 dep->namelen = args->namelen; 1690 dep->namelen = args->namelen;
1768 memcpy(dep->name, args->name, dep->namelen); 1691 memcpy(dep->name, args->name, dep->namelen);
1769 tagp = xfs_dir2_data_entry_tag_p(dep); 1692 tagp = xfs_dir2_data_entry_tag_p(dep);
1770 *tagp = cpu_to_be16((char *)dep - (char *)data); 1693 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1771 xfs_dir2_data_log_entry(tp, dbp, dep); 1694 xfs_dir2_data_log_entry(tp, dbp, dep);
1772 /* 1695 /*
1773 * Rescan the block for bestfree if needed. 1696 * Rescan the block for bestfree if needed.
1774 */ 1697 */
1775 if (needscan) 1698 if (needscan)
1776 xfs_dir2_data_freescan(mp, data, &needlog); 1699 xfs_dir2_data_freescan(mp, hdr, &needlog);
1777 /* 1700 /*
1778 * Log the data block header if needed. 1701 * Log the data block header if needed.
1779 */ 1702 */
@@ -1782,8 +1705,8 @@ xfs_dir2_node_addname_int(
1782 /* 1705 /*
1783 * If the freespace entry is now wrong, update it. 1706 * If the freespace entry is now wrong, update it.
1784 */ 1707 */
1785 if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) { 1708 if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) {
1786 free->bests[findex] = data->hdr.bestfree[0].length; 1709 free->bests[findex] = hdr->bestfree[0].length;
1787 logfree = 1; 1710 logfree = 1;
1788 } 1711 }
1789 /* 1712 /*
@@ -1933,7 +1856,7 @@ xfs_dir2_node_replace(
1933 xfs_da_args_t *args) /* operation arguments */ 1856 xfs_da_args_t *args) /* operation arguments */
1934{ 1857{
1935 xfs_da_state_blk_t *blk; /* leaf block */ 1858 xfs_da_state_blk_t *blk; /* leaf block */
1936 xfs_dir2_data_t *data; /* data block structure */ 1859 xfs_dir2_data_hdr_t *hdr; /* data block header */
1937 xfs_dir2_data_entry_t *dep; /* data entry changed */ 1860 xfs_dir2_data_entry_t *dep; /* data entry changed */
1938 int error; /* error return value */ 1861 int error; /* error return value */
1939 int i; /* btree level */ 1862 int i; /* btree level */
@@ -1977,10 +1900,10 @@ xfs_dir2_node_replace(
1977 /* 1900 /*
1978 * Point to the data entry. 1901 * Point to the data entry.
1979 */ 1902 */
1980 data = state->extrablk.bp->data; 1903 hdr = state->extrablk.bp->data;
1981 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); 1904 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
1982 dep = (xfs_dir2_data_entry_t *) 1905 dep = (xfs_dir2_data_entry_t *)
1983 ((char *)data + 1906 ((char *)hdr +
1984 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); 1907 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
1985 ASSERT(inum != be64_to_cpu(dep->inumber)); 1908 ASSERT(inum != be64_to_cpu(dep->inumber));
1986 /* 1909 /*
@@ -2044,7 +1967,7 @@ xfs_dir2_node_trim_free(
2044 return 0; 1967 return 0;
2045 } 1968 }
2046 free = bp->data; 1969 free = bp->data;
2047 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1970 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
2048 /* 1971 /*
2049 * If there are used entries, there's nothing to do. 1972 * If there are used entries, there's nothing to do.
2050 */ 1973 */
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
deleted file mode 100644
index 82dfe7147195..000000000000
--- a/fs/xfs/xfs_dir2_node.h
+++ /dev/null
@@ -1,100 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_NODE_H__
19#define __XFS_DIR2_NODE_H__
20
21/*
22 * Directory version 2, btree node format structures
23 */
24
25struct uio;
26struct xfs_dabuf;
27struct xfs_da_args;
28struct xfs_da_state;
29struct xfs_da_state_blk;
30struct xfs_inode;
31struct xfs_trans;
32
33/*
34 * Offset of the freespace index.
35 */
36#define XFS_DIR2_FREE_SPACE 2
37#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
38#define XFS_DIR2_FREE_FIRSTDB(mp) \
39 xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
40
41#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */
42
43typedef struct xfs_dir2_free_hdr {
44 __be32 magic; /* XFS_DIR2_FREE_MAGIC */
45 __be32 firstdb; /* db of first entry */
46 __be32 nvalid; /* count of valid entries */
47 __be32 nused; /* count of used entries */
48} xfs_dir2_free_hdr_t;
49
50typedef struct xfs_dir2_free {
51 xfs_dir2_free_hdr_t hdr; /* block header */
52 __be16 bests[1]; /* best free counts */
53 /* unused entries are -1 */
54} xfs_dir2_free_t;
55
56#define XFS_DIR2_MAX_FREE_BESTS(mp) \
57 (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
58 (uint)sizeof(xfs_dir2_data_off_t))
59
60/*
61 * Convert data space db to the corresponding free db.
62 */
63static inline xfs_dir2_db_t
64xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
65{
66 return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp));
67}
68
69/*
70 * Convert data space db to the corresponding index in a free db.
71 */
72static inline int
73xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
74{
75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
76}
77
78extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
79 struct xfs_dabuf *lbp);
80extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
81extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
82 struct xfs_da_args *args, int *indexp,
83 struct xfs_da_state *state);
84extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
85 struct xfs_dabuf *leaf2_bp);
86extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
87 struct xfs_da_state_blk *oldblk,
88 struct xfs_da_state_blk *newblk);
89extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
90extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
91 struct xfs_da_state_blk *drop_blk,
92 struct xfs_da_state_blk *save_blk);
93extern int xfs_dir2_node_addname(struct xfs_da_args *args);
94extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
95extern int xfs_dir2_node_removename(struct xfs_da_args *args);
96extern int xfs_dir2_node_replace(struct xfs_da_args *args);
97extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
98 int *rvalp);
99
100#endif /* __XFS_DIR2_NODE_H__ */
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
new file mode 100644
index 000000000000..067f403ecf8a
--- /dev/null
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -0,0 +1,135 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_PRIV_H__
19#define __XFS_DIR2_PRIV_H__
20
21/* xfs_dir2.c */
22extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
23extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
24extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
25extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
26 xfs_dir2_db_t *dbp);
27extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
28 struct xfs_dabuf *bp);
29extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
30 const unsigned char *name, int len);
31
32/* xfs_dir2_block.c */
33extern int xfs_dir2_block_addname(struct xfs_da_args *args);
34extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
35 xfs_off_t *offset, filldir_t filldir);
36extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
37extern int xfs_dir2_block_removename(struct xfs_da_args *args);
38extern int xfs_dir2_block_replace(struct xfs_da_args *args);
39extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
40 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
41
42/* xfs_dir2_data.c */
43#ifdef DEBUG
44extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
45#else
46#define xfs_dir2_data_check(dp,bp)
47#endif
48extern struct xfs_dir2_data_free *
49xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
50 struct xfs_dir2_data_unused *dup, int *loghead);
51extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
52 struct xfs_dir2_data_hdr *hdr, int *loghead);
53extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
54 struct xfs_dabuf **bpp);
55extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
56 struct xfs_dir2_data_entry *dep);
57extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
58 struct xfs_dabuf *bp);
59extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
60 struct xfs_dir2_data_unused *dup);
61extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
62 xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
63 int *needlogp, int *needscanp);
64extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
65 struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
66 xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
67
68/* xfs_dir2_leaf.c */
69extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
70 struct xfs_dabuf *dbp);
71extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
72extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
73 struct xfs_dabuf *bp);
74extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
75 int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
76extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
77 size_t bufsize, xfs_off_t *offset, filldir_t filldir);
78extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
79 struct xfs_dabuf **bpp, int magic);
80extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
81 int first, int last);
82extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
83 struct xfs_dabuf *bp);
84extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
85extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
86extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
87extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
88 struct xfs_dabuf *lbp);
89extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
90 struct xfs_dabuf *lbp, xfs_dir2_db_t db);
91extern struct xfs_dir2_leaf_entry *
92xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
93 int lowstale, int highstale,
94 int *lfloglow, int *lfloghigh);
95extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
96
97/* xfs_dir2_node.c */
98extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
99 struct xfs_dabuf *lbp);
100extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
101extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
102 struct xfs_da_args *args, int *indexp,
103 struct xfs_da_state *state);
104extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
105 struct xfs_dabuf *leaf2_bp);
106extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
107 struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
108extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
109extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
110 struct xfs_da_state_blk *drop_blk,
111 struct xfs_da_state_blk *save_blk);
112extern int xfs_dir2_node_addname(struct xfs_da_args *args);
113extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
114extern int xfs_dir2_node_removename(struct xfs_da_args *args);
115extern int xfs_dir2_node_replace(struct xfs_da_args *args);
116extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
117 int *rvalp);
118
119/* xfs_dir2_sf.c */
120extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
121extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
122 struct xfs_dir2_sf_entry *sfep);
123extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
124 struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
125extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
126 int size, xfs_dir2_sf_hdr_t *sfhp);
127extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
128extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
129extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
130 xfs_off_t *offset, filldir_t filldir);
131extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
132extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
133extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
134
135#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index b1bae6b1eed9..79d05e84e296 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -23,18 +23,16 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
34#include "xfs_error.h" 32#include "xfs_error.h"
35#include "xfs_dir2_data.h" 33#include "xfs_dir2.h"
36#include "xfs_dir2_leaf.h" 34#include "xfs_dir2_format.h"
37#include "xfs_dir2_block.h" 35#include "xfs_dir2_priv.h"
38#include "xfs_trace.h" 36#include "xfs_trace.h"
39 37
40/* 38/*
@@ -60,6 +58,82 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
60#endif /* XFS_BIG_INUMS */ 58#endif /* XFS_BIG_INUMS */
61 59
62/* 60/*
61 * Inode numbers in short-form directories can come in two versions,
62 * either 4 bytes or 8 bytes wide. These helpers deal with the
63 * two forms transparently by looking at the headers i8count field.
64 *
65 * For 64-bit inode number the most significant byte must be zero.
66 */
67static xfs_ino_t
68xfs_dir2_sf_get_ino(
69 struct xfs_dir2_sf_hdr *hdr,
70 xfs_dir2_inou_t *from)
71{
72 if (hdr->i8count)
73 return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
74 else
75 return get_unaligned_be32(&from->i4.i);
76}
77
78static void
79xfs_dir2_sf_put_ino(
80 struct xfs_dir2_sf_hdr *hdr,
81 xfs_dir2_inou_t *to,
82 xfs_ino_t ino)
83{
84 ASSERT((ino & 0xff00000000000000ULL) == 0);
85
86 if (hdr->i8count)
87 put_unaligned_be64(ino, &to->i8.i);
88 else
89 put_unaligned_be32(ino, &to->i4.i);
90}
91
92xfs_ino_t
93xfs_dir2_sf_get_parent_ino(
94 struct xfs_dir2_sf_hdr *hdr)
95{
96 return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
97}
98
99static void
100xfs_dir2_sf_put_parent_ino(
101 struct xfs_dir2_sf_hdr *hdr,
102 xfs_ino_t ino)
103{
104 xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
105}
106
107/*
108 * In short-form directory entries the inode numbers are stored at variable
109 * offset behind the entry name. The inode numbers may only be accessed
110 * through the helpers below.
111 */
112static xfs_dir2_inou_t *
113xfs_dir2_sfe_inop(
114 struct xfs_dir2_sf_entry *sfep)
115{
116 return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];
117}
118
119xfs_ino_t
120xfs_dir2_sfe_get_ino(
121 struct xfs_dir2_sf_hdr *hdr,
122 struct xfs_dir2_sf_entry *sfep)
123{
124 return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
125}
126
127static void
128xfs_dir2_sfe_put_ino(
129 struct xfs_dir2_sf_hdr *hdr,
130 struct xfs_dir2_sf_entry *sfep,
131 xfs_ino_t ino)
132{
133 xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
134}
135
136/*
63 * Given a block directory (dp/block), calculate its size as a shortform (sf) 137 * Given a block directory (dp/block), calculate its size as a shortform (sf)
64 * directory and a header for the sf directory, if it will fit it the 138 * directory and a header for the sf directory, if it will fit it the
65 * space currently present in the inode. If it won't fit, the output 139 * space currently present in the inode. If it won't fit, the output
@@ -68,7 +142,7 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
68int /* size for sf form */ 142int /* size for sf form */
69xfs_dir2_block_sfsize( 143xfs_dir2_block_sfsize(
70 xfs_inode_t *dp, /* incore inode pointer */ 144 xfs_inode_t *dp, /* incore inode pointer */
71 xfs_dir2_block_t *block, /* block directory data */ 145 xfs_dir2_data_hdr_t *hdr, /* block directory data */
72 xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ 146 xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */
73{ 147{
74 xfs_dir2_dataptr_t addr; /* data entry address */ 148 xfs_dir2_dataptr_t addr; /* data entry address */
@@ -88,7 +162,7 @@ xfs_dir2_block_sfsize(
88 mp = dp->i_mount; 162 mp = dp->i_mount;
89 163
90 count = i8count = namelen = 0; 164 count = i8count = namelen = 0;
91 btp = xfs_dir2_block_tail_p(mp, block); 165 btp = xfs_dir2_block_tail_p(mp, hdr);
92 blp = xfs_dir2_block_leaf_p(btp); 166 blp = xfs_dir2_block_leaf_p(btp);
93 167
94 /* 168 /*
@@ -101,7 +175,7 @@ xfs_dir2_block_sfsize(
101 * Calculate the pointer to the entry at hand. 175 * Calculate the pointer to the entry at hand.
102 */ 176 */
103 dep = (xfs_dir2_data_entry_t *) 177 dep = (xfs_dir2_data_entry_t *)
104 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); 178 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
105 /* 179 /*
106 * Detect . and .., so we can special-case them. 180 * Detect . and .., so we can special-case them.
107 * . is not included in sf directories. 181 * . is not included in sf directories.
@@ -138,7 +212,7 @@ xfs_dir2_block_sfsize(
138 */ 212 */
139 sfhp->count = count; 213 sfhp->count = count;
140 sfhp->i8count = i8count; 214 sfhp->i8count = i8count;
141 xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); 215 xfs_dir2_sf_put_parent_ino(sfhp, parent);
142 return size; 216 return size;
143} 217}
144 218
@@ -153,7 +227,7 @@ xfs_dir2_block_to_sf(
153 int size, /* shortform directory size */ 227 int size, /* shortform directory size */
154 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ 228 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
155{ 229{
156 xfs_dir2_block_t *block; /* block structure */ 230 xfs_dir2_data_hdr_t *hdr; /* block header */
157 xfs_dir2_block_tail_t *btp; /* block tail pointer */ 231 xfs_dir2_block_tail_t *btp; /* block tail pointer */
158 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 232 xfs_dir2_data_entry_t *dep; /* data entry pointer */
159 xfs_inode_t *dp; /* incore directory inode */ 233 xfs_inode_t *dp; /* incore directory inode */
@@ -164,8 +238,7 @@ xfs_dir2_block_to_sf(
164 xfs_mount_t *mp; /* filesystem mount point */ 238 xfs_mount_t *mp; /* filesystem mount point */
165 char *ptr; /* current data pointer */ 239 char *ptr; /* current data pointer */
166 xfs_dir2_sf_entry_t *sfep; /* shortform entry */ 240 xfs_dir2_sf_entry_t *sfep; /* shortform entry */
167 xfs_dir2_sf_t *sfp; /* shortform structure */ 241 xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */
168 xfs_ino_t temp;
169 242
170 trace_xfs_dir2_block_to_sf(args); 243 trace_xfs_dir2_block_to_sf(args);
171 244
@@ -176,13 +249,14 @@ xfs_dir2_block_to_sf(
176 * Make a copy of the block data, so we can shrink the inode 249 * Make a copy of the block data, so we can shrink the inode
177 * and add local data. 250 * and add local data.
178 */ 251 */
179 block = kmem_alloc(mp->m_dirblksize, KM_SLEEP); 252 hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
180 memcpy(block, bp->data, mp->m_dirblksize); 253 memcpy(hdr, bp->data, mp->m_dirblksize);
181 logflags = XFS_ILOG_CORE; 254 logflags = XFS_ILOG_CORE;
182 if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { 255 if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
183 ASSERT(error != ENOSPC); 256 ASSERT(error != ENOSPC);
184 goto out; 257 goto out;
185 } 258 }
259
186 /* 260 /*
187 * The buffer is now unconditionally gone, whether 261 * The buffer is now unconditionally gone, whether
188 * xfs_dir2_shrink_inode worked or not. 262 * xfs_dir2_shrink_inode worked or not.
@@ -198,14 +272,14 @@ xfs_dir2_block_to_sf(
198 /* 272 /*
199 * Copy the header into the newly allocate local space. 273 * Copy the header into the newly allocate local space.
200 */ 274 */
201 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 275 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
202 memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); 276 memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
203 dp->i_d.di_size = size; 277 dp->i_d.di_size = size;
204 /* 278 /*
205 * Set up to loop over the block's entries. 279 * Set up to loop over the block's entries.
206 */ 280 */
207 btp = xfs_dir2_block_tail_p(mp, block); 281 btp = xfs_dir2_block_tail_p(mp, hdr);
208 ptr = (char *)block->u; 282 ptr = (char *)(hdr + 1);
209 endptr = (char *)xfs_dir2_block_leaf_p(btp); 283 endptr = (char *)xfs_dir2_block_leaf_p(btp);
210 sfep = xfs_dir2_sf_firstentry(sfp); 284 sfep = xfs_dir2_sf_firstentry(sfp);
211 /* 285 /*
@@ -233,7 +307,7 @@ xfs_dir2_block_to_sf(
233 else if (dep->namelen == 2 && 307 else if (dep->namelen == 2 &&
234 dep->name[0] == '.' && dep->name[1] == '.') 308 dep->name[0] == '.' && dep->name[1] == '.')
235 ASSERT(be64_to_cpu(dep->inumber) == 309 ASSERT(be64_to_cpu(dep->inumber) ==
236 xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); 310 xfs_dir2_sf_get_parent_ino(sfp));
237 /* 311 /*
238 * Normal entry, copy it into shortform. 312 * Normal entry, copy it into shortform.
239 */ 313 */
@@ -241,11 +315,11 @@ xfs_dir2_block_to_sf(
241 sfep->namelen = dep->namelen; 315 sfep->namelen = dep->namelen;
242 xfs_dir2_sf_put_offset(sfep, 316 xfs_dir2_sf_put_offset(sfep,
243 (xfs_dir2_data_aoff_t) 317 (xfs_dir2_data_aoff_t)
244 ((char *)dep - (char *)block)); 318 ((char *)dep - (char *)hdr));
245 memcpy(sfep->name, dep->name, dep->namelen); 319 memcpy(sfep->name, dep->name, dep->namelen);
246 temp = be64_to_cpu(dep->inumber); 320 xfs_dir2_sfe_put_ino(sfp, sfep,
247 xfs_dir2_sf_put_inumber(sfp, &temp, 321 be64_to_cpu(dep->inumber));
248 xfs_dir2_sf_inumberp(sfep)); 322
249 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 323 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
250 } 324 }
251 ptr += xfs_dir2_data_entsize(dep->namelen); 325 ptr += xfs_dir2_data_entsize(dep->namelen);
@@ -254,7 +328,7 @@ xfs_dir2_block_to_sf(
254 xfs_dir2_sf_check(args); 328 xfs_dir2_sf_check(args);
255out: 329out:
256 xfs_trans_log_inode(args->trans, dp, logflags); 330 xfs_trans_log_inode(args->trans, dp, logflags);
257 kmem_free(block); 331 kmem_free(hdr);
258 return error; 332 return error;
259} 333}
260 334
@@ -277,7 +351,7 @@ xfs_dir2_sf_addname(
277 xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ 351 xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */
278 int old_isize; /* di_size before adding name */ 352 int old_isize; /* di_size before adding name */
279 int pick; /* which algorithm to use */ 353 int pick; /* which algorithm to use */
280 xfs_dir2_sf_t *sfp; /* shortform structure */ 354 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
281 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ 355 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
282 356
283 trace_xfs_dir2_sf_addname(args); 357 trace_xfs_dir2_sf_addname(args);
@@ -294,19 +368,19 @@ xfs_dir2_sf_addname(
294 } 368 }
295 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 369 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
296 ASSERT(dp->i_df.if_u1.if_data != NULL); 370 ASSERT(dp->i_df.if_u1.if_data != NULL);
297 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 371 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
298 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 372 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
299 /* 373 /*
300 * Compute entry (and change in) size. 374 * Compute entry (and change in) size.
301 */ 375 */
302 add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); 376 add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
303 incr_isize = add_entsize; 377 incr_isize = add_entsize;
304 objchange = 0; 378 objchange = 0;
305#if XFS_BIG_INUMS 379#if XFS_BIG_INUMS
306 /* 380 /*
307 * Do we have to change to 8 byte inodes? 381 * Do we have to change to 8 byte inodes?
308 */ 382 */
309 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { 383 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
310 /* 384 /*
311 * Yes, adjust the entry size and the total size. 385 * Yes, adjust the entry size and the total size.
312 */ 386 */
@@ -314,7 +388,7 @@ xfs_dir2_sf_addname(
314 (uint)sizeof(xfs_dir2_ino8_t) - 388 (uint)sizeof(xfs_dir2_ino8_t) -
315 (uint)sizeof(xfs_dir2_ino4_t); 389 (uint)sizeof(xfs_dir2_ino4_t);
316 incr_isize += 390 incr_isize +=
317 (sfp->hdr.count + 2) * 391 (sfp->count + 2) *
318 ((uint)sizeof(xfs_dir2_ino8_t) - 392 ((uint)sizeof(xfs_dir2_ino8_t) -
319 (uint)sizeof(xfs_dir2_ino4_t)); 393 (uint)sizeof(xfs_dir2_ino4_t));
320 objchange = 1; 394 objchange = 1;
@@ -384,21 +458,21 @@ xfs_dir2_sf_addname_easy(
384{ 458{
385 int byteoff; /* byte offset in sf dir */ 459 int byteoff; /* byte offset in sf dir */
386 xfs_inode_t *dp; /* incore directory inode */ 460 xfs_inode_t *dp; /* incore directory inode */
387 xfs_dir2_sf_t *sfp; /* shortform structure */ 461 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
388 462
389 dp = args->dp; 463 dp = args->dp;
390 464
391 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 465 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
392 byteoff = (int)((char *)sfep - (char *)sfp); 466 byteoff = (int)((char *)sfep - (char *)sfp);
393 /* 467 /*
394 * Grow the in-inode space. 468 * Grow the in-inode space.
395 */ 469 */
396 xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen), 470 xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
397 XFS_DATA_FORK); 471 XFS_DATA_FORK);
398 /* 472 /*
399 * Need to set up again due to realloc of the inode data. 473 * Need to set up again due to realloc of the inode data.
400 */ 474 */
401 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 475 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
402 sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); 476 sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
403 /* 477 /*
404 * Fill in the new entry. 478 * Fill in the new entry.
@@ -406,15 +480,14 @@ xfs_dir2_sf_addname_easy(
406 sfep->namelen = args->namelen; 480 sfep->namelen = args->namelen;
407 xfs_dir2_sf_put_offset(sfep, offset); 481 xfs_dir2_sf_put_offset(sfep, offset);
408 memcpy(sfep->name, args->name, sfep->namelen); 482 memcpy(sfep->name, args->name, sfep->namelen);
409 xfs_dir2_sf_put_inumber(sfp, &args->inumber, 483 xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
410 xfs_dir2_sf_inumberp(sfep));
411 /* 484 /*
412 * Update the header and inode. 485 * Update the header and inode.
413 */ 486 */
414 sfp->hdr.count++; 487 sfp->count++;
415#if XFS_BIG_INUMS 488#if XFS_BIG_INUMS
416 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) 489 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
417 sfp->hdr.i8count++; 490 sfp->i8count++;
418#endif 491#endif
419 dp->i_d.di_size = new_isize; 492 dp->i_d.di_size = new_isize;
420 xfs_dir2_sf_check(args); 493 xfs_dir2_sf_check(args);
@@ -444,19 +517,19 @@ xfs_dir2_sf_addname_hard(
444 xfs_dir2_data_aoff_t offset; /* current offset value */ 517 xfs_dir2_data_aoff_t offset; /* current offset value */
445 int old_isize; /* previous di_size */ 518 int old_isize; /* previous di_size */
446 xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ 519 xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */
447 xfs_dir2_sf_t *oldsfp; /* original shortform dir */ 520 xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */
448 xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ 521 xfs_dir2_sf_entry_t *sfep; /* entry in new dir */
449 xfs_dir2_sf_t *sfp; /* new shortform dir */ 522 xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */
450 523
451 /* 524 /*
452 * Copy the old directory to the stack buffer. 525 * Copy the old directory to the stack buffer.
453 */ 526 */
454 dp = args->dp; 527 dp = args->dp;
455 528
456 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 529 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
457 old_isize = (int)dp->i_d.di_size; 530 old_isize = (int)dp->i_d.di_size;
458 buf = kmem_alloc(old_isize, KM_SLEEP); 531 buf = kmem_alloc(old_isize, KM_SLEEP);
459 oldsfp = (xfs_dir2_sf_t *)buf; 532 oldsfp = (xfs_dir2_sf_hdr_t *)buf;
460 memcpy(oldsfp, sfp, old_isize); 533 memcpy(oldsfp, sfp, old_isize);
461 /* 534 /*
462 * Loop over the old directory finding the place we're going 535 * Loop over the old directory finding the place we're going
@@ -485,7 +558,7 @@ xfs_dir2_sf_addname_hard(
485 /* 558 /*
486 * Reset the pointer since the buffer was reallocated. 559 * Reset the pointer since the buffer was reallocated.
487 */ 560 */
488 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 561 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
489 /* 562 /*
490 * Copy the first part of the directory, including the header. 563 * Copy the first part of the directory, including the header.
491 */ 564 */
@@ -498,12 +571,11 @@ xfs_dir2_sf_addname_hard(
498 sfep->namelen = args->namelen; 571 sfep->namelen = args->namelen;
499 xfs_dir2_sf_put_offset(sfep, offset); 572 xfs_dir2_sf_put_offset(sfep, offset);
500 memcpy(sfep->name, args->name, sfep->namelen); 573 memcpy(sfep->name, args->name, sfep->namelen);
501 xfs_dir2_sf_put_inumber(sfp, &args->inumber, 574 xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
502 xfs_dir2_sf_inumberp(sfep)); 575 sfp->count++;
503 sfp->hdr.count++;
504#if XFS_BIG_INUMS 576#if XFS_BIG_INUMS
505 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) 577 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
506 sfp->hdr.i8count++; 578 sfp->i8count++;
507#endif 579#endif
508 /* 580 /*
509 * If there's more left to copy, do that. 581 * If there's more left to copy, do that.
@@ -537,14 +609,14 @@ xfs_dir2_sf_addname_pick(
537 xfs_mount_t *mp; /* filesystem mount point */ 609 xfs_mount_t *mp; /* filesystem mount point */
538 xfs_dir2_data_aoff_t offset; /* data block offset */ 610 xfs_dir2_data_aoff_t offset; /* data block offset */
539 xfs_dir2_sf_entry_t *sfep; /* shortform entry */ 611 xfs_dir2_sf_entry_t *sfep; /* shortform entry */
540 xfs_dir2_sf_t *sfp; /* shortform structure */ 612 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
541 int size; /* entry's data size */ 613 int size; /* entry's data size */
542 int used; /* data bytes used */ 614 int used; /* data bytes used */
543 615
544 dp = args->dp; 616 dp = args->dp;
545 mp = dp->i_mount; 617 mp = dp->i_mount;
546 618
547 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 619 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
548 size = xfs_dir2_data_entsize(args->namelen); 620 size = xfs_dir2_data_entsize(args->namelen);
549 offset = XFS_DIR2_DATA_FIRST_OFFSET; 621 offset = XFS_DIR2_DATA_FIRST_OFFSET;
550 sfep = xfs_dir2_sf_firstentry(sfp); 622 sfep = xfs_dir2_sf_firstentry(sfp);
@@ -554,7 +626,7 @@ xfs_dir2_sf_addname_pick(
554 * Keep track of data offset and whether we've seen a place 626 * Keep track of data offset and whether we've seen a place
555 * to insert the new entry. 627 * to insert the new entry.
556 */ 628 */
557 for (i = 0; i < sfp->hdr.count; i++) { 629 for (i = 0; i < sfp->count; i++) {
558 if (!holefit) 630 if (!holefit)
559 holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); 631 holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
560 offset = xfs_dir2_sf_get_offset(sfep) + 632 offset = xfs_dir2_sf_get_offset(sfep) +
@@ -566,7 +638,7 @@ xfs_dir2_sf_addname_pick(
566 * was a data block (block form directory). 638 * was a data block (block form directory).
567 */ 639 */
568 used = offset + 640 used = offset +
569 (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 641 (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
570 (uint)sizeof(xfs_dir2_block_tail_t); 642 (uint)sizeof(xfs_dir2_block_tail_t);
571 /* 643 /*
572 * If it won't fit in a block form then we can't insert it, 644 * If it won't fit in a block form then we can't insert it,
@@ -612,30 +684,30 @@ xfs_dir2_sf_check(
612 xfs_ino_t ino; /* entry inode number */ 684 xfs_ino_t ino; /* entry inode number */
613 int offset; /* data offset */ 685 int offset; /* data offset */
614 xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ 686 xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */
615 xfs_dir2_sf_t *sfp; /* shortform structure */ 687 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
616 688
617 dp = args->dp; 689 dp = args->dp;
618 690
619 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 691 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
620 offset = XFS_DIR2_DATA_FIRST_OFFSET; 692 offset = XFS_DIR2_DATA_FIRST_OFFSET;
621 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 693 ino = xfs_dir2_sf_get_parent_ino(sfp);
622 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 694 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
623 695
624 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 696 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
625 i < sfp->hdr.count; 697 i < sfp->count;
626 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 698 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
627 ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); 699 ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
628 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 700 ino = xfs_dir2_sfe_get_ino(sfp, sfep);
629 i8count += ino > XFS_DIR2_MAX_SHORT_INUM; 701 i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
630 offset = 702 offset =
631 xfs_dir2_sf_get_offset(sfep) + 703 xfs_dir2_sf_get_offset(sfep) +
632 xfs_dir2_data_entsize(sfep->namelen); 704 xfs_dir2_data_entsize(sfep->namelen);
633 } 705 }
634 ASSERT(i8count == sfp->hdr.i8count); 706 ASSERT(i8count == sfp->i8count);
635 ASSERT(XFS_BIG_INUMS || i8count == 0); 707 ASSERT(XFS_BIG_INUMS || i8count == 0);
636 ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); 708 ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
637 ASSERT(offset + 709 ASSERT(offset +
638 (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 710 (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
639 (uint)sizeof(xfs_dir2_block_tail_t) <= 711 (uint)sizeof(xfs_dir2_block_tail_t) <=
640 dp->i_mount->m_dirblksize); 712 dp->i_mount->m_dirblksize);
641} 713}
@@ -651,7 +723,7 @@ xfs_dir2_sf_create(
651{ 723{
652 xfs_inode_t *dp; /* incore directory inode */ 724 xfs_inode_t *dp; /* incore directory inode */
653 int i8count; /* parent inode is an 8-byte number */ 725 int i8count; /* parent inode is an 8-byte number */
654 xfs_dir2_sf_t *sfp; /* shortform structure */ 726 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
655 int size; /* directory size */ 727 int size; /* directory size */
656 728
657 trace_xfs_dir2_sf_create(args); 729 trace_xfs_dir2_sf_create(args);
@@ -681,13 +753,13 @@ xfs_dir2_sf_create(
681 /* 753 /*
682 * Fill in the header, 754 * Fill in the header,
683 */ 755 */
684 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 756 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
685 sfp->hdr.i8count = i8count; 757 sfp->i8count = i8count;
686 /* 758 /*
687 * Now can put in the inode number, since i8count is set. 759 * Now can put in the inode number, since i8count is set.
688 */ 760 */
689 xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent); 761 xfs_dir2_sf_put_parent_ino(sfp, pino);
690 sfp->hdr.count = 0; 762 sfp->count = 0;
691 dp->i_d.di_size = size; 763 dp->i_d.di_size = size;
692 xfs_dir2_sf_check(args); 764 xfs_dir2_sf_check(args);
693 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 765 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -705,7 +777,7 @@ xfs_dir2_sf_getdents(
705 xfs_mount_t *mp; /* filesystem mount point */ 777 xfs_mount_t *mp; /* filesystem mount point */
706 xfs_dir2_dataptr_t off; /* current entry's offset */ 778 xfs_dir2_dataptr_t off; /* current entry's offset */
707 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 779 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
708 xfs_dir2_sf_t *sfp; /* shortform structure */ 780 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
709 xfs_dir2_dataptr_t dot_offset; 781 xfs_dir2_dataptr_t dot_offset;
710 xfs_dir2_dataptr_t dotdot_offset; 782 xfs_dir2_dataptr_t dotdot_offset;
711 xfs_ino_t ino; 783 xfs_ino_t ino;
@@ -724,9 +796,9 @@ xfs_dir2_sf_getdents(
724 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 796 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
725 ASSERT(dp->i_df.if_u1.if_data != NULL); 797 ASSERT(dp->i_df.if_u1.if_data != NULL);
726 798
727 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 799 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
728 800
729 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 801 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
730 802
731 /* 803 /*
732 * If the block number in the offset is out of range, we're done. 804 * If the block number in the offset is out of range, we're done.
@@ -759,7 +831,7 @@ xfs_dir2_sf_getdents(
759 * Put .. entry unless we're starting past it. 831 * Put .. entry unless we're starting past it.
760 */ 832 */
761 if (*offset <= dotdot_offset) { 833 if (*offset <= dotdot_offset) {
762 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 834 ino = xfs_dir2_sf_get_parent_ino(sfp);
763 if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { 835 if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) {
764 *offset = dotdot_offset & 0x7fffffff; 836 *offset = dotdot_offset & 0x7fffffff;
765 return 0; 837 return 0;
@@ -770,7 +842,7 @@ xfs_dir2_sf_getdents(
770 * Loop while there are more entries and put'ing works. 842 * Loop while there are more entries and put'ing works.
771 */ 843 */
772 sfep = xfs_dir2_sf_firstentry(sfp); 844 sfep = xfs_dir2_sf_firstentry(sfp);
773 for (i = 0; i < sfp->hdr.count; i++) { 845 for (i = 0; i < sfp->count; i++) {
774 off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 846 off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
775 xfs_dir2_sf_get_offset(sfep)); 847 xfs_dir2_sf_get_offset(sfep));
776 848
@@ -779,7 +851,7 @@ xfs_dir2_sf_getdents(
779 continue; 851 continue;
780 } 852 }
781 853
782 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 854 ino = xfs_dir2_sfe_get_ino(sfp, sfep);
783 if (filldir(dirent, (char *)sfep->name, sfep->namelen, 855 if (filldir(dirent, (char *)sfep->name, sfep->namelen,
784 off & 0x7fffffff, ino, DT_UNKNOWN)) { 856 off & 0x7fffffff, ino, DT_UNKNOWN)) {
785 *offset = off & 0x7fffffff; 857 *offset = off & 0x7fffffff;
@@ -805,7 +877,7 @@ xfs_dir2_sf_lookup(
805 int i; /* entry index */ 877 int i; /* entry index */
806 int error; 878 int error;
807 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 879 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
808 xfs_dir2_sf_t *sfp; /* shortform structure */ 880 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
809 enum xfs_dacmp cmp; /* comparison result */ 881 enum xfs_dacmp cmp; /* comparison result */
810 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ 882 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
811 883
@@ -824,8 +896,8 @@ xfs_dir2_sf_lookup(
824 } 896 }
825 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 897 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
826 ASSERT(dp->i_df.if_u1.if_data != NULL); 898 ASSERT(dp->i_df.if_u1.if_data != NULL);
827 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 899 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
828 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 900 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
829 /* 901 /*
830 * Special case for . 902 * Special case for .
831 */ 903 */
@@ -839,7 +911,7 @@ xfs_dir2_sf_lookup(
839 */ 911 */
840 if (args->namelen == 2 && 912 if (args->namelen == 2 &&
841 args->name[0] == '.' && args->name[1] == '.') { 913 args->name[0] == '.' && args->name[1] == '.') {
842 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 914 args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
843 args->cmpresult = XFS_CMP_EXACT; 915 args->cmpresult = XFS_CMP_EXACT;
844 return XFS_ERROR(EEXIST); 916 return XFS_ERROR(EEXIST);
845 } 917 }
@@ -847,7 +919,7 @@ xfs_dir2_sf_lookup(
847 * Loop over all the entries trying to match ours. 919 * Loop over all the entries trying to match ours.
848 */ 920 */
849 ci_sfep = NULL; 921 ci_sfep = NULL;
850 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; 922 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
851 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 923 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
852 /* 924 /*
853 * Compare name and if it's an exact match, return the inode 925 * Compare name and if it's an exact match, return the inode
@@ -858,8 +930,7 @@ xfs_dir2_sf_lookup(
858 sfep->namelen); 930 sfep->namelen);
859 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { 931 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
860 args->cmpresult = cmp; 932 args->cmpresult = cmp;
861 args->inumber = xfs_dir2_sf_get_inumber(sfp, 933 args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
862 xfs_dir2_sf_inumberp(sfep));
863 if (cmp == XFS_CMP_EXACT) 934 if (cmp == XFS_CMP_EXACT)
864 return XFS_ERROR(EEXIST); 935 return XFS_ERROR(EEXIST);
865 ci_sfep = sfep; 936 ci_sfep = sfep;
@@ -891,7 +962,7 @@ xfs_dir2_sf_removename(
891 int newsize; /* new inode size */ 962 int newsize; /* new inode size */
892 int oldsize; /* old inode size */ 963 int oldsize; /* old inode size */
893 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 964 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
894 xfs_dir2_sf_t *sfp; /* shortform structure */ 965 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
895 966
896 trace_xfs_dir2_sf_removename(args); 967 trace_xfs_dir2_sf_removename(args);
897 968
@@ -908,32 +979,31 @@ xfs_dir2_sf_removename(
908 } 979 }
909 ASSERT(dp->i_df.if_bytes == oldsize); 980 ASSERT(dp->i_df.if_bytes == oldsize);
910 ASSERT(dp->i_df.if_u1.if_data != NULL); 981 ASSERT(dp->i_df.if_u1.if_data != NULL);
911 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 982 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
912 ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 983 ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
913 /* 984 /*
914 * Loop over the old directory entries. 985 * Loop over the old directory entries.
915 * Find the one we're deleting. 986 * Find the one we're deleting.
916 */ 987 */
917 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; 988 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
918 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 989 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
919 if (xfs_da_compname(args, sfep->name, sfep->namelen) == 990 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
920 XFS_CMP_EXACT) { 991 XFS_CMP_EXACT) {
921 ASSERT(xfs_dir2_sf_get_inumber(sfp, 992 ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
922 xfs_dir2_sf_inumberp(sfep)) == 993 args->inumber);
923 args->inumber);
924 break; 994 break;
925 } 995 }
926 } 996 }
927 /* 997 /*
928 * Didn't find it. 998 * Didn't find it.
929 */ 999 */
930 if (i == sfp->hdr.count) 1000 if (i == sfp->count)
931 return XFS_ERROR(ENOENT); 1001 return XFS_ERROR(ENOENT);
932 /* 1002 /*
933 * Calculate sizes. 1003 * Calculate sizes.
934 */ 1004 */
935 byteoff = (int)((char *)sfep - (char *)sfp); 1005 byteoff = (int)((char *)sfep - (char *)sfp);
936 entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); 1006 entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
937 newsize = oldsize - entsize; 1007 newsize = oldsize - entsize;
938 /* 1008 /*
939 * Copy the part if any after the removed entry, sliding it down. 1009 * Copy the part if any after the removed entry, sliding it down.
@@ -944,22 +1014,22 @@ xfs_dir2_sf_removename(
944 /* 1014 /*
945 * Fix up the header and file size. 1015 * Fix up the header and file size.
946 */ 1016 */
947 sfp->hdr.count--; 1017 sfp->count--;
948 dp->i_d.di_size = newsize; 1018 dp->i_d.di_size = newsize;
949 /* 1019 /*
950 * Reallocate, making it smaller. 1020 * Reallocate, making it smaller.
951 */ 1021 */
952 xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); 1022 xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
953 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1023 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
954#if XFS_BIG_INUMS 1024#if XFS_BIG_INUMS
955 /* 1025 /*
956 * Are we changing inode number size? 1026 * Are we changing inode number size?
957 */ 1027 */
958 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { 1028 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
959 if (sfp->hdr.i8count == 1) 1029 if (sfp->i8count == 1)
960 xfs_dir2_sf_toino4(args); 1030 xfs_dir2_sf_toino4(args);
961 else 1031 else
962 sfp->hdr.i8count--; 1032 sfp->i8count--;
963 } 1033 }
964#endif 1034#endif
965 xfs_dir2_sf_check(args); 1035 xfs_dir2_sf_check(args);
@@ -983,7 +1053,7 @@ xfs_dir2_sf_replace(
983 int i8elevated; /* sf_toino8 set i8count=1 */ 1053 int i8elevated; /* sf_toino8 set i8count=1 */
984#endif 1054#endif
985 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 1055 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
986 xfs_dir2_sf_t *sfp; /* shortform structure */ 1056 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
987 1057
988 trace_xfs_dir2_sf_replace(args); 1058 trace_xfs_dir2_sf_replace(args);
989 1059
@@ -999,19 +1069,19 @@ xfs_dir2_sf_replace(
999 } 1069 }
1000 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1070 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1001 ASSERT(dp->i_df.if_u1.if_data != NULL); 1071 ASSERT(dp->i_df.if_u1.if_data != NULL);
1002 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1072 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1003 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 1073 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
1004#if XFS_BIG_INUMS 1074#if XFS_BIG_INUMS
1005 /* 1075 /*
1006 * New inode number is large, and need to convert to 8-byte inodes. 1076 * New inode number is large, and need to convert to 8-byte inodes.
1007 */ 1077 */
1008 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { 1078 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
1009 int error; /* error return value */ 1079 int error; /* error return value */
1010 int newsize; /* new inode size */ 1080 int newsize; /* new inode size */
1011 1081
1012 newsize = 1082 newsize =
1013 dp->i_df.if_bytes + 1083 dp->i_df.if_bytes +
1014 (sfp->hdr.count + 1) * 1084 (sfp->count + 1) *
1015 ((uint)sizeof(xfs_dir2_ino8_t) - 1085 ((uint)sizeof(xfs_dir2_ino8_t) -
1016 (uint)sizeof(xfs_dir2_ino4_t)); 1086 (uint)sizeof(xfs_dir2_ino4_t));
1017 /* 1087 /*
@@ -1029,7 +1099,7 @@ xfs_dir2_sf_replace(
1029 */ 1099 */
1030 xfs_dir2_sf_toino8(args); 1100 xfs_dir2_sf_toino8(args);
1031 i8elevated = 1; 1101 i8elevated = 1;
1032 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1102 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1033 } else 1103 } else
1034 i8elevated = 0; 1104 i8elevated = 0;
1035#endif 1105#endif
@@ -1040,34 +1110,32 @@ xfs_dir2_sf_replace(
1040 if (args->namelen == 2 && 1110 if (args->namelen == 2 &&
1041 args->name[0] == '.' && args->name[1] == '.') { 1111 args->name[0] == '.' && args->name[1] == '.') {
1042#if XFS_BIG_INUMS || defined(DEBUG) 1112#if XFS_BIG_INUMS || defined(DEBUG)
1043 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 1113 ino = xfs_dir2_sf_get_parent_ino(sfp);
1044 ASSERT(args->inumber != ino); 1114 ASSERT(args->inumber != ino);
1045#endif 1115#endif
1046 xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent); 1116 xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
1047 } 1117 }
1048 /* 1118 /*
1049 * Normal entry, look for the name. 1119 * Normal entry, look for the name.
1050 */ 1120 */
1051 else { 1121 else {
1052 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 1122 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
1053 i < sfp->hdr.count; 1123 i < sfp->count;
1054 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 1124 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
1055 if (xfs_da_compname(args, sfep->name, sfep->namelen) == 1125 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
1056 XFS_CMP_EXACT) { 1126 XFS_CMP_EXACT) {
1057#if XFS_BIG_INUMS || defined(DEBUG) 1127#if XFS_BIG_INUMS || defined(DEBUG)
1058 ino = xfs_dir2_sf_get_inumber(sfp, 1128 ino = xfs_dir2_sfe_get_ino(sfp, sfep);
1059 xfs_dir2_sf_inumberp(sfep));
1060 ASSERT(args->inumber != ino); 1129 ASSERT(args->inumber != ino);
1061#endif 1130#endif
1062 xfs_dir2_sf_put_inumber(sfp, &args->inumber, 1131 xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
1063 xfs_dir2_sf_inumberp(sfep));
1064 break; 1132 break;
1065 } 1133 }
1066 } 1134 }
1067 /* 1135 /*
1068 * Didn't find it. 1136 * Didn't find it.
1069 */ 1137 */
1070 if (i == sfp->hdr.count) { 1138 if (i == sfp->count) {
1071 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 1139 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1072#if XFS_BIG_INUMS 1140#if XFS_BIG_INUMS
1073 if (i8elevated) 1141 if (i8elevated)
@@ -1085,10 +1153,10 @@ xfs_dir2_sf_replace(
1085 /* 1153 /*
1086 * And the old count was one, so need to convert to small. 1154 * And the old count was one, so need to convert to small.
1087 */ 1155 */
1088 if (sfp->hdr.i8count == 1) 1156 if (sfp->i8count == 1)
1089 xfs_dir2_sf_toino4(args); 1157 xfs_dir2_sf_toino4(args);
1090 else 1158 else
1091 sfp->hdr.i8count--; 1159 sfp->i8count--;
1092 } 1160 }
1093 /* 1161 /*
1094 * See if the old number was small, the new number is large. 1162 * See if the old number was small, the new number is large.
@@ -1099,9 +1167,9 @@ xfs_dir2_sf_replace(
1099 * add to the i8count unless we just converted to 8-byte 1167 * add to the i8count unless we just converted to 8-byte
1100 * inodes (which does an implied i8count = 1) 1168 * inodes (which does an implied i8count = 1)
1101 */ 1169 */
1102 ASSERT(sfp->hdr.i8count != 0); 1170 ASSERT(sfp->i8count != 0);
1103 if (!i8elevated) 1171 if (!i8elevated)
1104 sfp->hdr.i8count++; 1172 sfp->i8count++;
1105 } 1173 }
1106#endif 1174#endif
1107 xfs_dir2_sf_check(args); 1175 xfs_dir2_sf_check(args);
@@ -1121,13 +1189,12 @@ xfs_dir2_sf_toino4(
1121 char *buf; /* old dir's buffer */ 1189 char *buf; /* old dir's buffer */
1122 xfs_inode_t *dp; /* incore directory inode */ 1190 xfs_inode_t *dp; /* incore directory inode */
1123 int i; /* entry index */ 1191 int i; /* entry index */
1124 xfs_ino_t ino; /* entry inode number */
1125 int newsize; /* new inode size */ 1192 int newsize; /* new inode size */
1126 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ 1193 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
1127 xfs_dir2_sf_t *oldsfp; /* old sf directory */ 1194 xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
1128 int oldsize; /* old inode size */ 1195 int oldsize; /* old inode size */
1129 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1196 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1130 xfs_dir2_sf_t *sfp; /* new sf directory */ 1197 xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
1131 1198
1132 trace_xfs_dir2_sf_toino4(args); 1199 trace_xfs_dir2_sf_toino4(args);
1133 1200
@@ -1140,44 +1207,42 @@ xfs_dir2_sf_toino4(
1140 */ 1207 */
1141 oldsize = dp->i_df.if_bytes; 1208 oldsize = dp->i_df.if_bytes;
1142 buf = kmem_alloc(oldsize, KM_SLEEP); 1209 buf = kmem_alloc(oldsize, KM_SLEEP);
1143 oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1210 oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1144 ASSERT(oldsfp->hdr.i8count == 1); 1211 ASSERT(oldsfp->i8count == 1);
1145 memcpy(buf, oldsfp, oldsize); 1212 memcpy(buf, oldsfp, oldsize);
1146 /* 1213 /*
1147 * Compute the new inode size. 1214 * Compute the new inode size.
1148 */ 1215 */
1149 newsize = 1216 newsize =
1150 oldsize - 1217 oldsize -
1151 (oldsfp->hdr.count + 1) * 1218 (oldsfp->count + 1) *
1152 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); 1219 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
1153 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); 1220 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
1154 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); 1221 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
1155 /* 1222 /*
1156 * Reset our pointers, the data has moved. 1223 * Reset our pointers, the data has moved.
1157 */ 1224 */
1158 oldsfp = (xfs_dir2_sf_t *)buf; 1225 oldsfp = (xfs_dir2_sf_hdr_t *)buf;
1159 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1226 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1160 /* 1227 /*
1161 * Fill in the new header. 1228 * Fill in the new header.
1162 */ 1229 */
1163 sfp->hdr.count = oldsfp->hdr.count; 1230 sfp->count = oldsfp->count;
1164 sfp->hdr.i8count = 0; 1231 sfp->i8count = 0;
1165 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); 1232 xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
1166 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1167 /* 1233 /*
1168 * Copy the entries field by field. 1234 * Copy the entries field by field.
1169 */ 1235 */
1170 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), 1236 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1171 oldsfep = xfs_dir2_sf_firstentry(oldsfp); 1237 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1172 i < sfp->hdr.count; 1238 i < sfp->count;
1173 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), 1239 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1174 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { 1240 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1175 sfep->namelen = oldsfep->namelen; 1241 sfep->namelen = oldsfep->namelen;
1176 sfep->offset = oldsfep->offset; 1242 sfep->offset = oldsfep->offset;
1177 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1243 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1178 ino = xfs_dir2_sf_get_inumber(oldsfp, 1244 xfs_dir2_sfe_put_ino(sfp, sfep,
1179 xfs_dir2_sf_inumberp(oldsfep)); 1245 xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
1180 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1181 } 1246 }
1182 /* 1247 /*
1183 * Clean up the inode. 1248 * Clean up the inode.
@@ -1199,13 +1264,12 @@ xfs_dir2_sf_toino8(
1199 char *buf; /* old dir's buffer */ 1264 char *buf; /* old dir's buffer */
1200 xfs_inode_t *dp; /* incore directory inode */ 1265 xfs_inode_t *dp; /* incore directory inode */
1201 int i; /* entry index */ 1266 int i; /* entry index */
1202 xfs_ino_t ino; /* entry inode number */
1203 int newsize; /* new inode size */ 1267 int newsize; /* new inode size */
1204 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ 1268 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
1205 xfs_dir2_sf_t *oldsfp; /* old sf directory */ 1269 xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
1206 int oldsize; /* old inode size */ 1270 int oldsize; /* old inode size */
1207 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1271 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1208 xfs_dir2_sf_t *sfp; /* new sf directory */ 1272 xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
1209 1273
1210 trace_xfs_dir2_sf_toino8(args); 1274 trace_xfs_dir2_sf_toino8(args);
1211 1275
@@ -1218,44 +1282,42 @@ xfs_dir2_sf_toino8(
1218 */ 1282 */
1219 oldsize = dp->i_df.if_bytes; 1283 oldsize = dp->i_df.if_bytes;
1220 buf = kmem_alloc(oldsize, KM_SLEEP); 1284 buf = kmem_alloc(oldsize, KM_SLEEP);
1221 oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1285 oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1222 ASSERT(oldsfp->hdr.i8count == 0); 1286 ASSERT(oldsfp->i8count == 0);
1223 memcpy(buf, oldsfp, oldsize); 1287 memcpy(buf, oldsfp, oldsize);
1224 /* 1288 /*
1225 * Compute the new inode size. 1289 * Compute the new inode size.
1226 */ 1290 */
1227 newsize = 1291 newsize =
1228 oldsize + 1292 oldsize +
1229 (oldsfp->hdr.count + 1) * 1293 (oldsfp->count + 1) *
1230 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); 1294 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
1231 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); 1295 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
1232 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); 1296 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
1233 /* 1297 /*
1234 * Reset our pointers, the data has moved. 1298 * Reset our pointers, the data has moved.
1235 */ 1299 */
1236 oldsfp = (xfs_dir2_sf_t *)buf; 1300 oldsfp = (xfs_dir2_sf_hdr_t *)buf;
1237 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1301 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1238 /* 1302 /*
1239 * Fill in the new header. 1303 * Fill in the new header.
1240 */ 1304 */
1241 sfp->hdr.count = oldsfp->hdr.count; 1305 sfp->count = oldsfp->count;
1242 sfp->hdr.i8count = 1; 1306 sfp->i8count = 1;
1243 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); 1307 xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
1244 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1245 /* 1308 /*
1246 * Copy the entries field by field. 1309 * Copy the entries field by field.
1247 */ 1310 */
1248 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), 1311 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1249 oldsfep = xfs_dir2_sf_firstentry(oldsfp); 1312 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1250 i < sfp->hdr.count; 1313 i < sfp->count;
1251 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), 1314 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1252 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { 1315 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1253 sfep->namelen = oldsfep->namelen; 1316 sfep->namelen = oldsfep->namelen;
1254 sfep->offset = oldsfep->offset; 1317 sfep->offset = oldsfep->offset;
1255 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1318 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1256 ino = xfs_dir2_sf_get_inumber(oldsfp, 1319 xfs_dir2_sfe_put_ino(sfp, sfep,
1257 xfs_dir2_sf_inumberp(oldsfep)); 1320 xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
1258 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1259 } 1321 }
1260 /* 1322 /*
1261 * Clean up the inode. 1323 * Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
deleted file mode 100644
index 6ac44b550d39..000000000000
--- a/fs/xfs/xfs_dir2_sf.h
+++ /dev/null
@@ -1,171 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_SF_H__
19#define __XFS_DIR2_SF_H__
20
21/*
22 * Directory layout when stored internal to an inode.
23 *
24 * Small directories are packed as tightly as possible so as to
25 * fit into the literal area of the inode.
26 */
27
28struct uio;
29struct xfs_dabuf;
30struct xfs_da_args;
31struct xfs_dir2_block;
32struct xfs_inode;
33struct xfs_mount;
34struct xfs_trans;
35
36/*
37 * Inode number stored as 8 8-bit values.
38 */
39typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
40
41/*
42 * Inode number stored as 4 8-bit values.
43 * Works a lot of the time, when all the inode numbers in a directory
44 * fit in 32 bits.
45 */
46typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
47
48typedef union {
49 xfs_dir2_ino8_t i8;
50 xfs_dir2_ino4_t i4;
51} xfs_dir2_inou_t;
52#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
53
54/*
55 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
56 * Only need 16 bits, this is the byte offset into the single block form.
57 */
58typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
59
60/*
61 * The parent directory has a dedicated field, and the self-pointer must
62 * be calculated on the fly.
63 *
64 * Entries are packed toward the top as tightly as possible. The header
65 * and the elements must be memcpy'd out into a work area to get correct
66 * alignment for the inode number fields.
67 */
68typedef struct xfs_dir2_sf_hdr {
69 __uint8_t count; /* count of entries */
70 __uint8_t i8count; /* count of 8-byte inode #s */
71 xfs_dir2_inou_t parent; /* parent dir inode number */
72} __arch_pack xfs_dir2_sf_hdr_t;
73
74typedef struct xfs_dir2_sf_entry {
75 __uint8_t namelen; /* actual name length */
76 xfs_dir2_sf_off_t offset; /* saved offset */
77 __uint8_t name[1]; /* name, variable size */
78 xfs_dir2_inou_t inumber; /* inode number, var. offset */
79} __arch_pack xfs_dir2_sf_entry_t;
80
81typedef struct xfs_dir2_sf {
82 xfs_dir2_sf_hdr_t hdr; /* shortform header */
83 xfs_dir2_sf_entry_t list[1]; /* shortform entries */
84} xfs_dir2_sf_t;
85
86static inline int xfs_dir2_sf_hdr_size(int i8count)
87{
88 return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
89 ((i8count) == 0) * \
90 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
91}
92
93static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
94{
95 return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
96}
97
98static inline xfs_intino_t
99xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
100{
101 return ((sfp)->hdr.i8count == 0 ? \
102 (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \
103 (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
104}
105
106static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
107 xfs_dir2_inou_t *to)
108{
109 if ((sfp)->hdr.i8count == 0)
110 XFS_PUT_DIR_INO4(*(from), (to)->i4);
111 else
112 XFS_PUT_DIR_INO8(*(from), (to)->i8);
113}
114
115static inline xfs_dir2_data_aoff_t
116xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
117{
118 return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
119}
120
121static inline void
122xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
123{
124 INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
125}
126
127static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
128{
129 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
130 ((sfp)->hdr.i8count == 0) * \
131 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
132}
133
134static inline int
135xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
136{
137 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
138 ((sfp)->hdr.i8count == 0) * \
139 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
140}
141
142static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
143{
144 return ((xfs_dir2_sf_entry_t *) \
145 ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
146}
147
148static inline xfs_dir2_sf_entry_t *
149xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
150{
151 return ((xfs_dir2_sf_entry_t *) \
152 ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
153}
154
155/*
156 * Functions.
157 */
158extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
159 struct xfs_dir2_block *block,
160 xfs_dir2_sf_hdr_t *sfhp);
161extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
162 int size, xfs_dir2_sf_hdr_t *sfhp);
163extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
164extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
165extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
166 xfs_off_t *offset, filldir_t filldir);
167extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
168extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
169extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
170
171#endif /* __XFS_DIR2_SF_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 8f6fc1a96386..c13fed8c394a 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -249,6 +249,11 @@ typedef struct xfs_fsop_resblks {
249#define XFS_MAX_LOG_BYTES \ 249#define XFS_MAX_LOG_BYTES \
250 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) 250 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
251 251
252/* Used for sanity checks on superblock */
253#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
254#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \
255 (s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
256
252/* 257/*
253 * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT 258 * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
254 */ 259 */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 84ebeec16642..dd5628bd8d0b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -683,7 +683,7 @@ xfs_dialloc(
683 return 0; 683 return 0;
684 } 684 }
685 agi = XFS_BUF_TO_AGI(agbp); 685 agi = XFS_BUF_TO_AGI(agbp);
686 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 686 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
687 } else { 687 } else {
688 /* 688 /*
689 * Continue where we left off before. In this case, we 689 * Continue where we left off before. In this case, we
@@ -691,7 +691,7 @@ xfs_dialloc(
691 */ 691 */
692 agbp = *IO_agbp; 692 agbp = *IO_agbp;
693 agi = XFS_BUF_TO_AGI(agbp); 693 agi = XFS_BUF_TO_AGI(agbp);
694 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 694 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
695 ASSERT(be32_to_cpu(agi->agi_freecount) > 0); 695 ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
696 } 696 }
697 mp = tp->t_mountp; 697 mp = tp->t_mountp;
@@ -775,7 +775,7 @@ nextag:
775 if (error) 775 if (error)
776 goto nextag; 776 goto nextag;
777 agi = XFS_BUF_TO_AGI(agbp); 777 agi = XFS_BUF_TO_AGI(agbp);
778 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 778 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
779 } 779 }
780 /* 780 /*
781 * Here with an allocation group that has a free inode. 781 * Here with an allocation group that has a free inode.
@@ -944,7 +944,7 @@ nextag:
944 * See if the most recently allocated block has any free. 944 * See if the most recently allocated block has any free.
945 */ 945 */
946newino: 946newino:
947 if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { 947 if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
948 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), 948 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
949 XFS_LOOKUP_EQ, &i); 949 XFS_LOOKUP_EQ, &i);
950 if (error) 950 if (error)
@@ -1085,7 +1085,7 @@ xfs_difree(
1085 return error; 1085 return error;
1086 } 1086 }
1087 agi = XFS_BUF_TO_AGI(agbp); 1087 agi = XFS_BUF_TO_AGI(agbp);
1088 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 1088 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1089 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1089 ASSERT(agbno < be32_to_cpu(agi->agi_length));
1090 /* 1090 /*
1091 * Initialize the cursor. 1091 * Initialize the cursor.
@@ -1438,7 +1438,7 @@ xfs_ialloc_log_agi(
1438 xfs_agi_t *agi; /* allocation group header */ 1438 xfs_agi_t *agi; /* allocation group header */
1439 1439
1440 agi = XFS_BUF_TO_AGI(bp); 1440 agi = XFS_BUF_TO_AGI(bp);
1441 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 1441 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1442#endif 1442#endif
1443 /* 1443 /*
1444 * Compute byte offsets for the first and last fields. 1444 * Compute byte offsets for the first and last fields.
@@ -1492,7 +1492,7 @@ xfs_read_agi(
1492 /* 1492 /*
1493 * Validate the magic number of the agi block. 1493 * Validate the magic number of the agi block.
1494 */ 1494 */
1495 agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && 1495 agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
1496 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && 1496 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
1497 be32_to_cpu(agi->agi_seqno) == agno; 1497 be32_to_cpu(agi->agi_seqno) == agno;
1498 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1498 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 16921f55c542..c6a75815aea0 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -31,7 +31,6 @@
31#include "xfs_dinode.h" 31#include "xfs_dinode.h"
32#include "xfs_inode.h" 32#include "xfs_inode.h"
33#include "xfs_btree.h" 33#include "xfs_btree.h"
34#include "xfs_btree_trace.h"
35#include "xfs_ialloc.h" 34#include "xfs_ialloc.h"
36#include "xfs_alloc.h" 35#include "xfs_alloc.h"
37#include "xfs_error.h" 36#include "xfs_error.h"
@@ -205,72 +204,6 @@ xfs_inobt_recs_inorder(
205} 204}
206#endif /* DEBUG */ 205#endif /* DEBUG */
207 206
208#ifdef XFS_BTREE_TRACE
209ktrace_t *xfs_inobt_trace_buf;
210
211STATIC void
212xfs_inobt_trace_enter(
213 struct xfs_btree_cur *cur,
214 const char *func,
215 char *s,
216 int type,
217 int line,
218 __psunsigned_t a0,
219 __psunsigned_t a1,
220 __psunsigned_t a2,
221 __psunsigned_t a3,
222 __psunsigned_t a4,
223 __psunsigned_t a5,
224 __psunsigned_t a6,
225 __psunsigned_t a7,
226 __psunsigned_t a8,
227 __psunsigned_t a9,
228 __psunsigned_t a10)
229{
230 ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
231 (void *)func, (void *)s, NULL, (void *)cur,
232 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
233 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
234 (void *)a8, (void *)a9, (void *)a10);
235}
236
237STATIC void
238xfs_inobt_trace_cursor(
239 struct xfs_btree_cur *cur,
240 __uint32_t *s0,
241 __uint64_t *l0,
242 __uint64_t *l1)
243{
244 *s0 = cur->bc_private.a.agno;
245 *l0 = cur->bc_rec.i.ir_startino;
246 *l1 = cur->bc_rec.i.ir_free;
247}
248
249STATIC void
250xfs_inobt_trace_key(
251 struct xfs_btree_cur *cur,
252 union xfs_btree_key *key,
253 __uint64_t *l0,
254 __uint64_t *l1)
255{
256 *l0 = be32_to_cpu(key->inobt.ir_startino);
257 *l1 = 0;
258}
259
260STATIC void
261xfs_inobt_trace_record(
262 struct xfs_btree_cur *cur,
263 union xfs_btree_rec *rec,
264 __uint64_t *l0,
265 __uint64_t *l1,
266 __uint64_t *l2)
267{
268 *l0 = be32_to_cpu(rec->inobt.ir_startino);
269 *l1 = be32_to_cpu(rec->inobt.ir_freecount);
270 *l2 = be64_to_cpu(rec->inobt.ir_free);
271}
272#endif /* XFS_BTREE_TRACE */
273
274static const struct xfs_btree_ops xfs_inobt_ops = { 207static const struct xfs_btree_ops xfs_inobt_ops = {
275 .rec_len = sizeof(xfs_inobt_rec_t), 208 .rec_len = sizeof(xfs_inobt_rec_t),
276 .key_len = sizeof(xfs_inobt_key_t), 209 .key_len = sizeof(xfs_inobt_key_t),
@@ -286,18 +219,10 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
286 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 219 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
287 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, 220 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
288 .key_diff = xfs_inobt_key_diff, 221 .key_diff = xfs_inobt_key_diff,
289
290#ifdef DEBUG 222#ifdef DEBUG
291 .keys_inorder = xfs_inobt_keys_inorder, 223 .keys_inorder = xfs_inobt_keys_inorder,
292 .recs_inorder = xfs_inobt_recs_inorder, 224 .recs_inorder = xfs_inobt_recs_inorder,
293#endif 225#endif
294
295#ifdef XFS_BTREE_TRACE
296 .trace_enter = xfs_inobt_trace_enter,
297 .trace_cursor = xfs_inobt_trace_cursor,
298 .trace_key = xfs_inobt_trace_key,
299 .trace_record = xfs_inobt_trace_record,
300#endif
301}; 226};
302 227
303/* 228/*
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3631783b2b53..7759812c1bbe 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,7 +38,6 @@
38#include "xfs_trans_priv.h" 38#include "xfs_trans_priv.h"
39#include "xfs_inode_item.h" 39#include "xfs_inode_item.h"
40#include "xfs_bmap.h" 40#include "xfs_bmap.h"
41#include "xfs_btree_trace.h"
42#include "xfs_trace.h" 41#include "xfs_trace.h"
43 42
44 43
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a098a20ca63e..3cc21ddf9f7e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -37,7 +37,6 @@
37#include "xfs_buf_item.h" 37#include "xfs_buf_item.h"
38#include "xfs_inode_item.h" 38#include "xfs_inode_item.h"
39#include "xfs_btree.h" 39#include "xfs_btree.h"
40#include "xfs_btree_trace.h"
41#include "xfs_alloc.h" 40#include "xfs_alloc.h"
42#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
43#include "xfs_bmap.h" 42#include "xfs_bmap.h"
@@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone;
52kmem_zone_t *xfs_inode_zone; 51kmem_zone_t *xfs_inode_zone;
53 52
54/* 53/*
55 * Used in xfs_itruncate(). This is the maximum number of extents 54 * Used in xfs_itruncate_extents(). This is the maximum number of extents
56 * freed from a file in a single transaction. 55 * freed from a file in a single transaction.
57 */ 56 */
58#define XFS_ITRUNC_MAX_EXTENTS 2 57#define XFS_ITRUNC_MAX_EXTENTS 2
@@ -167,7 +166,7 @@ xfs_imap_to_bp(
167 166
168 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 167 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
169 (i << mp->m_sb.sb_inodelog)); 168 (i << mp->m_sb.sb_inodelog));
170 di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && 169 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
171 XFS_DINODE_GOOD_VERSION(dip->di_version); 170 XFS_DINODE_GOOD_VERSION(dip->di_version);
172 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 171 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
173 XFS_ERRTAG_ITOBP_INOTOBP, 172 XFS_ERRTAG_ITOBP_INOTOBP,
@@ -802,7 +801,7 @@ xfs_iread(
802 * If we got something that isn't an inode it means someone 801 * If we got something that isn't an inode it means someone
803 * (nfs or dmi) has a stale handle. 802 * (nfs or dmi) has a stale handle.
804 */ 803 */
805 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { 804 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
806#ifdef DEBUG 805#ifdef DEBUG
807 xfs_alert(mp, 806 xfs_alert(mp,
808 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", 807 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -1179,15 +1178,15 @@ xfs_ialloc(
1179 * at least do it for regular files. 1178 * at least do it for regular files.
1180 */ 1179 */
1181#ifdef DEBUG 1180#ifdef DEBUG
1182void 1181STATIC void
1183xfs_isize_check( 1182xfs_isize_check(
1184 xfs_mount_t *mp, 1183 struct xfs_inode *ip,
1185 xfs_inode_t *ip, 1184 xfs_fsize_t isize)
1186 xfs_fsize_t isize)
1187{ 1185{
1188 xfs_fileoff_t map_first; 1186 struct xfs_mount *mp = ip->i_mount;
1189 int nimaps; 1187 xfs_fileoff_t map_first;
1190 xfs_bmbt_irec_t imaps[2]; 1188 int nimaps;
1189 xfs_bmbt_irec_t imaps[2];
1191 1190
1192 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 1191 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
1193 return; 1192 return;
@@ -1214,168 +1213,14 @@ xfs_isize_check(
1214 ASSERT(nimaps == 1); 1213 ASSERT(nimaps == 1);
1215 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 1214 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
1216} 1215}
1216#else /* DEBUG */
1217#define xfs_isize_check(ip, isize)
1217#endif /* DEBUG */ 1218#endif /* DEBUG */
1218 1219
1219/* 1220/*
1220 * Calculate the last possible buffered byte in a file. This must 1221 * Free up the underlying blocks past new_size. The new size must be smaller
1221 * include data that was buffered beyond the EOF by the write code. 1222 * than the current size. This routine can be used both for the attribute and
1222 * This also needs to deal with overflowing the xfs_fsize_t type 1223 * data fork, and does not modify the inode size, which is left to the caller.
1223 * which can happen for sizes near the limit.
1224 *
1225 * We also need to take into account any blocks beyond the EOF. It
1226 * may be the case that they were buffered by a write which failed.
1227 * In that case the pages will still be in memory, but the inode size
1228 * will never have been updated.
1229 */
1230STATIC xfs_fsize_t
1231xfs_file_last_byte(
1232 xfs_inode_t *ip)
1233{
1234 xfs_mount_t *mp;
1235 xfs_fsize_t last_byte;
1236 xfs_fileoff_t last_block;
1237 xfs_fileoff_t size_last_block;
1238 int error;
1239
1240 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
1241
1242 mp = ip->i_mount;
1243 /*
1244 * Only check for blocks beyond the EOF if the extents have
1245 * been read in. This eliminates the need for the inode lock,
1246 * and it also saves us from looking when it really isn't
1247 * necessary.
1248 */
1249 if (ip->i_df.if_flags & XFS_IFEXTENTS) {
1250 xfs_ilock(ip, XFS_ILOCK_SHARED);
1251 error = xfs_bmap_last_offset(NULL, ip, &last_block,
1252 XFS_DATA_FORK);
1253 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1254 if (error) {
1255 last_block = 0;
1256 }
1257 } else {
1258 last_block = 0;
1259 }
1260 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
1261 last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
1262
1263 last_byte = XFS_FSB_TO_B(mp, last_block);
1264 if (last_byte < 0) {
1265 return XFS_MAXIOFFSET(mp);
1266 }
1267 last_byte += (1 << mp->m_writeio_log);
1268 if (last_byte < 0) {
1269 return XFS_MAXIOFFSET(mp);
1270 }
1271 return last_byte;
1272}
1273
1274/*
1275 * Start the truncation of the file to new_size. The new size
1276 * must be smaller than the current size. This routine will
1277 * clear the buffer and page caches of file data in the removed
1278 * range, and xfs_itruncate_finish() will remove the underlying
1279 * disk blocks.
1280 *
1281 * The inode must have its I/O lock locked EXCLUSIVELY, and it
1282 * must NOT have the inode lock held at all. This is because we're
1283 * calling into the buffer/page cache code and we can't hold the
1284 * inode lock when we do so.
1285 *
1286 * We need to wait for any direct I/Os in flight to complete before we
1287 * proceed with the truncate. This is needed to prevent the extents
1288 * being read or written by the direct I/Os from being removed while the
1289 * I/O is in flight as there is no other method of synchronising
1290 * direct I/O with the truncate operation. Also, because we hold
1291 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
1292 * started until the truncate completes and drops the lock. Essentially,
1293 * the xfs_ioend_wait() call forms an I/O barrier that provides strict
1294 * ordering between direct I/Os and the truncate operation.
1295 *
1296 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
1297 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
1298 * in the case that the caller is locking things out of order and
1299 * may not be able to call xfs_itruncate_finish() with the inode lock
1300 * held without dropping the I/O lock. If the caller must drop the
1301 * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
1302 * must be called again with all the same restrictions as the initial
1303 * call.
1304 */
1305int
1306xfs_itruncate_start(
1307 xfs_inode_t *ip,
1308 uint flags,
1309 xfs_fsize_t new_size)
1310{
1311 xfs_fsize_t last_byte;
1312 xfs_off_t toss_start;
1313 xfs_mount_t *mp;
1314 int error = 0;
1315
1316 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1317 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1318 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1319 (flags == XFS_ITRUNC_MAYBE));
1320
1321 mp = ip->i_mount;
1322
1323 /* wait for the completion of any pending DIOs */
1324 if (new_size == 0 || new_size < ip->i_size)
1325 xfs_ioend_wait(ip);
1326
1327 /*
1328 * Call toss_pages or flushinval_pages to get rid of pages
1329 * overlapping the region being removed. We have to use
1330 * the less efficient flushinval_pages in the case that the
1331 * caller may not be able to finish the truncate without
1332 * dropping the inode's I/O lock. Make sure
1333 * to catch any pages brought in by buffers overlapping
1334 * the EOF by searching out beyond the isize by our
1335 * block size. We round new_size up to a block boundary
1336 * so that we don't toss things on the same block as
1337 * new_size but before it.
1338 *
1339 * Before calling toss_page or flushinval_pages, make sure to
1340 * call remapf() over the same region if the file is mapped.
1341 * This frees up mapped file references to the pages in the
1342 * given range and for the flushinval_pages case it ensures
1343 * that we get the latest mapped changes flushed out.
1344 */
1345 toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1346 toss_start = XFS_FSB_TO_B(mp, toss_start);
1347 if (toss_start < 0) {
1348 /*
1349 * The place to start tossing is beyond our maximum
1350 * file size, so there is no way that the data extended
1351 * out there.
1352 */
1353 return 0;
1354 }
1355 last_byte = xfs_file_last_byte(ip);
1356 trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
1357 if (last_byte > toss_start) {
1358 if (flags & XFS_ITRUNC_DEFINITE) {
1359 xfs_tosspages(ip, toss_start,
1360 -1, FI_REMAPF_LOCKED);
1361 } else {
1362 error = xfs_flushinval_pages(ip, toss_start,
1363 -1, FI_REMAPF_LOCKED);
1364 }
1365 }
1366
1367#ifdef DEBUG
1368 if (new_size == 0) {
1369 ASSERT(VN_CACHED(VFS_I(ip)) == 0);
1370 }
1371#endif
1372 return error;
1373}
1374
1375/*
1376 * Shrink the file to the given new_size. The new size must be smaller than
1377 * the current size. This will free up the underlying blocks in the removed
1378 * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
1379 * 1224 *
1380 * The transaction passed to this routine must have made a permanent log 1225 * The transaction passed to this routine must have made a permanent log
1381 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1226 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
@@ -1387,31 +1232,6 @@ xfs_itruncate_start(
1387 * will be "held" within the returned transaction. This routine does NOT 1232 * will be "held" within the returned transaction. This routine does NOT
1388 * require any disk space to be reserved for it within the transaction. 1233 * require any disk space to be reserved for it within the transaction.
1389 * 1234 *
1390 * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
1391 * indicates the fork which is to be truncated. For the attribute fork we only
1392 * support truncation to size 0.
1393 *
1394 * We use the sync parameter to indicate whether or not the first transaction
1395 * we perform might have to be synchronous. For the attr fork, it needs to be
1396 * so if the unlink of the inode is not yet known to be permanent in the log.
1397 * This keeps us from freeing and reusing the blocks of the attribute fork
1398 * before the unlink of the inode becomes permanent.
1399 *
1400 * For the data fork, we normally have to run synchronously if we're being
1401 * called out of the inactive path or we're being called out of the create path
1402 * where we're truncating an existing file. Either way, the truncate needs to
1403 * be sync so blocks don't reappear in the file with altered data in case of a
1404 * crash. wsync filesystems can run the first case async because anything that
1405 * shrinks the inode has to run sync so by the time we're called here from
1406 * inactive, the inode size is permanently set to 0.
1407 *
1408 * Calls from the truncate path always need to be sync unless we're in a wsync
1409 * filesystem and the file has already been unlinked.
1410 *
1411 * The caller is responsible for correctly setting the sync parameter. It gets
1412 * too hard for us to guess here which path we're being called out of just
1413 * based on inode state.
1414 *
1415 * If we get an error, we must return with the inode locked and linked into the 1235 * If we get an error, we must return with the inode locked and linked into the
1416 * current transaction. This keeps things simple for the higher level code, 1236 * current transaction. This keeps things simple for the higher level code,
1417 * because it always knows that the inode is locked and held in the transaction 1237 * because it always knows that the inode is locked and held in the transaction
@@ -1419,124 +1239,30 @@ xfs_itruncate_start(
1419 * dirty on error so that transactions can be easily aborted if possible. 1239 * dirty on error so that transactions can be easily aborted if possible.
1420 */ 1240 */
1421int 1241int
1422xfs_itruncate_finish( 1242xfs_itruncate_extents(
1423 xfs_trans_t **tp, 1243 struct xfs_trans **tpp,
1424 xfs_inode_t *ip, 1244 struct xfs_inode *ip,
1425 xfs_fsize_t new_size, 1245 int whichfork,
1426 int fork, 1246 xfs_fsize_t new_size)
1427 int sync)
1428{ 1247{
1429 xfs_fsblock_t first_block; 1248 struct xfs_mount *mp = ip->i_mount;
1430 xfs_fileoff_t first_unmap_block; 1249 struct xfs_trans *tp = *tpp;
1431 xfs_fileoff_t last_block; 1250 struct xfs_trans *ntp;
1432 xfs_filblks_t unmap_len=0; 1251 xfs_bmap_free_t free_list;
1433 xfs_mount_t *mp; 1252 xfs_fsblock_t first_block;
1434 xfs_trans_t *ntp; 1253 xfs_fileoff_t first_unmap_block;
1435 int done; 1254 xfs_fileoff_t last_block;
1436 int committed; 1255 xfs_filblks_t unmap_len;
1437 xfs_bmap_free_t free_list; 1256 int committed;
1438 int error; 1257 int error = 0;
1258 int done = 0;
1439 1259
1440 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 1260 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1441 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1261 ASSERT(new_size <= ip->i_size);
1442 ASSERT(*tp != NULL); 1262 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1443 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1444 ASSERT(ip->i_transp == *tp);
1445 ASSERT(ip->i_itemp != NULL); 1263 ASSERT(ip->i_itemp != NULL);
1446 ASSERT(ip->i_itemp->ili_lock_flags == 0); 1264 ASSERT(ip->i_itemp->ili_lock_flags == 0);
1447 1265 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1448
1449 ntp = *tp;
1450 mp = (ntp)->t_mountp;
1451 ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
1452
1453 /*
1454 * We only support truncating the entire attribute fork.
1455 */
1456 if (fork == XFS_ATTR_FORK) {
1457 new_size = 0LL;
1458 }
1459 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1460 trace_xfs_itruncate_finish_start(ip, new_size);
1461
1462 /*
1463 * The first thing we do is set the size to new_size permanently
1464 * on disk. This way we don't have to worry about anyone ever
1465 * being able to look at the data being freed even in the face
1466 * of a crash. What we're getting around here is the case where
1467 * we free a block, it is allocated to another file, it is written
1468 * to, and then we crash. If the new data gets written to the
1469 * file but the log buffers containing the free and reallocation
1470 * don't, then we'd end up with garbage in the blocks being freed.
1471 * As long as we make the new_size permanent before actually
1472 * freeing any blocks it doesn't matter if they get written to.
1473 *
1474 * The callers must signal into us whether or not the size
1475 * setting here must be synchronous. There are a few cases
1476 * where it doesn't have to be synchronous. Those cases
1477 * occur if the file is unlinked and we know the unlink is
1478 * permanent or if the blocks being truncated are guaranteed
1479 * to be beyond the inode eof (regardless of the link count)
1480 * and the eof value is permanent. Both of these cases occur
1481 * only on wsync-mounted filesystems. In those cases, we're
1482 * guaranteed that no user will ever see the data in the blocks
1483 * that are being truncated so the truncate can run async.
1484 * In the free beyond eof case, the file may wind up with
1485 * more blocks allocated to it than it needs if we crash
1486 * and that won't get fixed until the next time the file
1487 * is re-opened and closed but that's ok as that shouldn't
1488 * be too many blocks.
1489 *
1490 * However, we can't just make all wsync xactions run async
1491 * because there's one call out of the create path that needs
1492 * to run sync where it's truncating an existing file to size
1493 * 0 whose size is > 0.
1494 *
1495 * It's probably possible to come up with a test in this
1496 * routine that would correctly distinguish all the above
1497 * cases from the values of the function parameters and the
1498 * inode state but for sanity's sake, I've decided to let the
1499 * layers above just tell us. It's simpler to correctly figure
1500 * out in the layer above exactly under what conditions we
1501 * can run async and I think it's easier for others read and
1502 * follow the logic in case something has to be changed.
1503 * cscope is your friend -- rcc.
1504 *
1505 * The attribute fork is much simpler.
1506 *
1507 * For the attribute fork we allow the caller to tell us whether
1508 * the unlink of the inode that led to this call is yet permanent
1509 * in the on disk log. If it is not and we will be freeing extents
1510 * in this inode then we make the first transaction synchronous
1511 * to make sure that the unlink is permanent by the time we free
1512 * the blocks.
1513 */
1514 if (fork == XFS_DATA_FORK) {
1515 if (ip->i_d.di_nextents > 0) {
1516 /*
1517 * If we are not changing the file size then do
1518 * not update the on-disk file size - we may be
1519 * called from xfs_inactive_free_eofblocks(). If we
1520 * update the on-disk file size and then the system
1521 * crashes before the contents of the file are
1522 * flushed to disk then the files may be full of
1523 * holes (ie NULL files bug).
1524 */
1525 if (ip->i_size != new_size) {
1526 ip->i_d.di_size = new_size;
1527 ip->i_size = new_size;
1528 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1529 }
1530 }
1531 } else if (sync) {
1532 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
1533 if (ip->i_d.di_anextents > 0)
1534 xfs_trans_set_sync(ntp);
1535 }
1536 ASSERT(fork == XFS_DATA_FORK ||
1537 (fork == XFS_ATTR_FORK &&
1538 ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
1539 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
1540 1266
1541 /* 1267 /*
1542 * Since it is possible for space to become allocated beyond 1268 * Since it is possible for space to become allocated beyond
@@ -1547,128 +1273,142 @@ xfs_itruncate_finish(
1547 * beyond the maximum file size (ie it is the same as last_block), 1273 * beyond the maximum file size (ie it is the same as last_block),
1548 * then there is nothing to do. 1274 * then there is nothing to do.
1549 */ 1275 */
1276 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1550 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1277 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1551 ASSERT(first_unmap_block <= last_block); 1278 if (first_unmap_block == last_block)
1552 done = 0; 1279 return 0;
1553 if (last_block == first_unmap_block) { 1280
1554 done = 1; 1281 ASSERT(first_unmap_block < last_block);
1555 } else { 1282 unmap_len = last_block - first_unmap_block + 1;
1556 unmap_len = last_block - first_unmap_block + 1;
1557 }
1558 while (!done) { 1283 while (!done) {
1559 /*
1560 * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi()
1561 * will tell us whether it freed the entire range or
1562 * not. If this is a synchronous mount (wsync),
1563 * then we can tell bunmapi to keep all the
1564 * transactions asynchronous since the unlink
1565 * transaction that made this inode inactive has
1566 * already hit the disk. There's no danger of
1567 * the freed blocks being reused, there being a
1568 * crash, and the reused blocks suddenly reappearing
1569 * in this file with garbage in them once recovery
1570 * runs.
1571 */
1572 xfs_bmap_init(&free_list, &first_block); 1284 xfs_bmap_init(&free_list, &first_block);
1573 error = xfs_bunmapi(ntp, ip, 1285 error = xfs_bunmapi(tp, ip,
1574 first_unmap_block, unmap_len, 1286 first_unmap_block, unmap_len,
1575 xfs_bmapi_aflag(fork), 1287 xfs_bmapi_aflag(whichfork),
1576 XFS_ITRUNC_MAX_EXTENTS, 1288 XFS_ITRUNC_MAX_EXTENTS,
1577 &first_block, &free_list, 1289 &first_block, &free_list,
1578 &done); 1290 &done);
1579 if (error) { 1291 if (error)
1580 /* 1292 goto out_bmap_cancel;
1581 * If the bunmapi call encounters an error,
1582 * return to the caller where the transaction
1583 * can be properly aborted. We just need to
1584 * make sure we're not holding any resources
1585 * that we were not when we came in.
1586 */
1587 xfs_bmap_cancel(&free_list);
1588 return error;
1589 }
1590 1293
1591 /* 1294 /*
1592 * Duplicate the transaction that has the permanent 1295 * Duplicate the transaction that has the permanent
1593 * reservation and commit the old transaction. 1296 * reservation and commit the old transaction.
1594 */ 1297 */
1595 error = xfs_bmap_finish(tp, &free_list, &committed); 1298 error = xfs_bmap_finish(&tp, &free_list, &committed);
1596 ntp = *tp;
1597 if (committed) 1299 if (committed)
1598 xfs_trans_ijoin(ntp, ip); 1300 xfs_trans_ijoin(tp, ip);
1599 1301 if (error)
1600 if (error) { 1302 goto out_bmap_cancel;
1601 /*
1602 * If the bmap finish call encounters an error, return
1603 * to the caller where the transaction can be properly
1604 * aborted. We just need to make sure we're not
1605 * holding any resources that we were not when we came
1606 * in.
1607 *
1608 * Aborting from this point might lose some blocks in
1609 * the file system, but oh well.
1610 */
1611 xfs_bmap_cancel(&free_list);
1612 return error;
1613 }
1614 1303
1615 if (committed) { 1304 if (committed) {
1616 /* 1305 /*
1617 * Mark the inode dirty so it will be logged and 1306 * Mark the inode dirty so it will be logged and
1618 * moved forward in the log as part of every commit. 1307 * moved forward in the log as part of every commit.
1619 */ 1308 */
1620 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1309 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1621 } 1310 }
1622 1311
1623 ntp = xfs_trans_dup(ntp); 1312 ntp = xfs_trans_dup(tp);
1624 error = xfs_trans_commit(*tp, 0); 1313 error = xfs_trans_commit(tp, 0);
1625 *tp = ntp; 1314 tp = ntp;
1626 1315
1627 xfs_trans_ijoin(ntp, ip); 1316 xfs_trans_ijoin(tp, ip);
1628 1317
1629 if (error) 1318 if (error)
1630 return error; 1319 goto out;
1320
1631 /* 1321 /*
1632 * transaction commit worked ok so we can drop the extra ticket 1322 * Transaction commit worked ok so we can drop the extra ticket
1633 * reference that we gained in xfs_trans_dup() 1323 * reference that we gained in xfs_trans_dup()
1634 */ 1324 */
1635 xfs_log_ticket_put(ntp->t_ticket); 1325 xfs_log_ticket_put(tp->t_ticket);
1636 error = xfs_trans_reserve(ntp, 0, 1326 error = xfs_trans_reserve(tp, 0,
1637 XFS_ITRUNCATE_LOG_RES(mp), 0, 1327 XFS_ITRUNCATE_LOG_RES(mp), 0,
1638 XFS_TRANS_PERM_LOG_RES, 1328 XFS_TRANS_PERM_LOG_RES,
1639 XFS_ITRUNCATE_LOG_COUNT); 1329 XFS_ITRUNCATE_LOG_COUNT);
1640 if (error) 1330 if (error)
1641 return error; 1331 goto out;
1642 } 1332 }
1333
1334out:
1335 *tpp = tp;
1336 return error;
1337out_bmap_cancel:
1643 /* 1338 /*
1644 * Only update the size in the case of the data fork, but 1339 * If the bunmapi call encounters an error, return to the caller where
1645 * always re-log the inode so that our permanent transaction 1340 * the transaction can be properly aborted. We just need to make sure
1646 * can keep on rolling it forward in the log. 1341 * we're not holding any resources that we were not when we came in.
1647 */ 1342 */
1648 if (fork == XFS_DATA_FORK) { 1343 xfs_bmap_cancel(&free_list);
1649 xfs_isize_check(mp, ip, new_size); 1344 goto out;
1345}
1346
1347int
1348xfs_itruncate_data(
1349 struct xfs_trans **tpp,
1350 struct xfs_inode *ip,
1351 xfs_fsize_t new_size)
1352{
1353 int error;
1354
1355 trace_xfs_itruncate_data_start(ip, new_size);
1356
1357 /*
1358 * The first thing we do is set the size to new_size permanently on
1359 * disk. This way we don't have to worry about anyone ever being able
1360 * to look at the data being freed even in the face of a crash.
1361 * What we're getting around here is the case where we free a block, it
1362 * is allocated to another file, it is written to, and then we crash.
1363 * If the new data gets written to the file but the log buffers
1364 * containing the free and reallocation don't, then we'd end up with
1365 * garbage in the blocks being freed. As long as we make the new_size
1366 * permanent before actually freeing any blocks it doesn't matter if
1367 * they get written to.
1368 */
1369 if (ip->i_d.di_nextents > 0) {
1650 /* 1370 /*
1651 * If we are not changing the file size then do 1371 * If we are not changing the file size then do not update
1652 * not update the on-disk file size - we may be 1372 * the on-disk file size - we may be called from
1653 * called from xfs_inactive_free_eofblocks(). If we 1373 * xfs_inactive_free_eofblocks(). If we update the on-disk
1654 * update the on-disk file size and then the system 1374 * file size and then the system crashes before the contents
1655 * crashes before the contents of the file are 1375 * of the file are flushed to disk then the files may be
1656 * flushed to disk then the files may be full of 1376 * full of holes (ie NULL files bug).
1657 * holes (ie NULL files bug).
1658 */ 1377 */
1659 if (ip->i_size != new_size) { 1378 if (ip->i_size != new_size) {
1660 ip->i_d.di_size = new_size; 1379 ip->i_d.di_size = new_size;
1661 ip->i_size = new_size; 1380 ip->i_size = new_size;
1381 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1662 } 1382 }
1663 } 1383 }
1664 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1384
1665 ASSERT((new_size != 0) || 1385 error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
1666 (fork == XFS_ATTR_FORK) || 1386 if (error)
1667 (ip->i_delayed_blks == 0)); 1387 return error;
1668 ASSERT((new_size != 0) || 1388
1669 (fork == XFS_ATTR_FORK) || 1389 /*
1670 (ip->i_d.di_nextents == 0)); 1390 * If we are not changing the file size then do not update the on-disk
1671 trace_xfs_itruncate_finish_end(ip, new_size); 1391 * file size - we may be called from xfs_inactive_free_eofblocks().
1392 * If we update the on-disk file size and then the system crashes
1393 * before the contents of the file are flushed to disk then the files
1394 * may be full of holes (ie NULL files bug).
1395 */
1396 xfs_isize_check(ip, new_size);
1397 if (ip->i_size != new_size) {
1398 ip->i_d.di_size = new_size;
1399 ip->i_size = new_size;
1400 }
1401
1402 ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
1403 ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
1404
1405 /*
1406 * Always re-log the inode so that our permanent transaction can keep
1407 * on rolling it forward in the log.
1408 */
1409 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1410
1411 trace_xfs_itruncate_data_end(ip, new_size);
1672 return 0; 1412 return 0;
1673} 1413}
1674 1414
@@ -1694,7 +1434,6 @@ xfs_iunlink(
1694 1434
1695 ASSERT(ip->i_d.di_nlink == 0); 1435 ASSERT(ip->i_d.di_nlink == 0);
1696 ASSERT(ip->i_d.di_mode != 0); 1436 ASSERT(ip->i_d.di_mode != 0);
1697 ASSERT(ip->i_transp == tp);
1698 1437
1699 mp = tp->t_mountp; 1438 mp = tp->t_mountp;
1700 1439
@@ -1717,7 +1456,7 @@ xfs_iunlink(
1717 ASSERT(agi->agi_unlinked[bucket_index]); 1456 ASSERT(agi->agi_unlinked[bucket_index]);
1718 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 1457 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
1719 1458
1720 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { 1459 if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
1721 /* 1460 /*
1722 * There is already another inode in the bucket we need 1461 * There is already another inode in the bucket we need
1723 * to add ourselves to. Add us at the front of the list. 1462 * to add ourselves to. Add us at the front of the list.
@@ -1728,8 +1467,7 @@ xfs_iunlink(
1728 if (error) 1467 if (error)
1729 return error; 1468 return error;
1730 1469
1731 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); 1470 ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
1732 /* both on-disk, don't endian flip twice */
1733 dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 1471 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
1734 offset = ip->i_imap.im_boffset + 1472 offset = ip->i_imap.im_boffset +
1735 offsetof(xfs_dinode_t, di_next_unlinked); 1473 offsetof(xfs_dinode_t, di_next_unlinked);
@@ -1794,7 +1532,7 @@ xfs_iunlink_remove(
1794 agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 1532 agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
1795 ASSERT(agino != 0); 1533 ASSERT(agino != 0);
1796 bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 1534 bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
1797 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); 1535 ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
1798 ASSERT(agi->agi_unlinked[bucket_index]); 1536 ASSERT(agi->agi_unlinked[bucket_index]);
1799 1537
1800 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 1538 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1959,7 +1697,7 @@ xfs_ifree_cluster(
1959 * stale first, we will not attempt to lock them in the loop 1697 * stale first, we will not attempt to lock them in the loop
1960 * below as the XFS_ISTALE flag will be set. 1698 * below as the XFS_ISTALE flag will be set.
1961 */ 1699 */
1962 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 1700 lip = bp->b_fspriv;
1963 while (lip) { 1701 while (lip) {
1964 if (lip->li_type == XFS_LI_INODE) { 1702 if (lip->li_type == XFS_LI_INODE) {
1965 iip = (xfs_inode_log_item_t *)lip; 1703 iip = (xfs_inode_log_item_t *)lip;
@@ -2086,7 +1824,6 @@ xfs_ifree(
2086 xfs_buf_t *ibp; 1824 xfs_buf_t *ibp;
2087 1825
2088 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1826 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2089 ASSERT(ip->i_transp == tp);
2090 ASSERT(ip->i_d.di_nlink == 0); 1827 ASSERT(ip->i_d.di_nlink == 0);
2091 ASSERT(ip->i_d.di_nextents == 0); 1828 ASSERT(ip->i_d.di_nextents == 0);
2092 ASSERT(ip->i_d.di_anextents == 0); 1829 ASSERT(ip->i_d.di_anextents == 0);
@@ -2733,7 +2470,7 @@ cluster_corrupt_out:
2733 * mark the buffer as an error and call them. Otherwise 2470 * mark the buffer as an error and call them. Otherwise
2734 * mark it as stale and brelse. 2471 * mark it as stale and brelse.
2735 */ 2472 */
2736 if (XFS_BUF_IODONE_FUNC(bp)) { 2473 if (bp->b_iodone) {
2737 XFS_BUF_UNDONE(bp); 2474 XFS_BUF_UNDONE(bp);
2738 XFS_BUF_STALE(bp); 2475 XFS_BUF_STALE(bp);
2739 XFS_BUF_ERROR(bp,EIO); 2476 XFS_BUF_ERROR(bp,EIO);
@@ -2920,7 +2657,7 @@ xfs_iflush_int(
2920 */ 2657 */
2921 xfs_synchronize_times(ip); 2658 xfs_synchronize_times(ip);
2922 2659
2923 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, 2660 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2924 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2661 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2925 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 2662 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2926 "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 2663 "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -3073,8 +2810,8 @@ xfs_iflush_int(
3073 */ 2810 */
3074 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 2811 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
3075 2812
3076 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 2813 ASSERT(bp->b_fspriv != NULL);
3077 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 2814 ASSERT(bp->b_iodone != NULL);
3078 } else { 2815 } else {
3079 /* 2816 /*
3080 * We're flushing an inode which is not in the AIL and has 2817 * We're flushing an inode which is not in the AIL and has
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 964cfea77686..a97644ab945a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
241 xfs_ifork_t i_df; /* data fork */ 241 xfs_ifork_t i_df; /* data fork */
242 242
243 /* Transaction and locking information. */ 243 /* Transaction and locking information. */
244 struct xfs_trans *i_transp; /* ptr to owning transaction*/
245 struct xfs_inode_log_item *i_itemp; /* logging information */ 244 struct xfs_inode_log_item *i_itemp; /* logging information */
246 mrlock_t i_lock; /* inode lock */ 245 mrlock_t i_lock; /* inode lock */
247 mrlock_t i_iolock; /* inode IO lock */ 246 mrlock_t i_iolock; /* inode IO lock */
@@ -458,16 +457,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
458extern struct lock_class_key xfs_iolock_reclaimable; 457extern struct lock_class_key xfs_iolock_reclaimable;
459 458
460/* 459/*
461 * Flags for xfs_itruncate_start().
462 */
463#define XFS_ITRUNC_DEFINITE 0x1
464#define XFS_ITRUNC_MAYBE 0x2
465
466#define XFS_ITRUNC_FLAGS \
467 { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \
468 { XFS_ITRUNC_MAYBE, "MAYBE" }
469
470/*
471 * For multiple groups support: if S_ISGID bit is set in the parent 460 * For multiple groups support: if S_ISGID bit is set in the parent
472 * directory, group of new file is set to that of the parent, and 461 * directory, group of new file is set to that of the parent, and
473 * new subdirectory gets S_ISGID bit from parent. 462 * new subdirectory gets S_ISGID bit from parent.
@@ -501,9 +490,10 @@ uint xfs_ip2xflags(struct xfs_inode *);
501uint xfs_dic2xflags(struct xfs_dinode *); 490uint xfs_dic2xflags(struct xfs_dinode *);
502int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 491int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
503 struct xfs_bmap_free *); 492 struct xfs_bmap_free *);
504int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); 493int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
505int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 494 int, xfs_fsize_t);
506 xfs_fsize_t, int, int); 495int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
496 xfs_fsize_t);
507int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 497int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
508 498
509void xfs_iext_realloc(xfs_inode_t *, int, int); 499void xfs_iext_realloc(xfs_inode_t *, int, int);
@@ -579,13 +569,6 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
579 569
580#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 570#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
581 571
582#ifdef DEBUG
583void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
584 xfs_fsize_t);
585#else /* DEBUG */
586#define xfs_isize_check(mp, ip, isize)
587#endif /* DEBUG */
588
589#if defined(DEBUG) 572#if defined(DEBUG)
590void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); 573void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
591#else 574#else
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b1e88d56069c..588406dc6a35 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -632,13 +632,8 @@ xfs_inode_item_unlock(
632 struct xfs_inode *ip = iip->ili_inode; 632 struct xfs_inode *ip = iip->ili_inode;
633 unsigned short lock_flags; 633 unsigned short lock_flags;
634 634
635 ASSERT(iip->ili_inode->i_itemp != NULL); 635 ASSERT(ip->i_itemp != NULL);
636 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 636 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
637
638 /*
639 * Clear the transaction pointer in the inode.
640 */
641 ip->i_transp = NULL;
642 637
643 /* 638 /*
644 * If the inode needed a separate buffer with which to log 639 * If the inode needed a separate buffer with which to log
@@ -664,8 +659,8 @@ xfs_inode_item_unlock(
664 lock_flags = iip->ili_lock_flags; 659 lock_flags = iip->ili_lock_flags;
665 iip->ili_lock_flags = 0; 660 iip->ili_lock_flags = 0;
666 if (lock_flags) { 661 if (lock_flags) {
667 xfs_iunlock(iip->ili_inode, lock_flags); 662 xfs_iunlock(ip, lock_flags);
668 IRELE(iip->ili_inode); 663 IRELE(ip);
669 } 664 }
670} 665}
671 666
@@ -879,7 +874,7 @@ xfs_iflush_done(
879 * Scan the buffer IO completions for other inodes being completed and 874 * Scan the buffer IO completions for other inodes being completed and
880 * attach them to the current inode log item. 875 * attach them to the current inode log item.
881 */ 876 */
882 blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 877 blip = bp->b_fspriv;
883 prev = NULL; 878 prev = NULL;
884 while (blip != NULL) { 879 while (blip != NULL) {
885 if (lip->li_cb != xfs_iflush_done) { 880 if (lip->li_cb != xfs_iflush_done) {
@@ -891,7 +886,7 @@ xfs_iflush_done(
891 /* remove from list */ 886 /* remove from list */
892 next = blip->li_bio_list; 887 next = blip->li_bio_list;
893 if (!prev) { 888 if (!prev) {
894 XFS_BUF_SET_FSPRIVATE(bp, next); 889 bp->b_fspriv = next;
895 } else { 890 } else {
896 prev->li_bio_list = next; 891 prev->li_bio_list = next;
897 } 892 }
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index b8e4ee4e89a4..b253c0ea5bec 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -28,17 +28,6 @@
28 28
29typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ 29typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */
30 30
31/*
32 * Useful inode bits for this kernel.
33 * Used in some places where having 64-bits in the 32-bit kernels
34 * costs too much.
35 */
36#if XFS_BIG_INUMS
37typedef xfs_ino_t xfs_intino_t;
38#else
39typedef __uint32_t xfs_intino_t;
40#endif
41
42#define NULLFSINO ((xfs_ino_t)-1) 31#define NULLFSINO ((xfs_ino_t)-1)
43#define NULLAGINO ((xfs_agino_t)-1) 32#define NULLAGINO ((xfs_agino_t)-1)
44 33
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 41d5b8f2bf92..06ff8437ed8e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -871,15 +871,9 @@ xlog_space_left(
871void 871void
872xlog_iodone(xfs_buf_t *bp) 872xlog_iodone(xfs_buf_t *bp)
873{ 873{
874 xlog_in_core_t *iclog; 874 xlog_in_core_t *iclog = bp->b_fspriv;
875 xlog_t *l; 875 xlog_t *l = iclog->ic_log;
876 int aborted; 876 int aborted = 0;
877
878 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
879 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
880 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
881 aborted = 0;
882 l = iclog->ic_log;
883 877
884 /* 878 /*
885 * Race to shutdown the filesystem if we see an error. 879 * Race to shutdown the filesystem if we see an error.
@@ -1056,10 +1050,9 @@ xlog_alloc_log(xfs_mount_t *mp,
1056 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); 1050 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1057 if (!bp) 1051 if (!bp)
1058 goto out_free_log; 1052 goto out_free_log;
1059 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1053 bp->b_iodone = xlog_iodone;
1060 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1061 ASSERT(XFS_BUF_ISBUSY(bp)); 1054 ASSERT(XFS_BUF_ISBUSY(bp));
1062 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 1055 ASSERT(xfs_buf_islocked(bp));
1063 log->l_xbuf = bp; 1056 log->l_xbuf = bp;
1064 1057
1065 spin_lock_init(&log->l_icloglock); 1058 spin_lock_init(&log->l_icloglock);
@@ -1090,10 +1083,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1090 log->l_iclog_size, 0); 1083 log->l_iclog_size, 0);
1091 if (!bp) 1084 if (!bp)
1092 goto out_free_iclog; 1085 goto out_free_iclog;
1093 if (!XFS_BUF_CPSEMA(bp)) 1086
1094 ASSERT(0); 1087 bp->b_iodone = xlog_iodone;
1095 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1096 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1097 iclog->ic_bp = bp; 1088 iclog->ic_bp = bp;
1098 iclog->ic_data = bp->b_addr; 1089 iclog->ic_data = bp->b_addr;
1099#ifdef DEBUG 1090#ifdef DEBUG
@@ -1118,7 +1109,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1118 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1109 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1119 1110
1120 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1111 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1121 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1112 ASSERT(xfs_buf_islocked(iclog->ic_bp));
1122 init_waitqueue_head(&iclog->ic_force_wait); 1113 init_waitqueue_head(&iclog->ic_force_wait);
1123 init_waitqueue_head(&iclog->ic_write_wait); 1114 init_waitqueue_head(&iclog->ic_write_wait);
1124 1115
@@ -1254,9 +1245,8 @@ STATIC int
1254xlog_bdstrat( 1245xlog_bdstrat(
1255 struct xfs_buf *bp) 1246 struct xfs_buf *bp)
1256{ 1247{
1257 struct xlog_in_core *iclog; 1248 struct xlog_in_core *iclog = bp->b_fspriv;
1258 1249
1259 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1260 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1250 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1261 XFS_BUF_ERROR(bp, EIO); 1251 XFS_BUF_ERROR(bp, EIO);
1262 XFS_BUF_STALE(bp); 1252 XFS_BUF_STALE(bp);
@@ -1269,7 +1259,6 @@ xlog_bdstrat(
1269 return 0; 1259 return 0;
1270 } 1260 }
1271 1261
1272 bp->b_flags |= _XBF_RUN_QUEUES;
1273 xfs_buf_iorequest(bp); 1262 xfs_buf_iorequest(bp);
1274 return 0; 1263 return 0;
1275} 1264}
@@ -1351,8 +1340,6 @@ xlog_sync(xlog_t *log,
1351 } 1340 }
1352 1341
1353 bp = iclog->ic_bp; 1342 bp = iclog->ic_bp;
1354 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
1355 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
1356 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1343 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
1357 1344
1358 XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); 1345 XFS_STATS_ADD(xs_log_blocks, BTOBB(count));
@@ -1366,22 +1353,28 @@ xlog_sync(xlog_t *log,
1366 iclog->ic_bwritecnt = 1; 1353 iclog->ic_bwritecnt = 1;
1367 } 1354 }
1368 XFS_BUF_SET_COUNT(bp, count); 1355 XFS_BUF_SET_COUNT(bp, count);
1369 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ 1356 bp->b_fspriv = iclog;
1370 XFS_BUF_ZEROFLAGS(bp); 1357 XFS_BUF_ZEROFLAGS(bp);
1371 XFS_BUF_BUSY(bp); 1358 XFS_BUF_BUSY(bp);
1372 XFS_BUF_ASYNC(bp); 1359 XFS_BUF_ASYNC(bp);
1373 bp->b_flags |= XBF_LOG_BUFFER; 1360 bp->b_flags |= XBF_SYNCIO;
1374 1361
1375 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { 1362 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
1363 bp->b_flags |= XBF_FUA;
1364
1376 /* 1365 /*
1377 * If we have an external log device, flush the data device 1366 * Flush the data device before flushing the log to make
1378 * before flushing the log to make sure all meta data 1367 * sure all meta data written back from the AIL actually made
1379 * written back from the AIL actually made it to disk 1368 * it to disk before stamping the new log tail LSN into the
1380 * before writing out the new log tail LSN in the log buffer. 1369 * log buffer. For an external log we need to issue the
1370 * flush explicitly, and unfortunately synchronously here;
1371 * for an internal log we can simply use the block layer
1372 * state machine for preflushes.
1381 */ 1373 */
1382 if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) 1374 if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
1383 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); 1375 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
1384 XFS_BUF_ORDERED(bp); 1376 else
1377 bp->b_flags |= XBF_FLUSH;
1385 } 1378 }
1386 1379
1387 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1380 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -1404,19 +1397,16 @@ xlog_sync(xlog_t *log,
1404 } 1397 }
1405 if (split) { 1398 if (split) {
1406 bp = iclog->ic_log->l_xbuf; 1399 bp = iclog->ic_log->l_xbuf;
1407 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
1408 (unsigned long)1);
1409 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
1410 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1400 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
1411 XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ 1401 XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
1412 (__psint_t)count), split); 1402 (__psint_t)count), split);
1413 XFS_BUF_SET_FSPRIVATE(bp, iclog); 1403 bp->b_fspriv = iclog;
1414 XFS_BUF_ZEROFLAGS(bp); 1404 XFS_BUF_ZEROFLAGS(bp);
1415 XFS_BUF_BUSY(bp); 1405 XFS_BUF_BUSY(bp);
1416 XFS_BUF_ASYNC(bp); 1406 XFS_BUF_ASYNC(bp);
1417 bp->b_flags |= XBF_LOG_BUFFER; 1407 bp->b_flags |= XBF_SYNCIO;
1418 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1408 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1419 XFS_BUF_ORDERED(bp); 1409 bp->b_flags |= XBF_FUA;
1420 dptr = XFS_BUF_PTR(bp); 1410 dptr = XFS_BUF_PTR(bp);
1421 /* 1411 /*
1422 * Bump the cycle numbers at the start of each block 1412 * Bump the cycle numbers at the start of each block
@@ -3521,13 +3511,13 @@ xlog_verify_iclog(xlog_t *log,
3521 spin_unlock(&log->l_icloglock); 3511 spin_unlock(&log->l_icloglock);
3522 3512
3523 /* check log magic numbers */ 3513 /* check log magic numbers */
3524 if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) 3514 if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
3525 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); 3515 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
3526 3516
3527 ptr = (xfs_caddr_t) &iclog->ic_header; 3517 ptr = (xfs_caddr_t) &iclog->ic_header;
3528 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; 3518 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
3529 ptr += BBSIZE) { 3519 ptr += BBSIZE) {
3530 if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) 3520 if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
3531 xfs_emerg(log->l_mp, "%s: unexpected magic num", 3521 xfs_emerg(log->l_mp, "%s: unexpected magic num",
3532 __func__); 3522 __func__);
3533 } 3523 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 04142caedb2b..8fe4206de057 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -91,6 +91,8 @@ xlog_get_bp(
91 xlog_t *log, 91 xlog_t *log,
92 int nbblks) 92 int nbblks)
93{ 93{
94 struct xfs_buf *bp;
95
94 if (!xlog_buf_bbcount_valid(log, nbblks)) { 96 if (!xlog_buf_bbcount_valid(log, nbblks)) {
95 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 97 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
96 nbblks); 98 nbblks);
@@ -118,8 +120,10 @@ xlog_get_bp(
118 nbblks += log->l_sectBBsize; 120 nbblks += log->l_sectBBsize;
119 nbblks = round_up(nbblks, log->l_sectBBsize); 121 nbblks = round_up(nbblks, log->l_sectBBsize);
120 122
121 return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, 123 bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0);
122 BBTOB(nbblks), 0); 124 if (bp)
125 xfs_buf_unlock(bp);
126 return bp;
123} 127}
124 128
125STATIC void 129STATIC void
@@ -264,7 +268,7 @@ xlog_bwrite(
264 XFS_BUF_ZEROFLAGS(bp); 268 XFS_BUF_ZEROFLAGS(bp);
265 XFS_BUF_BUSY(bp); 269 XFS_BUF_BUSY(bp);
266 XFS_BUF_HOLD(bp); 270 XFS_BUF_HOLD(bp);
267 XFS_BUF_PSEMA(bp, PRIBIO); 271 xfs_buf_lock(bp);
268 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); 272 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
269 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 273 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
270 274
@@ -300,14 +304,14 @@ xlog_header_check_recover(
300 xfs_mount_t *mp, 304 xfs_mount_t *mp,
301 xlog_rec_header_t *head) 305 xlog_rec_header_t *head)
302{ 306{
303 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); 307 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
304 308
305 /* 309 /*
306 * IRIX doesn't write the h_fmt field and leaves it zeroed 310 * IRIX doesn't write the h_fmt field and leaves it zeroed
307 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover 311 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
308 * a dirty log created in IRIX. 312 * a dirty log created in IRIX.
309 */ 313 */
310 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { 314 if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
311 xfs_warn(mp, 315 xfs_warn(mp,
312 "dirty log written in incompatible format - can't recover"); 316 "dirty log written in incompatible format - can't recover");
313 xlog_header_check_dump(mp, head); 317 xlog_header_check_dump(mp, head);
@@ -333,7 +337,7 @@ xlog_header_check_mount(
333 xfs_mount_t *mp, 337 xfs_mount_t *mp,
334 xlog_rec_header_t *head) 338 xlog_rec_header_t *head)
335{ 339{
336 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); 340 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
337 341
338 if (uuid_is_nil(&head->h_fs_uuid)) { 342 if (uuid_is_nil(&head->h_fs_uuid)) {
339 /* 343 /*
@@ -367,7 +371,7 @@ xlog_recover_iodone(
367 xfs_force_shutdown(bp->b_target->bt_mount, 371 xfs_force_shutdown(bp->b_target->bt_mount,
368 SHUTDOWN_META_IO_ERROR); 372 SHUTDOWN_META_IO_ERROR);
369 } 373 }
370 XFS_BUF_CLR_IODONE_FUNC(bp); 374 bp->b_iodone = NULL;
371 xfs_buf_ioend(bp, 0); 375 xfs_buf_ioend(bp, 0);
372} 376}
373 377
@@ -534,7 +538,7 @@ xlog_find_verify_log_record(
534 538
535 head = (xlog_rec_header_t *)offset; 539 head = (xlog_rec_header_t *)offset;
536 540
537 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) 541 if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
538 break; 542 break;
539 543
540 if (!smallmem) 544 if (!smallmem)
@@ -916,7 +920,7 @@ xlog_find_tail(
916 if (error) 920 if (error)
917 goto done; 921 goto done;
918 922
919 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { 923 if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
920 found = 1; 924 found = 1;
921 break; 925 break;
922 } 926 }
@@ -933,8 +937,8 @@ xlog_find_tail(
933 if (error) 937 if (error)
934 goto done; 938 goto done;
935 939
936 if (XLOG_HEADER_MAGIC_NUM == 940 if (*(__be32 *)offset ==
937 be32_to_cpu(*(__be32 *)offset)) { 941 cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
938 found = 2; 942 found = 2;
939 break; 943 break;
940 } 944 }
@@ -1947,7 +1951,7 @@ xfs_qm_dqcheck(
1947 * This is all fine; things are still consistent, and we haven't lost 1951 * This is all fine; things are still consistent, and we haven't lost
1948 * any quota information. Just don't complain about bad dquot blks. 1952 * any quota information. Just don't complain about bad dquot blks.
1949 */ 1953 */
1950 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { 1954 if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
1951 if (flags & XFS_QMOPT_DOWARN) 1955 if (flags & XFS_QMOPT_DOWARN)
1952 xfs_alert(mp, 1956 xfs_alert(mp,
1953 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 1957 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
@@ -2174,7 +2178,7 @@ xlog_recover_buffer_pass2(
2174 error = xfs_bwrite(mp, bp); 2178 error = xfs_bwrite(mp, bp);
2175 } else { 2179 } else {
2176 ASSERT(bp->b_target->bt_mount == mp); 2180 ASSERT(bp->b_target->bt_mount == mp);
2177 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2181 bp->b_iodone = xlog_recover_iodone;
2178 xfs_bdwrite(mp, bp); 2182 xfs_bdwrite(mp, bp);
2179 } 2183 }
2180 2184
@@ -2238,7 +2242,7 @@ xlog_recover_inode_pass2(
2238 * Make sure the place we're flushing out to really looks 2242 * Make sure the place we're flushing out to really looks
2239 * like an inode! 2243 * like an inode!
2240 */ 2244 */
2241 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { 2245 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
2242 xfs_buf_relse(bp); 2246 xfs_buf_relse(bp);
2243 xfs_alert(mp, 2247 xfs_alert(mp,
2244 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", 2248 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
@@ -2434,7 +2438,7 @@ xlog_recover_inode_pass2(
2434 2438
2435write_inode_buffer: 2439write_inode_buffer:
2436 ASSERT(bp->b_target->bt_mount == mp); 2440 ASSERT(bp->b_target->bt_mount == mp);
2437 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2441 bp->b_iodone = xlog_recover_iodone;
2438 xfs_bdwrite(mp, bp); 2442 xfs_bdwrite(mp, bp);
2439error: 2443error:
2440 if (need_free) 2444 if (need_free)
@@ -2556,7 +2560,7 @@ xlog_recover_dquot_pass2(
2556 2560
2557 ASSERT(dq_f->qlf_size == 2); 2561 ASSERT(dq_f->qlf_size == 2);
2558 ASSERT(bp->b_target->bt_mount == mp); 2562 ASSERT(bp->b_target->bt_mount == mp);
2559 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2563 bp->b_iodone = xlog_recover_iodone;
2560 xfs_bdwrite(mp, bp); 2564 xfs_bdwrite(mp, bp);
2561 2565
2562 return (0); 2566 return (0);
@@ -3295,7 +3299,7 @@ xlog_valid_rec_header(
3295{ 3299{
3296 int hlen; 3300 int hlen;
3297 3301
3298 if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { 3302 if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
3299 XFS_ERROR_REPORT("xlog_valid_rec_header(1)", 3303 XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
3300 XFS_ERRLEVEL_LOW, log->l_mp); 3304 XFS_ERRLEVEL_LOW, log->l_mp);
3301 return XFS_ERROR(EFSCORRUPTED); 3305 return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b49b82363d20..7f25245da289 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -348,7 +348,7 @@ xfs_mount_validate_sb(
348 } 348 }
349 349
350 /* 350 /*
351 * More sanity checking. These were stolen directly from 351 * More sanity checking. Most of these were stolen directly from
352 * xfs_repair. 352 * xfs_repair.
353 */ 353 */
354 if (unlikely( 354 if (unlikely(
@@ -371,23 +371,13 @@ xfs_mount_validate_sb(
371 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 371 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
372 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 372 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
373 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 373 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
374 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { 374 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) ||
375 sbp->sb_dblocks == 0 ||
376 sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) ||
377 sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
375 if (loud) 378 if (loud)
376 xfs_warn(mp, "SB sanity check 1 failed"); 379 XFS_CORRUPTION_ERROR("SB sanity check failed",
377 return XFS_ERROR(EFSCORRUPTED); 380 XFS_ERRLEVEL_LOW, mp, sbp);
378 }
379
380 /*
381 * Sanity check AG count, size fields against data size field
382 */
383 if (unlikely(
384 sbp->sb_dblocks == 0 ||
385 sbp->sb_dblocks >
386 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
387 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
388 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
389 if (loud)
390 xfs_warn(mp, "SB sanity check 2 failed");
391 return XFS_ERROR(EFSCORRUPTED); 381 return XFS_ERROR(EFSCORRUPTED);
392 } 382 }
393 383
@@ -864,7 +854,8 @@ xfs_update_alignment(xfs_mount_t *mp)
864 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 854 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
865 (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 855 (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
866 if (mp->m_flags & XFS_MOUNT_RETERR) { 856 if (mp->m_flags & XFS_MOUNT_RETERR) {
867 xfs_warn(mp, "alignment check 1 failed"); 857 xfs_warn(mp, "alignment check failed: "
858 "(sunit/swidth vs. blocksize)");
868 return XFS_ERROR(EINVAL); 859 return XFS_ERROR(EINVAL);
869 } 860 }
870 mp->m_dalign = mp->m_swidth = 0; 861 mp->m_dalign = mp->m_swidth = 0;
@@ -875,6 +866,8 @@ xfs_update_alignment(xfs_mount_t *mp)
875 mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); 866 mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
876 if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { 867 if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
877 if (mp->m_flags & XFS_MOUNT_RETERR) { 868 if (mp->m_flags & XFS_MOUNT_RETERR) {
869 xfs_warn(mp, "alignment check failed: "
870 "(sunit/swidth vs. ag size)");
878 return XFS_ERROR(EINVAL); 871 return XFS_ERROR(EINVAL);
879 } 872 }
880 xfs_warn(mp, 873 xfs_warn(mp,
@@ -889,8 +882,8 @@ xfs_update_alignment(xfs_mount_t *mp)
889 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 882 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
890 } else { 883 } else {
891 if (mp->m_flags & XFS_MOUNT_RETERR) { 884 if (mp->m_flags & XFS_MOUNT_RETERR) {
892 xfs_warn(mp, 885 xfs_warn(mp, "alignment check failed: "
893 "stripe alignment turned off: sunit(%d) less than bsize(%d)", 886 "sunit(%d) less than bsize(%d)",
894 mp->m_dalign, 887 mp->m_dalign,
895 mp->m_blockmask +1); 888 mp->m_blockmask +1);
896 return XFS_ERROR(EINVAL); 889 return XFS_ERROR(EINVAL);
@@ -1096,10 +1089,6 @@ xfs_mount_reset_sbqflags(
1096 if (mp->m_flags & XFS_MOUNT_RDONLY) 1089 if (mp->m_flags & XFS_MOUNT_RDONLY)
1097 return 0; 1090 return 0;
1098 1091
1099#ifdef QUOTADEBUG
1100 xfs_notice(mp, "Writing superblock quota changes");
1101#endif
1102
1103 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1092 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1104 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1093 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1105 XFS_DEFAULT_LOG_COUNT); 1094 XFS_DEFAULT_LOG_COUNT);
@@ -1532,7 +1521,7 @@ xfs_unmountfs(
1532 xfs_warn(mp, "Unable to free reserved block pool. " 1521 xfs_warn(mp, "Unable to free reserved block pool. "
1533 "Freespace may not be correct on next mount."); 1522 "Freespace may not be correct on next mount.");
1534 1523
1535 error = xfs_log_sbcount(mp, 1); 1524 error = xfs_log_sbcount(mp);
1536 if (error) 1525 if (error)
1537 xfs_warn(mp, "Unable to update superblock counters. " 1526 xfs_warn(mp, "Unable to update superblock counters. "
1538 "Freespace may not be correct on next mount."); 1527 "Freespace may not be correct on next mount.");
@@ -1568,18 +1557,14 @@ xfs_fs_writable(xfs_mount_t *mp)
1568/* 1557/*
1569 * xfs_log_sbcount 1558 * xfs_log_sbcount
1570 * 1559 *
1571 * Called either periodically to keep the on disk superblock values 1560 * Sync the superblock counters to disk.
1572 * roughly up to date or from unmount to make sure the values are
1573 * correct on a clean unmount.
1574 * 1561 *
1575 * Note this code can be called during the process of freezing, so 1562 * Note this code can be called during the process of freezing, so
1576 * we may need to use the transaction allocator which does not not 1563 * we may need to use the transaction allocator which does not
1577 * block when the transaction subsystem is in its frozen state. 1564 * block when the transaction subsystem is in its frozen state.
1578 */ 1565 */
1579int 1566int
1580xfs_log_sbcount( 1567xfs_log_sbcount(xfs_mount_t *mp)
1581 xfs_mount_t *mp,
1582 uint sync)
1583{ 1568{
1584 xfs_trans_t *tp; 1569 xfs_trans_t *tp;
1585 int error; 1570 int error;
@@ -1605,8 +1590,7 @@ xfs_log_sbcount(
1605 } 1590 }
1606 1591
1607 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); 1592 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
1608 if (sync) 1593 xfs_trans_set_sync(tp);
1609 xfs_trans_set_sync(tp);
1610 error = xfs_trans_commit(tp, 0); 1594 error = xfs_trans_commit(tp, 0);
1611 return error; 1595 return error;
1612} 1596}
@@ -1941,22 +1925,19 @@ unwind:
1941 * the superblock buffer if it can be locked without sleeping. 1925 * the superblock buffer if it can be locked without sleeping.
1942 * If it can't then we'll return NULL. 1926 * If it can't then we'll return NULL.
1943 */ 1927 */
1944xfs_buf_t * 1928struct xfs_buf *
1945xfs_getsb( 1929xfs_getsb(
1946 xfs_mount_t *mp, 1930 struct xfs_mount *mp,
1947 int flags) 1931 int flags)
1948{ 1932{
1949 xfs_buf_t *bp; 1933 struct xfs_buf *bp = mp->m_sb_bp;
1950 1934
1951 ASSERT(mp->m_sb_bp != NULL); 1935 if (!xfs_buf_trylock(bp)) {
1952 bp = mp->m_sb_bp; 1936 if (flags & XBF_TRYLOCK)
1953 if (flags & XBF_TRYLOCK) {
1954 if (!XFS_BUF_CPSEMA(bp)) {
1955 return NULL; 1937 return NULL;
1956 } 1938 xfs_buf_lock(bp);
1957 } else {
1958 XFS_BUF_PSEMA(bp, PRIBIO);
1959 } 1939 }
1940
1960 XFS_BUF_HOLD(bp); 1941 XFS_BUF_HOLD(bp);
1961 ASSERT(XFS_BUF_ISDONE(bp)); 1942 ASSERT(XFS_BUF_ISDONE(bp));
1962 return bp; 1943 return bp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3d68bb267c5f..bb24dac42a25 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -371,7 +371,7 @@ typedef struct xfs_mod_sb {
371 int64_t msb_delta; /* Change to make to specified field */ 371 int64_t msb_delta; /* Change to make to specified field */
372} xfs_mod_sb_t; 372} xfs_mod_sb_t;
373 373
374extern int xfs_log_sbcount(xfs_mount_t *, uint); 374extern int xfs_log_sbcount(xfs_mount_t *);
375extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); 375extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
376extern int xfs_mountfs(xfs_mount_t *mp); 376extern int xfs_mountfs(xfs_mount_t *mp);
377 377
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b33aae..efc147f0e9b6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1426,6 +1426,7 @@ xfs_trans_committed(
1426static inline void 1426static inline void
1427xfs_log_item_batch_insert( 1427xfs_log_item_batch_insert(
1428 struct xfs_ail *ailp, 1428 struct xfs_ail *ailp,
1429 struct xfs_ail_cursor *cur,
1429 struct xfs_log_item **log_items, 1430 struct xfs_log_item **log_items,
1430 int nr_items, 1431 int nr_items,
1431 xfs_lsn_t commit_lsn) 1432 xfs_lsn_t commit_lsn)
@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
1434 1435
1435 spin_lock(&ailp->xa_lock); 1436 spin_lock(&ailp->xa_lock);
1436 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ 1437 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
1437 xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); 1438 xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
1438 1439
1439 for (i = 0; i < nr_items; i++) 1440 for (i = 0; i < nr_items; i++)
1440 IOP_UNPIN(log_items[i], 0); 1441 IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
1452 * as an iclog write error even though we haven't started any IO yet. Hence in 1453 * as an iclog write error even though we haven't started any IO yet. Hence in
1453 * this case all we need to do is IOP_COMMITTED processing, followed by an 1454 * this case all we need to do is IOP_COMMITTED processing, followed by an
1454 * IOP_UNPIN(aborted) call. 1455 * IOP_UNPIN(aborted) call.
1456 *
1457 * The AIL cursor is used to optimise the insert process. If commit_lsn is not
1458 * at the end of the AIL, the insert cursor avoids the need to walk
1459 * the AIL to find the insertion point on every xfs_log_item_batch_insert()
1460 * call. This saves a lot of needless list walking and is a net win, even
1461 * though it slightly increases that amount of AIL lock traffic to set it up
1462 * and tear it down.
1455 */ 1463 */
1456void 1464void
1457xfs_trans_committed_bulk( 1465xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
1463#define LOG_ITEM_BATCH_SIZE 32 1471#define LOG_ITEM_BATCH_SIZE 32
1464 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; 1472 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
1465 struct xfs_log_vec *lv; 1473 struct xfs_log_vec *lv;
1474 struct xfs_ail_cursor cur;
1466 int i = 0; 1475 int i = 0;
1467 1476
1477 spin_lock(&ailp->xa_lock);
1478 xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
1479 spin_unlock(&ailp->xa_lock);
1480
1468 /* unpin all the log items */ 1481 /* unpin all the log items */
1469 for (lv = log_vector; lv; lv = lv->lv_next ) { 1482 for (lv = log_vector; lv; lv = lv->lv_next ) {
1470 struct xfs_log_item *lip = lv->lv_item; 1483 struct xfs_log_item *lip = lv->lv_item;
@@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
1493 /* 1506 /*
1494 * Not a bulk update option due to unusual item_lsn. 1507 * Not a bulk update option due to unusual item_lsn.
1495 * Push into AIL immediately, rechecking the lsn once 1508 * Push into AIL immediately, rechecking the lsn once
1496 * we have the ail lock. Then unpin the item. 1509 * we have the ail lock. Then unpin the item. This does
1510 * not affect the AIL cursor the bulk insert path is
1511 * using.
1497 */ 1512 */
1498 spin_lock(&ailp->xa_lock); 1513 spin_lock(&ailp->xa_lock);
1499 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) 1514 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
1507 /* Item is a candidate for bulk AIL insert. */ 1522 /* Item is a candidate for bulk AIL insert. */
1508 log_items[i++] = lv->lv_item; 1523 log_items[i++] = lv->lv_item;
1509 if (i >= LOG_ITEM_BATCH_SIZE) { 1524 if (i >= LOG_ITEM_BATCH_SIZE) {
1510 xfs_log_item_batch_insert(ailp, log_items, 1525 xfs_log_item_batch_insert(ailp, &cur, log_items,
1511 LOG_ITEM_BATCH_SIZE, commit_lsn); 1526 LOG_ITEM_BATCH_SIZE, commit_lsn);
1512 i = 0; 1527 i = 0;
1513 } 1528 }
@@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
1515 1530
1516 /* make sure we insert the remainder! */ 1531 /* make sure we insert the remainder! */
1517 if (i) 1532 if (i)
1518 xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); 1533 xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
1534
1535 spin_lock(&ailp->xa_lock);
1536 xfs_trans_ail_cursor_done(ailp, &cur);
1537 spin_unlock(&ailp->xa_lock);
1519} 1538}
1520 1539
1521/* 1540/*
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5fc2380092c8..43233e92f0f6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -163,17 +163,11 @@ xfs_ail_max_lsn(
163} 163}
164 164
165/* 165/*
166 * AIL traversal cursor initialisation. 166 * The cursor keeps track of where our current traversal is up to by tracking
167 * 167 * the next item in the list for us. However, for this to be safe, removing an
168 * The cursor keeps track of where our current traversal is up 168 * object from the AIL needs to invalidate any cursor that points to it. hence
169 * to by tracking the next item in the list for us. However, for 168 * object from the AIL needs to invalidate any cursor that points to it. hence
170 * this to be safe, removing an object from the AIL needs to invalidate 170 * deletion can search all the active cursors for invalidation.
171 * any cursor that points to it. hence the traversal cursor needs to
172 * be linked to the struct xfs_ail so that deletion can search all the
173 * active cursors for invalidation.
174 *
175 * We don't link the push cursor because it is embedded in the struct
176 * xfs_ail and hence easily findable.
177 */ 171 */
178STATIC void 172STATIC void
179xfs_trans_ail_cursor_init( 173xfs_trans_ail_cursor_init(
@@ -181,31 +175,12 @@ xfs_trans_ail_cursor_init(
181 struct xfs_ail_cursor *cur) 175 struct xfs_ail_cursor *cur)
182{ 176{
183 cur->item = NULL; 177 cur->item = NULL;
184 if (cur == &ailp->xa_cursors) 178 list_add_tail(&cur->list, &ailp->xa_cursors);
185 return;
186
187 cur->next = ailp->xa_cursors.next;
188 ailp->xa_cursors.next = cur;
189}
190
191/*
192 * Set the cursor to the next item, because when we look
193 * up the cursor the current item may have been freed.
194 */
195STATIC void
196xfs_trans_ail_cursor_set(
197 struct xfs_ail *ailp,
198 struct xfs_ail_cursor *cur,
199 struct xfs_log_item *lip)
200{
201 if (lip)
202 cur->item = xfs_ail_next(ailp, lip);
203} 179}
204 180
205/* 181/*
206 * Get the next item in the traversal and advance the cursor. 182 * Get the next item in the traversal and advance the cursor. If the cursor
207 * If the cursor was invalidated (inidicated by a lip of 1), 183 * was invalidated (indicated by a lip of 1), restart the traversal.
208 * restart the traversal.
209 */ 184 */
210struct xfs_log_item * 185struct xfs_log_item *
211xfs_trans_ail_cursor_next( 186xfs_trans_ail_cursor_next(
@@ -216,45 +191,31 @@ xfs_trans_ail_cursor_next(
216 191
217 if ((__psint_t)lip & 1) 192 if ((__psint_t)lip & 1)
218 lip = xfs_ail_min(ailp); 193 lip = xfs_ail_min(ailp);
219 xfs_trans_ail_cursor_set(ailp, cur, lip); 194 if (lip)
195 cur->item = xfs_ail_next(ailp, lip);
220 return lip; 196 return lip;
221} 197}
222 198
223/* 199/*
224 * Now that the traversal is complete, we need to remove the cursor 200 * When the traversal is complete, we need to remove the cursor from the list
225 * from the list of traversing cursors. Avoid removing the embedded 201 * of traversing cursors.
226 * push cursor, but use the fact it is always present to make the
227 * list deletion simple.
228 */ 202 */
229void 203void
230xfs_trans_ail_cursor_done( 204xfs_trans_ail_cursor_done(
231 struct xfs_ail *ailp, 205 struct xfs_ail *ailp,
232 struct xfs_ail_cursor *done) 206 struct xfs_ail_cursor *cur)
233{ 207{
234 struct xfs_ail_cursor *prev = NULL; 208 cur->item = NULL;
235 struct xfs_ail_cursor *cur; 209 list_del_init(&cur->list);
236
237 done->item = NULL;
238 if (done == &ailp->xa_cursors)
239 return;
240 prev = &ailp->xa_cursors;
241 for (cur = prev->next; cur; prev = cur, cur = prev->next) {
242 if (cur == done) {
243 prev->next = cur->next;
244 break;
245 }
246 }
247 ASSERT(cur);
248} 210}
249 211
250/* 212/*
251 * Invalidate any cursor that is pointing to this item. This is 213 * Invalidate any cursor that is pointing to this item. This is called when an
252 * called when an item is removed from the AIL. Any cursor pointing 214 * item is removed from the AIL. Any cursor pointing to this object is now
253 * to this object is now invalid and the traversal needs to be 215 * invalid and the traversal needs to be terminated so it doesn't reference a
254 * terminated so it doesn't reference a freed object. We set the 216 * freed object. We set the low bit of the cursor item pointer so we can
255 * cursor item to a value of 1 so we can distinguish between an 217 * distinguish between an invalidation and the end of the list when getting the
256 * invalidation and the end of the list when getting the next item 218 * next item from the cursor.
257 * from the cursor.
258 */ 219 */
259STATIC void 220STATIC void
260xfs_trans_ail_cursor_clear( 221xfs_trans_ail_cursor_clear(
@@ -263,8 +224,7 @@ xfs_trans_ail_cursor_clear(
263{ 224{
264 struct xfs_ail_cursor *cur; 225 struct xfs_ail_cursor *cur;
265 226
266 /* need to search all cursors */ 227 list_for_each_entry(cur, &ailp->xa_cursors, list) {
267 for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
268 if (cur->item == lip) 228 if (cur->item == lip)
269 cur->item = (struct xfs_log_item *) 229 cur->item = (struct xfs_log_item *)
270 ((__psint_t)cur->item | 1); 230 ((__psint_t)cur->item | 1);
@@ -272,9 +232,10 @@ xfs_trans_ail_cursor_clear(
272} 232}
273 233
274/* 234/*
275 * Return the item in the AIL with the current lsn. 235 * Find the first item in the AIL with the given @lsn by searching in ascending
276 * Return the current tree generation number for use 236 * LSN order and initialise the cursor to point to the next item for a
277 * in calls to xfs_trans_next_ail(). 237 * ascending traversal. Pass a @lsn of zero to initialise the cursor to the
238 * first item in the AIL. Returns NULL if the list is empty.
278 */ 239 */
279xfs_log_item_t * 240xfs_log_item_t *
280xfs_trans_ail_cursor_first( 241xfs_trans_ail_cursor_first(
@@ -285,46 +246,112 @@ xfs_trans_ail_cursor_first(
285 xfs_log_item_t *lip; 246 xfs_log_item_t *lip;
286 247
287 xfs_trans_ail_cursor_init(ailp, cur); 248 xfs_trans_ail_cursor_init(ailp, cur);
288 lip = xfs_ail_min(ailp); 249
289 if (lsn == 0) 250 if (lsn == 0) {
251 lip = xfs_ail_min(ailp);
290 goto out; 252 goto out;
253 }
291 254
292 list_for_each_entry(lip, &ailp->xa_ail, li_ail) { 255 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
293 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) 256 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
294 goto out; 257 goto out;
295 } 258 }
296 lip = NULL; 259 return NULL;
260
297out: 261out:
298 xfs_trans_ail_cursor_set(ailp, cur, lip); 262 if (lip)
263 cur->item = xfs_ail_next(ailp, lip);
299 return lip; 264 return lip;
300} 265}
301 266
267static struct xfs_log_item *
268__xfs_trans_ail_cursor_last(
269 struct xfs_ail *ailp,
270 xfs_lsn_t lsn)
271{
272 xfs_log_item_t *lip;
273
274 list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
275 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
276 return lip;
277 }
278 return NULL;
279}
280
281/*
282 * Find the last item in the AIL with the given @lsn by searching in descending
283 * LSN order and initialise the cursor to point to that item. If there is no
284 * item with the value of @lsn, then it sets the cursor to the last item with an
285 * LSN lower than @lsn. Returns NULL if the list is empty.
286 */
287struct xfs_log_item *
288xfs_trans_ail_cursor_last(
289 struct xfs_ail *ailp,
290 struct xfs_ail_cursor *cur,
291 xfs_lsn_t lsn)
292{
293 xfs_trans_ail_cursor_init(ailp, cur);
294 cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
295 return cur->item;
296}
297
302/* 298/*
303 * splice the log item list into the AIL at the given LSN. 299 * Splice the log item list into the AIL at the given LSN. We splice to the
300 * tail of the given LSN to maintain insert order for push traversals. The
301 * cursor is optional, allowing repeated updates to the same LSN to avoid
302 * repeated traversals.
304 */ 303 */
305static void 304static void
306xfs_ail_splice( 305xfs_ail_splice(
307 struct xfs_ail *ailp, 306 struct xfs_ail *ailp,
308 struct list_head *list, 307 struct xfs_ail_cursor *cur,
309 xfs_lsn_t lsn) 308 struct list_head *list,
309 xfs_lsn_t lsn)
310{ 310{
311 xfs_log_item_t *next_lip; 311 struct xfs_log_item *lip = cur ? cur->item : NULL;
312 struct xfs_log_item *next_lip;
312 313
313 /* If the list is empty, just insert the item. */ 314 /*
314 if (list_empty(&ailp->xa_ail)) { 315 * Get a new cursor if we don't have a placeholder or the existing one
315 list_splice(list, &ailp->xa_ail); 316 * has been invalidated.
316 return; 317 */
318 if (!lip || (__psint_t)lip & 1) {
319 lip = __xfs_trans_ail_cursor_last(ailp, lsn);
320
321 if (!lip) {
322 /* The list is empty, so just splice and return. */
323 if (cur)
324 cur->item = NULL;
325 list_splice(list, &ailp->xa_ail);
326 return;
327 }
317 } 328 }
318 329
319 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 330 /*
320 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) 331 * Our cursor points to the item we want to insert _after_, so we have
321 break; 332 * to update the cursor to point to the end of the list we are splicing
333 * in so that it points to the correct location for the next splice.
334 * i.e. before the splice
335 *
336 * lsn -> lsn -> lsn + x -> lsn + x ...
337 * ^
338 * | cursor points here
339 *
340 * After the splice we have:
341 *
342 * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
343 * ^ ^
344 * | cursor points here | needs to move here
345 *
346 * So we set the cursor to the last item in the list to be spliced
347 * before we execute the splice, resulting in the cursor pointing to
348 * the correct item after the splice occurs.
349 */
350 if (cur) {
351 next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
352 cur->item = next_lip;
322 } 353 }
323 354 list_splice(list, &lip->li_ail);
324 ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
325 XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
326
327 list_splice_init(list, &next_lip->li_ail);
328} 355}
329 356
330/* 357/*
@@ -351,7 +378,7 @@ xfs_ail_worker(
351 struct xfs_ail *ailp = container_of(to_delayed_work(work), 378 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work); 379 struct xfs_ail, xa_work);
353 xfs_mount_t *mp = ailp->xa_mount; 380 xfs_mount_t *mp = ailp->xa_mount;
354 struct xfs_ail_cursor *cur = &ailp->xa_cursors; 381 struct xfs_ail_cursor cur;
355 xfs_log_item_t *lip; 382 xfs_log_item_t *lip;
356 xfs_lsn_t lsn; 383 xfs_lsn_t lsn;
357 xfs_lsn_t target; 384 xfs_lsn_t target;
@@ -363,13 +390,12 @@ xfs_ail_worker(
363 390
364 spin_lock(&ailp->xa_lock); 391 spin_lock(&ailp->xa_lock);
365 target = ailp->xa_target; 392 target = ailp->xa_target;
366 xfs_trans_ail_cursor_init(ailp, cur); 393 lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
367 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
368 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 394 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
369 /* 395 /*
370 * AIL is empty or our push has reached the end. 396 * AIL is empty or our push has reached the end.
371 */ 397 */
372 xfs_trans_ail_cursor_done(ailp, cur); 398 xfs_trans_ail_cursor_done(ailp, &cur);
373 spin_unlock(&ailp->xa_lock); 399 spin_unlock(&ailp->xa_lock);
374 goto out_done; 400 goto out_done;
375 } 401 }
@@ -457,12 +483,12 @@ xfs_ail_worker(
457 if (stuck > 100) 483 if (stuck > 100)
458 break; 484 break;
459 485
460 lip = xfs_trans_ail_cursor_next(ailp, cur); 486 lip = xfs_trans_ail_cursor_next(ailp, &cur);
461 if (lip == NULL) 487 if (lip == NULL)
462 break; 488 break;
463 lsn = lip->li_lsn; 489 lsn = lip->li_lsn;
464 } 490 }
465 xfs_trans_ail_cursor_done(ailp, cur); 491 xfs_trans_ail_cursor_done(ailp, &cur);
466 spin_unlock(&ailp->xa_lock); 492 spin_unlock(&ailp->xa_lock);
467 493
468 if (flush_log) { 494 if (flush_log) {
@@ -645,6 +671,7 @@ xfs_trans_unlocked_item(
645void 671void
646xfs_trans_ail_update_bulk( 672xfs_trans_ail_update_bulk(
647 struct xfs_ail *ailp, 673 struct xfs_ail *ailp,
674 struct xfs_ail_cursor *cur,
648 struct xfs_log_item **log_items, 675 struct xfs_log_item **log_items,
649 int nr_items, 676 int nr_items,
650 xfs_lsn_t lsn) __releases(ailp->xa_lock) 677 xfs_lsn_t lsn) __releases(ailp->xa_lock)
@@ -674,7 +701,7 @@ xfs_trans_ail_update_bulk(
674 list_add(&lip->li_ail, &tmp); 701 list_add(&lip->li_ail, &tmp);
675 } 702 }
676 703
677 xfs_ail_splice(ailp, &tmp, lsn); 704 xfs_ail_splice(ailp, cur, &tmp, lsn);
678 705
679 if (!mlip_changed) { 706 if (!mlip_changed) {
680 spin_unlock(&ailp->xa_lock); 707 spin_unlock(&ailp->xa_lock);
@@ -793,6 +820,7 @@ xfs_trans_ail_init(
793 820
794 ailp->xa_mount = mp; 821 ailp->xa_mount = mp;
795 INIT_LIST_HEAD(&ailp->xa_ail); 822 INIT_LIST_HEAD(&ailp->xa_ail);
823 INIT_LIST_HEAD(&ailp->xa_cursors);
796 spin_lock_init(&ailp->xa_lock); 824 spin_lock_init(&ailp->xa_lock);
797 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); 825 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
798 mp->m_ail = ailp; 826 mp->m_ail = ailp;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 03b3b7f85a3b..15584fc3ed7d 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -81,7 +81,7 @@ _xfs_trans_bjoin(
81 struct xfs_buf_log_item *bip; 81 struct xfs_buf_log_item *bip;
82 82
83 ASSERT(XFS_BUF_ISBUSY(bp)); 83 ASSERT(XFS_BUF_ISBUSY(bp));
84 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 84 ASSERT(bp->b_transp == NULL);
85 85
86 /* 86 /*
87 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 87 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
@@ -89,7 +89,7 @@ _xfs_trans_bjoin(
89 * The checks to see if one is there are in xfs_buf_item_init(). 89 * The checks to see if one is there are in xfs_buf_item_init().
90 */ 90 */
91 xfs_buf_item_init(bp, tp->t_mountp); 91 xfs_buf_item_init(bp, tp->t_mountp);
92 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 92 bip = bp->b_fspriv;
93 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 93 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
94 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 94 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
95 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 95 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -110,7 +110,7 @@ _xfs_trans_bjoin(
110 * Initialize b_fsprivate2 so we can find it with incore_match() 110 * Initialize b_fsprivate2 so we can find it with incore_match()
111 * in xfs_trans_get_buf() and friends above. 111 * in xfs_trans_get_buf() and friends above.
112 */ 112 */
113 XFS_BUF_SET_FSPRIVATE2(bp, tp); 113 bp->b_transp = tp;
114 114
115} 115}
116 116
@@ -160,7 +160,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
160 */ 160 */
161 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); 161 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
162 if (bp != NULL) { 162 if (bp != NULL) {
163 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 163 ASSERT(xfs_buf_islocked(bp));
164 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) 164 if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
165 XFS_BUF_SUPER_STALE(bp); 165 XFS_BUF_SUPER_STALE(bp);
166 166
@@ -172,8 +172,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
172 else if (XFS_BUF_ISSTALE(bp)) 172 else if (XFS_BUF_ISSTALE(bp))
173 ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); 173 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
174 174
175 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 175 ASSERT(bp->b_transp == tp);
176 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 176 bip = bp->b_fspriv;
177 ASSERT(bip != NULL); 177 ASSERT(bip != NULL);
178 ASSERT(atomic_read(&bip->bli_refcount) > 0); 178 ASSERT(atomic_read(&bip->bli_refcount) > 0);
179 bip->bli_recur++; 179 bip->bli_recur++;
@@ -232,8 +232,8 @@ xfs_trans_getsb(xfs_trans_t *tp,
232 * recursion count and return the buffer to the caller. 232 * recursion count and return the buffer to the caller.
233 */ 233 */
234 bp = mp->m_sb_bp; 234 bp = mp->m_sb_bp;
235 if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) { 235 if (bp->b_transp == tp) {
236 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 236 bip = bp->b_fspriv;
237 ASSERT(bip != NULL); 237 ASSERT(bip != NULL);
238 ASSERT(atomic_read(&bip->bli_refcount) > 0); 238 ASSERT(atomic_read(&bip->bli_refcount) > 0);
239 bip->bli_recur++; 239 bip->bli_recur++;
@@ -327,9 +327,9 @@ xfs_trans_read_buf(
327 */ 327 */
328 bp = xfs_trans_buf_item_match(tp, target, blkno, len); 328 bp = xfs_trans_buf_item_match(tp, target, blkno, len);
329 if (bp != NULL) { 329 if (bp != NULL) {
330 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 330 ASSERT(xfs_buf_islocked(bp));
331 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 331 ASSERT(bp->b_transp == tp);
332 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 332 ASSERT(bp->b_fspriv != NULL);
333 ASSERT((XFS_BUF_ISERROR(bp)) == 0); 333 ASSERT((XFS_BUF_ISERROR(bp)) == 0);
334 if (!(XFS_BUF_ISDONE(bp))) { 334 if (!(XFS_BUF_ISDONE(bp))) {
335 trace_xfs_trans_read_buf_io(bp, _RET_IP_); 335 trace_xfs_trans_read_buf_io(bp, _RET_IP_);
@@ -363,7 +363,7 @@ xfs_trans_read_buf(
363 } 363 }
364 364
365 365
366 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 366 bip = bp->b_fspriv;
367 bip->bli_recur++; 367 bip->bli_recur++;
368 368
369 ASSERT(atomic_read(&bip->bli_refcount) > 0); 369 ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -460,32 +460,30 @@ xfs_trans_brelse(xfs_trans_t *tp,
460 xfs_buf_t *bp) 460 xfs_buf_t *bp)
461{ 461{
462 xfs_buf_log_item_t *bip; 462 xfs_buf_log_item_t *bip;
463 xfs_log_item_t *lip;
464 463
465 /* 464 /*
466 * Default to a normal brelse() call if the tp is NULL. 465 * Default to a normal brelse() call if the tp is NULL.
467 */ 466 */
468 if (tp == NULL) { 467 if (tp == NULL) {
469 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 468 struct xfs_log_item *lip = bp->b_fspriv;
469
470 ASSERT(bp->b_transp == NULL);
471
470 /* 472 /*
471 * If there's a buf log item attached to the buffer, 473 * If there's a buf log item attached to the buffer,
472 * then let the AIL know that the buffer is being 474 * then let the AIL know that the buffer is being
473 * unlocked. 475 * unlocked.
474 */ 476 */
475 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 477 if (lip != NULL && lip->li_type == XFS_LI_BUF) {
476 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 478 bip = bp->b_fspriv;
477 if (lip->li_type == XFS_LI_BUF) { 479 xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
478 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
479 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
480 lip);
481 }
482 } 480 }
483 xfs_buf_relse(bp); 481 xfs_buf_relse(bp);
484 return; 482 return;
485 } 483 }
486 484
487 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 485 ASSERT(bp->b_transp == tp);
488 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 486 bip = bp->b_fspriv;
489 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 487 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
490 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 488 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
491 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 489 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -556,7 +554,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
556 xfs_buf_item_relse(bp); 554 xfs_buf_item_relse(bp);
557 bip = NULL; 555 bip = NULL;
558 } 556 }
559 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 557 bp->b_transp = NULL;
560 558
561 /* 559 /*
562 * If we've still got a buf log item on the buffer, then 560 * If we've still got a buf log item on the buffer, then
@@ -581,16 +579,15 @@ void
581xfs_trans_bhold(xfs_trans_t *tp, 579xfs_trans_bhold(xfs_trans_t *tp,
582 xfs_buf_t *bp) 580 xfs_buf_t *bp)
583{ 581{
584 xfs_buf_log_item_t *bip; 582 xfs_buf_log_item_t *bip = bp->b_fspriv;
585 583
586 ASSERT(XFS_BUF_ISBUSY(bp)); 584 ASSERT(XFS_BUF_ISBUSY(bp));
587 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 585 ASSERT(bp->b_transp == tp);
588 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 586 ASSERT(bip != NULL);
589
590 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
591 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 587 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
592 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 588 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
593 ASSERT(atomic_read(&bip->bli_refcount) > 0); 589 ASSERT(atomic_read(&bip->bli_refcount) > 0);
590
594 bip->bli_flags |= XFS_BLI_HOLD; 591 bip->bli_flags |= XFS_BLI_HOLD;
595 trace_xfs_trans_bhold(bip); 592 trace_xfs_trans_bhold(bip);
596} 593}
@@ -603,19 +600,17 @@ void
603xfs_trans_bhold_release(xfs_trans_t *tp, 600xfs_trans_bhold_release(xfs_trans_t *tp,
604 xfs_buf_t *bp) 601 xfs_buf_t *bp)
605{ 602{
606 xfs_buf_log_item_t *bip; 603 xfs_buf_log_item_t *bip = bp->b_fspriv;
607 604
608 ASSERT(XFS_BUF_ISBUSY(bp)); 605 ASSERT(XFS_BUF_ISBUSY(bp));
609 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 606 ASSERT(bp->b_transp == tp);
610 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 607 ASSERT(bip != NULL);
611
612 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
613 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 608 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
614 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 609 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
615 ASSERT(atomic_read(&bip->bli_refcount) > 0); 610 ASSERT(atomic_read(&bip->bli_refcount) > 0);
616 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 611 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
617 bip->bli_flags &= ~XFS_BLI_HOLD;
618 612
613 bip->bli_flags &= ~XFS_BLI_HOLD;
619 trace_xfs_trans_bhold_release(bip); 614 trace_xfs_trans_bhold_release(bip);
620} 615}
621 616
@@ -634,14 +629,14 @@ xfs_trans_log_buf(xfs_trans_t *tp,
634 uint first, 629 uint first,
635 uint last) 630 uint last)
636{ 631{
637 xfs_buf_log_item_t *bip; 632 xfs_buf_log_item_t *bip = bp->b_fspriv;
638 633
639 ASSERT(XFS_BUF_ISBUSY(bp)); 634 ASSERT(XFS_BUF_ISBUSY(bp));
640 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 635 ASSERT(bp->b_transp == tp);
641 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 636 ASSERT(bip != NULL);
642 ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); 637 ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
643 ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) || 638 ASSERT(bp->b_iodone == NULL ||
644 (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks)); 639 bp->b_iodone == xfs_buf_iodone_callbacks);
645 640
646 /* 641 /*
647 * Mark the buffer as needing to be written out eventually, 642 * Mark the buffer as needing to be written out eventually,
@@ -656,9 +651,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
656 XFS_BUF_DELAYWRITE(bp); 651 XFS_BUF_DELAYWRITE(bp);
657 XFS_BUF_DONE(bp); 652 XFS_BUF_DONE(bp);
658 653
659 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
660 ASSERT(atomic_read(&bip->bli_refcount) > 0); 654 ASSERT(atomic_read(&bip->bli_refcount) > 0);
661 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 655 bp->b_iodone = xfs_buf_iodone_callbacks;
662 bip->bli_item.li_cb = xfs_buf_iodone; 656 bip->bli_item.li_cb = xfs_buf_iodone;
663 657
664 trace_xfs_trans_log_buf(bip); 658 trace_xfs_trans_log_buf(bip);
@@ -706,13 +700,11 @@ xfs_trans_binval(
706 xfs_trans_t *tp, 700 xfs_trans_t *tp,
707 xfs_buf_t *bp) 701 xfs_buf_t *bp)
708{ 702{
709 xfs_buf_log_item_t *bip; 703 xfs_buf_log_item_t *bip = bp->b_fspriv;
710 704
711 ASSERT(XFS_BUF_ISBUSY(bp)); 705 ASSERT(XFS_BUF_ISBUSY(bp));
712 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 706 ASSERT(bp->b_transp == tp);
713 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 707 ASSERT(bip != NULL);
714
715 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
716 ASSERT(atomic_read(&bip->bli_refcount) > 0); 708 ASSERT(atomic_read(&bip->bli_refcount) > 0);
717 709
718 trace_xfs_trans_binval(bip); 710 trace_xfs_trans_binval(bip);
@@ -780,13 +772,11 @@ xfs_trans_inode_buf(
780 xfs_trans_t *tp, 772 xfs_trans_t *tp,
781 xfs_buf_t *bp) 773 xfs_buf_t *bp)
782{ 774{
783 xfs_buf_log_item_t *bip; 775 xfs_buf_log_item_t *bip = bp->b_fspriv;
784 776
785 ASSERT(XFS_BUF_ISBUSY(bp)); 777 ASSERT(XFS_BUF_ISBUSY(bp));
786 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 778 ASSERT(bp->b_transp == tp);
787 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 779 ASSERT(bip != NULL);
788
789 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
790 ASSERT(atomic_read(&bip->bli_refcount) > 0); 780 ASSERT(atomic_read(&bip->bli_refcount) > 0);
791 781
792 bip->bli_flags |= XFS_BLI_INODE_BUF; 782 bip->bli_flags |= XFS_BLI_INODE_BUF;
@@ -806,13 +796,11 @@ xfs_trans_stale_inode_buf(
806 xfs_trans_t *tp, 796 xfs_trans_t *tp,
807 xfs_buf_t *bp) 797 xfs_buf_t *bp)
808{ 798{
809 xfs_buf_log_item_t *bip; 799 xfs_buf_log_item_t *bip = bp->b_fspriv;
810 800
811 ASSERT(XFS_BUF_ISBUSY(bp)); 801 ASSERT(XFS_BUF_ISBUSY(bp));
812 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 802 ASSERT(bp->b_transp == tp);
813 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 803 ASSERT(bip != NULL);
814
815 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
816 ASSERT(atomic_read(&bip->bli_refcount) > 0); 804 ASSERT(atomic_read(&bip->bli_refcount) > 0);
817 805
818 bip->bli_flags |= XFS_BLI_STALE_INODE; 806 bip->bli_flags |= XFS_BLI_STALE_INODE;
@@ -833,13 +821,11 @@ xfs_trans_inode_alloc_buf(
833 xfs_trans_t *tp, 821 xfs_trans_t *tp,
834 xfs_buf_t *bp) 822 xfs_buf_t *bp)
835{ 823{
836 xfs_buf_log_item_t *bip; 824 xfs_buf_log_item_t *bip = bp->b_fspriv;
837 825
838 ASSERT(XFS_BUF_ISBUSY(bp)); 826 ASSERT(XFS_BUF_ISBUSY(bp));
839 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 827 ASSERT(bp->b_transp == tp);
840 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 828 ASSERT(bip != NULL);
841
842 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
843 ASSERT(atomic_read(&bip->bli_refcount) > 0); 829 ASSERT(atomic_read(&bip->bli_refcount) > 0);
844 830
845 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 831 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
@@ -863,16 +849,14 @@ xfs_trans_dquot_buf(
863 xfs_buf_t *bp, 849 xfs_buf_t *bp,
864 uint type) 850 uint type)
865{ 851{
866 xfs_buf_log_item_t *bip; 852 xfs_buf_log_item_t *bip = bp->b_fspriv;
867 853
868 ASSERT(XFS_BUF_ISBUSY(bp)); 854 ASSERT(XFS_BUF_ISBUSY(bp));
869 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 855 ASSERT(bp->b_transp == tp);
870 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 856 ASSERT(bip != NULL);
871 ASSERT(type == XFS_BLF_UDQUOT_BUF || 857 ASSERT(type == XFS_BLF_UDQUOT_BUF ||
872 type == XFS_BLF_PDQUOT_BUF || 858 type == XFS_BLF_PDQUOT_BUF ||
873 type == XFS_BLF_GDQUOT_BUF); 859 type == XFS_BLF_GDQUOT_BUF);
874
875 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
876 ASSERT(atomic_read(&bip->bli_refcount) > 0); 860 ASSERT(atomic_read(&bip->bli_refcount) > 0);
877 861
878 bip->bli_format.blf_flags |= type; 862 bip->bli_format.blf_flags |= type;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 048b0c689d3e..c8dea2fd7e68 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -55,7 +55,6 @@ xfs_trans_ijoin(
55{ 55{
56 xfs_inode_log_item_t *iip; 56 xfs_inode_log_item_t *iip;
57 57
58 ASSERT(ip->i_transp == NULL);
59 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 58 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
60 if (ip->i_itemp == NULL) 59 if (ip->i_itemp == NULL)
61 xfs_inode_item_init(ip, ip->i_mount); 60 xfs_inode_item_init(ip, ip->i_mount);
@@ -68,12 +67,6 @@ xfs_trans_ijoin(
68 xfs_trans_add_item(tp, &iip->ili_item); 67 xfs_trans_add_item(tp, &iip->ili_item);
69 68
70 xfs_trans_inode_broot_debug(ip); 69 xfs_trans_inode_broot_debug(ip);
71
72 /*
73 * Initialize i_transp so we can find it with xfs_inode_incore()
74 * in xfs_trans_iget() above.
75 */
76 ip->i_transp = tp;
77} 70}
78 71
79/* 72/*
@@ -111,7 +104,6 @@ xfs_trans_ichgtime(
111 104
112 ASSERT(tp); 105 ASSERT(tp);
113 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 106 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
114 ASSERT(ip->i_transp == tp);
115 107
116 tv = current_fs_time(inode->i_sb); 108 tv = current_fs_time(inode->i_sb);
117 109
@@ -140,7 +132,6 @@ xfs_trans_log_inode(
140 xfs_inode_t *ip, 132 xfs_inode_t *ip,
141 uint flags) 133 uint flags)
142{ 134{
143 ASSERT(ip->i_transp == tp);
144 ASSERT(ip->i_itemp != NULL); 135 ASSERT(ip->i_itemp != NULL);
145 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 136 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
146 137
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 6b164e9e9a1f..212946b97239 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -53,7 +53,7 @@ void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
53 * of the list to trigger traversal restarts. 53 * of the list to trigger traversal restarts.
54 */ 54 */
55struct xfs_ail_cursor { 55struct xfs_ail_cursor {
56 struct xfs_ail_cursor *next; 56 struct list_head list;
57 struct xfs_log_item *item; 57 struct xfs_log_item *item;
58}; 58};
59 59
@@ -66,7 +66,7 @@ struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct list_head xa_ail; 67 struct list_head xa_ail;
68 xfs_lsn_t xa_target; 68 xfs_lsn_t xa_target;
69 struct xfs_ail_cursor xa_cursors; 69 struct list_head xa_cursors;
70 spinlock_t xa_lock; 70 spinlock_t xa_lock;
71 struct delayed_work xa_work; 71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn; 72 xfs_lsn_t xa_last_pushed_lsn;
@@ -82,6 +82,7 @@ struct xfs_ail {
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ 82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83 83
84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
85 struct xfs_ail_cursor *cur,
85 struct xfs_log_item **log_items, int nr_items, 86 struct xfs_log_item **log_items, int nr_items,
86 xfs_lsn_t lsn) __releases(ailp->xa_lock); 87 xfs_lsn_t lsn) __releases(ailp->xa_lock);
87static inline void 88static inline void
@@ -90,7 +91,7 @@ xfs_trans_ail_update(
90 struct xfs_log_item *lip, 91 struct xfs_log_item *lip,
91 xfs_lsn_t lsn) __releases(ailp->xa_lock) 92 xfs_lsn_t lsn) __releases(ailp->xa_lock)
92{ 93{
93 xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); 94 xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
94} 95}
95 96
96void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, 97void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
111void xfs_trans_unlocked_item(struct xfs_ail *, 112void xfs_trans_unlocked_item(struct xfs_ail *,
112 xfs_log_item_t *); 113 xfs_log_item_t *);
113 114
114struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 115struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
115 struct xfs_ail_cursor *cur, 116 struct xfs_ail_cursor *cur,
116 xfs_lsn_t lsn); 117 xfs_lsn_t lsn);
117struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, 118struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
119 struct xfs_ail_cursor *cur,
120 xfs_lsn_t lsn);
121struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
118 struct xfs_ail_cursor *cur); 122 struct xfs_ail_cursor *cur);
119void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 123void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
120 struct xfs_ail_cursor *cur); 124 struct xfs_ail_cursor *cur);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 619720705bc6..88d121486c52 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -50,430 +50,6 @@
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_trace.h" 51#include "xfs_trace.h"
52 52
53int
54xfs_setattr(
55 struct xfs_inode *ip,
56 struct iattr *iattr,
57 int flags)
58{
59 xfs_mount_t *mp = ip->i_mount;
60 struct inode *inode = VFS_I(ip);
61 int mask = iattr->ia_valid;
62 xfs_trans_t *tp;
63 int code;
64 uint lock_flags;
65 uint commit_flags=0;
66 uid_t uid=0, iuid=0;
67 gid_t gid=0, igid=0;
68 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
69 int need_iolock = 1;
70
71 trace_xfs_setattr(ip);
72
73 if (mp->m_flags & XFS_MOUNT_RDONLY)
74 return XFS_ERROR(EROFS);
75
76 if (XFS_FORCED_SHUTDOWN(mp))
77 return XFS_ERROR(EIO);
78
79 code = -inode_change_ok(inode, iattr);
80 if (code)
81 return code;
82
83 olddquot1 = olddquot2 = NULL;
84 udqp = gdqp = NULL;
85
86 /*
87 * If disk quotas is on, we make sure that the dquots do exist on disk,
88 * before we start any other transactions. Trying to do this later
89 * is messy. We don't care to take a readlock to look at the ids
90 * in inode here, because we can't hold it across the trans_reserve.
91 * If the IDs do change before we take the ilock, we're covered
92 * because the i_*dquot fields will get updated anyway.
93 */
94 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
95 uint qflags = 0;
96
97 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
98 uid = iattr->ia_uid;
99 qflags |= XFS_QMOPT_UQUOTA;
100 } else {
101 uid = ip->i_d.di_uid;
102 }
103 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
104 gid = iattr->ia_gid;
105 qflags |= XFS_QMOPT_GQUOTA;
106 } else {
107 gid = ip->i_d.di_gid;
108 }
109
110 /*
111 * We take a reference when we initialize udqp and gdqp,
112 * so it is important that we never blindly double trip on
113 * the same variable. See xfs_create() for an example.
114 */
115 ASSERT(udqp == NULL);
116 ASSERT(gdqp == NULL);
117 code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
118 qflags, &udqp, &gdqp);
119 if (code)
120 return code;
121 }
122
123 /*
124 * For the other attributes, we acquire the inode lock and
125 * first do an error checking pass.
126 */
127 tp = NULL;
128 lock_flags = XFS_ILOCK_EXCL;
129 if (flags & XFS_ATTR_NOLOCK)
130 need_iolock = 0;
131 if (!(mask & ATTR_SIZE)) {
132 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
133 commit_flags = 0;
134 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
135 0, 0, 0);
136 if (code) {
137 lock_flags = 0;
138 goto error_return;
139 }
140 } else {
141 if (need_iolock)
142 lock_flags |= XFS_IOLOCK_EXCL;
143 }
144
145 xfs_ilock(ip, lock_flags);
146
147 /*
148 * Change file ownership. Must be the owner or privileged.
149 */
150 if (mask & (ATTR_UID|ATTR_GID)) {
151 /*
152 * These IDs could have changed since we last looked at them.
153 * But, we're assured that if the ownership did change
154 * while we didn't have the inode locked, inode's dquot(s)
155 * would have changed also.
156 */
157 iuid = ip->i_d.di_uid;
158 igid = ip->i_d.di_gid;
159 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
160 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
161
162 /*
163 * Do a quota reservation only if uid/gid is actually
164 * going to change.
165 */
166 if (XFS_IS_QUOTA_RUNNING(mp) &&
167 ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
168 (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
169 ASSERT(tp);
170 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
171 capable(CAP_FOWNER) ?
172 XFS_QMOPT_FORCE_RES : 0);
173 if (code) /* out of quota */
174 goto error_return;
175 }
176 }
177
178 /*
179 * Truncate file. Must have write permission and not be a directory.
180 */
181 if (mask & ATTR_SIZE) {
182 /* Short circuit the truncate case for zero length files */
183 if (iattr->ia_size == 0 &&
184 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
185 xfs_iunlock(ip, XFS_ILOCK_EXCL);
186 lock_flags &= ~XFS_ILOCK_EXCL;
187 if (mask & ATTR_CTIME) {
188 inode->i_mtime = inode->i_ctime =
189 current_fs_time(inode->i_sb);
190 xfs_mark_inode_dirty_sync(ip);
191 }
192 code = 0;
193 goto error_return;
194 }
195
196 if (S_ISDIR(ip->i_d.di_mode)) {
197 code = XFS_ERROR(EISDIR);
198 goto error_return;
199 } else if (!S_ISREG(ip->i_d.di_mode)) {
200 code = XFS_ERROR(EINVAL);
201 goto error_return;
202 }
203
204 /*
205 * Make sure that the dquots are attached to the inode.
206 */
207 code = xfs_qm_dqattach_locked(ip, 0);
208 if (code)
209 goto error_return;
210
211 /*
212 * Now we can make the changes. Before we join the inode
213 * to the transaction, if ATTR_SIZE is set then take care of
214 * the part of the truncation that must be done without the
215 * inode lock. This needs to be done before joining the inode
216 * to the transaction, because the inode cannot be unlocked
217 * once it is a part of the transaction.
218 */
219 if (iattr->ia_size > ip->i_size) {
220 /*
221 * Do the first part of growing a file: zero any data
222 * in the last block that is beyond the old EOF. We
223 * need to do this before the inode is joined to the
224 * transaction to modify the i_size.
225 */
226 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
227 if (code)
228 goto error_return;
229 }
230 xfs_iunlock(ip, XFS_ILOCK_EXCL);
231 lock_flags &= ~XFS_ILOCK_EXCL;
232
233 /*
234 * We are going to log the inode size change in this
235 * transaction so any previous writes that are beyond the on
236 * disk EOF and the new EOF that have not been written out need
237 * to be written here. If we do not write the data out, we
238 * expose ourselves to the null files problem.
239 *
240 * Only flush from the on disk size to the smaller of the in
241 * memory file size or the new size as that's the range we
242 * really care about here and prevents waiting for other data
243 * not within the range we care about here.
244 */
245 if (ip->i_size != ip->i_d.di_size &&
246 iattr->ia_size > ip->i_d.di_size) {
247 code = xfs_flush_pages(ip,
248 ip->i_d.di_size, iattr->ia_size,
249 XBF_ASYNC, FI_NONE);
250 if (code)
251 goto error_return;
252 }
253
254 /* wait for all I/O to complete */
255 xfs_ioend_wait(ip);
256
257 code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
258 xfs_get_blocks);
259 if (code)
260 goto error_return;
261
262 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
263 code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
264 XFS_TRANS_PERM_LOG_RES,
265 XFS_ITRUNCATE_LOG_COUNT);
266 if (code)
267 goto error_return;
268
269 truncate_setsize(inode, iattr->ia_size);
270
271 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
272 lock_flags |= XFS_ILOCK_EXCL;
273
274 xfs_ilock(ip, XFS_ILOCK_EXCL);
275
276 xfs_trans_ijoin(tp, ip);
277
278 /*
279 * Only change the c/mtime if we are changing the size
280 * or we are explicitly asked to change it. This handles
281 * the semantic difference between truncate() and ftruncate()
282 * as implemented in the VFS.
283 *
284 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
285 * is a special case where we need to update the times despite
286 * not having these flags set. For all other operations the
287 * VFS set these flags explicitly if it wants a timestamp
288 * update.
289 */
290 if (iattr->ia_size != ip->i_size &&
291 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
292 iattr->ia_ctime = iattr->ia_mtime =
293 current_fs_time(inode->i_sb);
294 mask |= ATTR_CTIME | ATTR_MTIME;
295 }
296
297 if (iattr->ia_size > ip->i_size) {
298 ip->i_d.di_size = iattr->ia_size;
299 ip->i_size = iattr->ia_size;
300 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
301 } else if (iattr->ia_size <= ip->i_size ||
302 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
303 /*
304 * signal a sync transaction unless
305 * we're truncating an already unlinked
306 * file on a wsync filesystem
307 */
308 code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
309 XFS_DATA_FORK,
310 ((ip->i_d.di_nlink != 0 ||
311 !(mp->m_flags & XFS_MOUNT_WSYNC))
312 ? 1 : 0));
313 if (code)
314 goto abort_return;
315 /*
316 * Truncated "down", so we're removing references
317 * to old data here - if we now delay flushing for
318 * a long time, we expose ourselves unduly to the
319 * notorious NULL files problem. So, we mark this
320 * vnode and flush it when the file is closed, and
321 * do not wait the usual (long) time for writeout.
322 */
323 xfs_iflags_set(ip, XFS_ITRUNCATED);
324 }
325 } else if (tp) {
326 xfs_trans_ijoin(tp, ip);
327 }
328
329 /*
330 * Change file ownership. Must be the owner or privileged.
331 */
332 if (mask & (ATTR_UID|ATTR_GID)) {
333 /*
334 * CAP_FSETID overrides the following restrictions:
335 *
336 * The set-user-ID and set-group-ID bits of a file will be
337 * cleared upon successful return from chown()
338 */
339 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
340 !capable(CAP_FSETID)) {
341 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
342 }
343
344 /*
345 * Change the ownerships and register quota modifications
346 * in the transaction.
347 */
348 if (iuid != uid) {
349 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
350 ASSERT(mask & ATTR_UID);
351 ASSERT(udqp);
352 olddquot1 = xfs_qm_vop_chown(tp, ip,
353 &ip->i_udquot, udqp);
354 }
355 ip->i_d.di_uid = uid;
356 inode->i_uid = uid;
357 }
358 if (igid != gid) {
359 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
360 ASSERT(!XFS_IS_PQUOTA_ON(mp));
361 ASSERT(mask & ATTR_GID);
362 ASSERT(gdqp);
363 olddquot2 = xfs_qm_vop_chown(tp, ip,
364 &ip->i_gdquot, gdqp);
365 }
366 ip->i_d.di_gid = gid;
367 inode->i_gid = gid;
368 }
369 }
370
371 /*
372 * Change file access modes.
373 */
374 if (mask & ATTR_MODE) {
375 umode_t mode = iattr->ia_mode;
376
377 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
378 mode &= ~S_ISGID;
379
380 ip->i_d.di_mode &= S_IFMT;
381 ip->i_d.di_mode |= mode & ~S_IFMT;
382
383 inode->i_mode &= S_IFMT;
384 inode->i_mode |= mode & ~S_IFMT;
385 }
386
387 /*
388 * Change file access or modified times.
389 */
390 if (mask & ATTR_ATIME) {
391 inode->i_atime = iattr->ia_atime;
392 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
393 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
394 ip->i_update_core = 1;
395 }
396 if (mask & ATTR_CTIME) {
397 inode->i_ctime = iattr->ia_ctime;
398 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
399 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
400 ip->i_update_core = 1;
401 }
402 if (mask & ATTR_MTIME) {
403 inode->i_mtime = iattr->ia_mtime;
404 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
405 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
406 ip->i_update_core = 1;
407 }
408
409 /*
410 * And finally, log the inode core if any attribute in it
411 * has been changed.
412 */
413 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
414 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
415 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
416
417 XFS_STATS_INC(xs_ig_attrchg);
418
419 /*
420 * If this is a synchronous mount, make sure that the
421 * transaction goes to disk before returning to the user.
422 * This is slightly sub-optimal in that truncates require
423 * two sync transactions instead of one for wsync filesystems.
424 * One for the truncate and one for the timestamps since we
425 * don't want to change the timestamps unless we're sure the
426 * truncate worked. Truncates are less than 1% of the laddis
427 * mix so this probably isn't worth the trouble to optimize.
428 */
429 code = 0;
430 if (mp->m_flags & XFS_MOUNT_WSYNC)
431 xfs_trans_set_sync(tp);
432
433 code = xfs_trans_commit(tp, commit_flags);
434
435 xfs_iunlock(ip, lock_flags);
436
437 /*
438 * Release any dquot(s) the inode had kept before chown.
439 */
440 xfs_qm_dqrele(olddquot1);
441 xfs_qm_dqrele(olddquot2);
442 xfs_qm_dqrele(udqp);
443 xfs_qm_dqrele(gdqp);
444
445 if (code)
446 return code;
447
448 /*
449 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
450 * update. We could avoid this with linked transactions
451 * and passing down the transaction pointer all the way
452 * to attr_set. No previous user of the generic
453 * Posix ACL code seems to care about this issue either.
454 */
455 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
456 code = -xfs_acl_chmod(inode);
457 if (code)
458 return XFS_ERROR(code);
459 }
460
461 return 0;
462
463 abort_return:
464 commit_flags |= XFS_TRANS_ABORT;
465 error_return:
466 xfs_qm_dqrele(udqp);
467 xfs_qm_dqrele(gdqp);
468 if (tp) {
469 xfs_trans_cancel(tp, commit_flags);
470 }
471 if (lock_flags != 0) {
472 xfs_iunlock(ip, lock_flags);
473 }
474 return code;
475}
476
477/* 53/*
478 * The maximum pathlen is 1024 bytes. Since the minimum file system 54 * The maximum pathlen is 1024 bytes. Since the minimum file system
479 * blocksize is 512 bytes, we can get a max of 2 extents back from 55 * blocksize is 512 bytes, we can get a max of 2 extents back from
@@ -621,13 +197,6 @@ xfs_free_eofblocks(
621 */ 197 */
622 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 198 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
623 199
624 /*
625 * Do the xfs_itruncate_start() call before
626 * reserving any log space because
627 * itruncate_start will call into the buffer
628 * cache and we can't
629 * do that within a transaction.
630 */
631 if (flags & XFS_FREE_EOF_TRYLOCK) { 200 if (flags & XFS_FREE_EOF_TRYLOCK) {
632 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 201 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
633 xfs_trans_cancel(tp, 0); 202 xfs_trans_cancel(tp, 0);
@@ -636,13 +205,6 @@ xfs_free_eofblocks(
636 } else { 205 } else {
637 xfs_ilock(ip, XFS_IOLOCK_EXCL); 206 xfs_ilock(ip, XFS_IOLOCK_EXCL);
638 } 207 }
639 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
640 ip->i_size);
641 if (error) {
642 xfs_trans_cancel(tp, 0);
643 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
644 return error;
645 }
646 208
647 error = xfs_trans_reserve(tp, 0, 209 error = xfs_trans_reserve(tp, 0,
648 XFS_ITRUNCATE_LOG_RES(mp), 210 XFS_ITRUNCATE_LOG_RES(mp),
@@ -658,15 +220,12 @@ xfs_free_eofblocks(
658 xfs_ilock(ip, XFS_ILOCK_EXCL); 220 xfs_ilock(ip, XFS_ILOCK_EXCL);
659 xfs_trans_ijoin(tp, ip); 221 xfs_trans_ijoin(tp, ip);
660 222
661 error = xfs_itruncate_finish(&tp, ip, 223 error = xfs_itruncate_data(&tp, ip, ip->i_size);
662 ip->i_size,
663 XFS_DATA_FORK,
664 0);
665 /*
666 * If we get an error at this point we
667 * simply don't bother truncating the file.
668 */
669 if (error) { 224 if (error) {
225 /*
226 * If we get an error at this point we simply don't
227 * bother truncating the file.
228 */
670 xfs_trans_cancel(tp, 229 xfs_trans_cancel(tp,
671 (XFS_TRANS_RELEASE_LOG_RES | 230 (XFS_TRANS_RELEASE_LOG_RES |
672 XFS_TRANS_ABORT)); 231 XFS_TRANS_ABORT));
@@ -1084,20 +643,9 @@ xfs_inactive(
1084 643
1085 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 644 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1086 if (truncate) { 645 if (truncate) {
1087 /*
1088 * Do the xfs_itruncate_start() call before
1089 * reserving any log space because itruncate_start
1090 * will call into the buffer cache and we can't
1091 * do that within a transaction.
1092 */
1093 xfs_ilock(ip, XFS_IOLOCK_EXCL); 646 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1094 647
1095 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); 648 xfs_ioend_wait(ip);
1096 if (error) {
1097 xfs_trans_cancel(tp, 0);
1098 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1099 return VN_INACTIVE_CACHE;
1100 }
1101 649
1102 error = xfs_trans_reserve(tp, 0, 650 error = xfs_trans_reserve(tp, 0,
1103 XFS_ITRUNCATE_LOG_RES(mp), 651 XFS_ITRUNCATE_LOG_RES(mp),
@@ -1114,16 +662,7 @@ xfs_inactive(
1114 xfs_ilock(ip, XFS_ILOCK_EXCL); 662 xfs_ilock(ip, XFS_ILOCK_EXCL);
1115 xfs_trans_ijoin(tp, ip); 663 xfs_trans_ijoin(tp, ip);
1116 664
1117 /* 665 error = xfs_itruncate_data(&tp, ip, 0);
1118 * normally, we have to run xfs_itruncate_finish sync.
1119 * But if filesystem is wsync and we're in the inactive
1120 * path, then we know that nlink == 0, and that the
1121 * xaction that made nlink == 0 is permanently committed
1122 * since xfs_remove runs as a synchronous transaction.
1123 */
1124 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
1125 (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
1126
1127 if (error) { 666 if (error) {
1128 xfs_trans_cancel(tp, 667 xfs_trans_cancel(tp,
1129 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 668 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2430,6 +1969,8 @@ xfs_zero_remaining_bytes(
2430 if (!bp) 1969 if (!bp)
2431 return XFS_ERROR(ENOMEM); 1970 return XFS_ERROR(ENOMEM);
2432 1971
1972 xfs_buf_unlock(bp);
1973
2433 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 1974 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
2434 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1975 offset_fsb = XFS_B_TO_FSBT(mp, offset);
2435 nimap = 1; 1976 nimap = 1;
@@ -2784,7 +2325,7 @@ xfs_change_file_space(
2784 iattr.ia_valid = ATTR_SIZE; 2325 iattr.ia_valid = ATTR_SIZE;
2785 iattr.ia_size = startoffset; 2326 iattr.ia_size = startoffset;
2786 2327
2787 error = xfs_setattr(ip, &iattr, attr_flags); 2328 error = xfs_setattr_size(ip, &iattr, attr_flags);
2788 2329
2789 if (error) 2330 if (error)
2790 return error; 2331 return error;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 3bcd23353d6c..35d3d513e1e9 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -13,7 +13,8 @@ struct xfs_inode;
13struct xfs_iomap; 13struct xfs_iomap;
14 14
15 15
16int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); 16int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
17int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
17#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ 18#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
18#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ 19#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
19#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ 20#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */